feat(package): hemar: antlr grammar, but still does not work
This commit is contained in:
28
package/hemar/grammar/antlr/GoodTry.g4
Normal file
28
package/hemar/grammar/antlr/GoodTry.g4
Normal file
@@ -0,0 +1,28 @@
|
||||
grammar Hemar;

// NOTE(review): ANTLR expects the grammar name to match the file name; this
// file appears to be stored as GoodTry.g4 while declaring `Hemar` -- confirm
// before trying to generate code from it.

// ----------------- parser rules -----------------

// Entry rule: an optional run of elements followed by end of input.
hemar : elements? EOF ;

elements : element+ ;

element
    : segment
    | interpoltion
    ;

// A loop: opening keywords, optional nested elements, closing keyword.
segment : forTag elements? end ;

// Formerly named `for`. Target-language keywords make poor rule names: the
// generated Java parser would need a method called `for`, and ANTLR's own
// documentation says not to use them. Only the rule name changed; the
// matched tokens are the same.
forTag : 'for' 'in' ;

end : 'end' ;

interpoltion : 'mcha' ;

// ----------------- lexer rules -----------------

OPEN  : '{[' ;
CLOSE : ']}' ;

WS : [ \t\n\r]+ -> skip ;

// The three *_TEXT rules discard everything outside of tags, delimiters
// included, so the parser only ever sees the keywords found inside "{[ ... ]}".

// Text before a tag, up to and including "{[". The predicate restricts the
// rule to positions where the lexer is at column 0 of a line.
LEADING_TEXT : { getCharPositionInLine() == 0 }? (~'{'|'{'~'[')* OPEN -> skip ;

// Text between two tags: from "]}" through the next "{[".
MIDLE_TEXT   : CLOSE (~'{'|'{'~'[')* OPEN -> skip ;

// Text after the last tag: from "]}" to end of input.
ENDING_TEXT  : CLOSE (~'{'|'{'~'[')* EOF -> skip ;
|
||||
49
package/hemar/grammar/antlr/HemarLexer.g4
Normal file
49
package/hemar/grammar/antlr/HemarLexer.g4
Normal file
@@ -0,0 +1,49 @@
|
||||
lexer grammar HemarLexer;

// ---------- default mode: plain text ----------

// A run of characters that contains no '{' at all.
//
// The previous definition, ( ~'{' | '{' ~'[' )+, consumed whichever character
// followed a '{'. That broke two inputs:
//   * "{{[x]}"  -- the second '{' was eaten as "the char after '{'", so the
//                  real "{[" opener was never recognised;
//   * a '{' as the very last character -- no alternative could match it.
// A lone '{' is now handled by LONE_BRACE below.
TEXT
    : ~'{'+
    ;

// When we see "{[", emit LeftBrace and enter TAG mode.
LeftBrace
    : '{[' -> pushMode(TAG)
    ;

// A '{' that does not start "{[". LeftBrace matches two characters at a real
// opener and therefore wins by longest match; everywhere else this rule emits
// the brace as ordinary TEXT. Consequence: text containing '{' reaches the
// parser as several consecutive TEXT tokens rather than one.
LONE_BRACE
    : '{' -> type(TEXT)
    ;

// Skip whitespace in plain text if you want.
// NOTE(review): TEXT is declared earlier and also matches whitespace, so it
// wins every tie and this rule does not fire as written. Kept so the token
// vocabulary stays unchanged.
SKIP_WS
    : [ \t\r\n]+ -> skip
    ;
|
||||
|
||||
// ---------- TAG mode: active between "{[" and "]}" ----------

mode TAG;

// Single whitespace character; only used by SKIP_TAG_WS below.
fragment WS : [ \t\r\n] ;

// Keywords. They are listed ahead of Path so that, when a keyword and Path
// match the same number of characters, the keyword is the token produced.
For : 'for' ;
In  : 'in' ;
End : 'end' ;

// Dotted identifier inside a tag: one String, or several joined by '.'.
Path
    : String ( '.' String )*
    ;

// One path segment. Either
//   - a bare run that stops at '.', whitespace or the closing "]}" (a ']' is
//     accepted only when the character after it is not '}', '.', or whitespace), or
//   - a double-quoted string, in which \" may appear.
String
    : ( ~[.\] \t\r\n] | ']' ~[}. \t\r\n] )+
    | '"' ( ~'"' | '\\' '"' )+ '"'
    ;

// Closing "]}": emit RightBrace and return to the mode that pushed TAG.
RightBrace
    : ']}' -> popMode
    ;

// Whitespace between the tokens of a tag is dropped.
SKIP_TAG_WS
    : WS+ -> skip
    ;
|
||||
18
package/hemar/grammar/antlr/HemarParser.g4
Normal file
18
package/hemar/grammar/antlr/HemarParser.g4
Normal file
@@ -0,0 +1,18 @@
|
||||
parser grammar HemarParser;

// Token types (TEXT, LeftBrace, For, In, End, Path, RightBrace) come from the
// lexer grammar.
options { tokenVocab=HemarLexer; }

// Entry rule: any number of elements, then end of input.
// The loops below were non-greedy (`*?`); a plain `*` is used instead because
// what follows each loop (EOF, or an `end` tag whose second token is End) is
// already distinguishable from the start of another element.
hemar : element* EOF ;

element
    : TEXT
    | segment
    | interpoltion
    ;

// A loop block: "{[ for X in Y ]}" ... "{[ end ]}".
segment : forTag element* end ;

// Formerly named `for`. Target-language keywords make poor rule names: the
// generated Java parser would need a method called `for`, and ANTLR's own
// documentation says not to use them. Only the rule name changed; the
// matched tokens are the same.
forTag : LeftBrace For Path In Path RightBrace ;

end : LeftBrace End RightBrace ;

// "{[ some.path ]}"
interpoltion : LeftBrace Path RightBrace ;
|
||||
149
package/hemar/grammar/antlr/_Hemar.g4
Normal file
149
package/hemar/grammar/antlr/_Hemar.g4
Normal file
@@ -0,0 +1,149 @@
|
||||
grammar Hemar;

// NOTE(review): ANTLR expects the grammar name to match the file name; this
// file appears to be stored as _Hemar.g4 while declaring `Hemar` -- confirm
// before trying to generate code from it.
//
// NOTE(review): this is a single-mode lexer. TEXT accepts every character
// sequence that does not contain "{[", so after an OPEN it can match more
// input than any tag-level token and wins by longest match. Separating text
// from tag content needs lexer modes, which are only available in a
// `lexer grammar`.

// ----------------- parser rules -----------------

hemar
    : elements? EOF
    ;

elements
    : element+
    ;

element
    : tag
    | TEXT
    ;

// tag: one construct wrapped in OPEN ... CLOSE
tag
    : OPEN path CLOSE
    | OPEN loopStatement CLOSE
    | OPEN includeHeader CLOSE
    | OPEN 'end' CLOSE
    | OPEN function CLOSE
    | OPEN OPEN CLOSE               // literal "{[" output
    ;

// loop tag: "for" string "in" path
loopStatement
    : 'for' STRING 'in' path
    ;

// include tag: "include" path
includeHeader
    : 'include' path
    ;

// function tag
function
    : 'compute' language functionBody?      // "compute" language body
    | 'compute' '-' functionBody?           // "compute" - body
    ;

language
    : 'dash'
    | 'plpgsql'
    ;

// everything up to (but not including) "]}"
// (raw body, including "{[" etc, at *token* level)
functionBody
    : ( ~CLOSE )*
    ;

// path: a single '.', or dot-separated segments
path
    : '.'
    | segmentedPath
    ;

segmentedPath
    : segment ('.' segment)*
    ;

segment
    : STRING
    | index
    ;

// index: \0 .. \9, \1.. \9\d*, and negative forms.
// The digits are now recognised by the lexer as one INDEX token. Previously
// this rule referenced the DIGIT / ONENINE / DIGITS tokens directly, but
// DIGITS was declared first and matched every digit run, so the alternatives
// built on DIGIT and ONENINE could never be taken.
index
    : INDEX
    ;

// ----------------- lexer rules -----------------

OPEN  : '{[' ;
CLOSE : ']}' ;

// text outside tags: anything except the "{[" sequence
TEXT
    : TEXT_CHAR+
    ;

/*
 * Strings used in paths/loop variables:
 * "..." with escapes similar to your spec.
 */
STRING
    : '"' ( ESC | STRING_CHAR )* '"'
    ;

// Any character allowed unescaped inside a string.
fragment STRING_CHAR
    : ~["\\\r\n]
    ;

/*
 * Escapes: a backslash followed by one of
 *   .             literal dot
 *   ]}            literal "]}"  (two characters after the backslash)
 *   "             double quote
 *   \             backslash
 *   /             slash
 *   b f n r t     single-letter escapes
 *   uXXXX         four hex digits
 *   whitespace    (your ws-in-escape)
 */
fragment ESC
    : '\\'
      ( '.'
      | ']}'
      | '"'
      | '\\'
      | '/'
      | 'b'
      | 'f'
      | 'n'
      | 'r'
      | 't'
      | 'u' HEX HEX HEX HEX
      | WS_CHAR
      )
    ;

// Numeric index token: backslash, optional '-', then a single digit or a
// number starting with 1-9. Same forms the parser rule used to spell out.
INDEX
    : '\\' '-'? ( DIGIT | ONENINE DIGIT* )
    ;

// digits / hex -- fragments, so they no longer produce tokens of their own
// (HEX as a token would have claimed single letters a-f).
fragment DIGIT   : [0-9] ;
fragment ONENINE : [1-9] ;
fragment HEX     : [0-9a-fA-F] ;

// whitespace for normal lexing
WS
    : [ \t\r\n]+ -> skip
    ;

// whitespace used inside escapes
fragment WS_CHAR
    : [ \t\r\n]
    ;

fragment TEXT_CHAR
    : ~'{'          // any except '{'
    | '{' ~'['      // '{' only if not starting OPEN
    ;
|
||||
36
package/hemar/grammar/antlr/default.nix
Normal file
36
package/hemar/grammar/antlr/default.nix
Normal file
@@ -0,0 +1,36 @@
|
||||
# Compiles the Hemar ANTLR grammar to Java classes and installs a small
# launcher, `hemar-grammar`, that starts ANTLR's TestRig on it.
{ stdenv, symlinkJoin, jre, antlr4, runtimeShell, jdk }:

let
  grammarPkg = stdenv.mkDerivation {
    pname = "hemar-grammar";
    version = "0.1.0";

    # This directory; HemarLexer.g4 and HemarParser.g4 are the grammars that
    # the build phase compiles.
    src = ./.;

    nativeBuildInputs = [ antlr4 jdk ];

    # Generate the Java sources from both grammars, then compile them in place.
    buildPhase = ''
      antlr4 HemarLexer.g4 HemarParser.g4
      javac *.java
    '';

    # Install the compiled classes and token files under lib/, and write the
    # launcher script. Inside the heredoc, "$out" is expanded while the script
    # is being written; the backslash-escaped variables are left for the
    # launcher itself to expand when it runs.
    installPhase = ''
      mkdir -p "$out/lib" "$out/bin"
      cp *.class *.tokens "$out/lib"

      cat > "$out/bin/hemar-grammar" <<EOF
      #!${runtimeShell}
      CLASSPATH="$out/lib:${antlr4}/share/java/*"
      exec ${jre}/bin/java -cp "\$CLASSPATH" org.antlr.v4.gui.TestRig Hemar hemar "\$@"
      EOF
      chmod +x "$out/bin/hemar-grammar"
    '';
  };
in
symlinkJoin {
  name = "hemar-grammar";
  paths = [ grammarPkg ];
}
|
||||
@@ -310,12 +310,6 @@ find_close_pattern() {
|
||||
return 1
|
||||
fi
|
||||
;;
|
||||
'.'|'/'|b|f|n|r|t)
|
||||
;;
|
||||
']')
|
||||
;;
|
||||
u)
|
||||
;;
|
||||
*)
|
||||
if [ "${TAG_escape_flag+x}" ]; then
|
||||
if is_ws "$char"; then
|
||||
Reference in New Issue
Block a user