// File: util.nix/package/hemar/grammar/antlr/_Hemar.g4
// (150 lines, 2.3 KiB, ANTLR)

grammar Hemar;
// ----------------- parser rules -----------------
// Entry rule: an optional run of elements followed by end of input.
// Requiring EOF forces the parser to account for every token.
hemar
: elements? EOF
;
// One or more consecutive elements.
elements
: element+
;
// A single element: either a tag or one TEXT token.
element
: tag
| TEXT
;
// tag: an OPEN token, one kind of tag content, then a CLOSE token.
// The content is one of: a path, a loop statement, an include header,
// the keyword 'end', a function, or a second OPEN.
// The rule is flat: it does not pair an 'end' tag with any earlier tag.
// NOTE(review): 'end' presumably closes a loop opened by loopStatement;
// if so, that pairing has to be checked after parsing — confirm.
tag
: OPEN path CLOSE
| OPEN loopStatement CLOSE
| OPEN includeHeader CLOSE
| OPEN 'end' CLOSE
| OPEN function CLOSE
| OPEN OPEN CLOSE // literal "{[" output
;
// loop tag: "for" string "in" path
// The STRING is a double-quoted string token (the loop variable); the path
// follows the 'in' keyword.
loopStatement
: 'for' STRING 'in' path
;
// include tag: "include" path
// The keyword 'include' followed by a single path.
includeHeader
: 'include' path
;
// function tag: the keyword 'compute', then either a language keyword or
// a '-', then the raw function body.
// functionBody is referenced without '?' on purpose: the rule itself can
// match zero tokens, so an optional reference to it is ambiguous (an empty
// body and an absent body would both match the same input). Every compute
// tag therefore carries a functionBody node, which may be empty.
function
: 'compute' language functionBody // "compute" language body
| 'compute' '-' functionBody // "compute" - body
;
// Language keyword accepted after 'compute': 'dash' or 'plpgsql'.
language
: 'dash'
| 'plpgsql'
;
// everything up to (but not including) "]}"
// (raw body, including "{[" etc, at *token* level)
// ~CLOSE is any single token other than CLOSE; the rule may match zero tokens.
// Whitespace is discarded by the WS rule (-> skip), so the original spacing
// of the body cannot be recovered from the tokens matched here.
functionBody
: ( ~CLOSE )*
;
// path: either a lone '.' or a dotted sequence of segments.
path
: '.'
| segmentedPath
;
// One segment followed by zero or more '.'-separated segments.
segmentedPath
: segment ('.' segment)*
;
// A path segment: a double-quoted STRING or a backslash-introduced index.
segment
: STRING
| index
;
// index: a backslash, an optional '-', then either 0 or a number without
// leading zeros (\0, \7, \12, \-3).
// The whole index is matched as one lexer token. Assembling it in the parser
// from DIGIT / ONENINE / DIGITS tokens cannot work: DIGITS is declared before
// DIGIT and ONENINE and also matches a single digit, so the lexer never emits
// those two, and a multi-digit run always arrives as a single DIGITS token.
// Being a single token also means no whitespace is allowed inside an index.
index
: INDEX
;
INDEX
: '\\' '-'? ( '0' | [1-9] [0-9]* )
;
// ----------------- lexer rules -----------------
// Tag delimiters: OPEN is the two characters "{[", CLOSE is "]}".
OPEN : '{[';
CLOSE : ']}';
// text outside tags: anything except the "{[" sequence
// NOTE(review): nothing in this grammar confines TEXT to the outside of a
// tag — there is no lexer mode or predicate. Because the lexer prefers the
// longest match, TEXT looks able to swallow tag contents (keywords, strings,
// even "]}") up to the next "{[" — confirm, and consider a split lexer
// grammar with a dedicated tag mode.
TEXT
: TEXT_CHAR+
;
/*
 * Double-quoted string, used for path segments and loop variables.
 * Between the quotes: any mix of plain string characters and
 * backslash escape sequences.
 */
STRING
: '"' ( STRING_CHAR | ESC )* '"'
;
// A plain string character: anything except a double quote, a backslash,
// a carriage return or a line feed.
fragment STRING_CHAR
: ~( '"' | '\\' | '\r' | '\n' )
;
/*
 * Escape sequence: a backslash followed by exactly one of
 *   - a single character from:  .  "  \  /  b  f  n  r  t
 *   - the two characters ]}
 *   - u and four hex digits (uXXXX)
 *   - one whitespace character (space, tab, CR or LF)
 */
fragment ESC
: '\\' ( [."\\/bfnrt] | ']}' | 'u' HEX HEX HEX HEX | WS_CHAR )
;
// digits / hex
// DIGITS is declared first and also matches a single digit, so on a
// one-character match it wins the tie: DIGIT and ONENINE are never emitted
// as tokens by the lexer. They stay declared as token rules so that existing
// references to these names keep resolving.
DIGITS : DIGIT+ ;
DIGIT : [0-9] ;
ONENINE: [1-9] ;
// HEX is only a building block of the uXXXX escape, so it is a fragment and
// not a token type of its own.
fragment HEX : [0-9a-fA-F] ;
// Runs of spaces, tabs, carriage returns and line feeds are matched and
// dropped from the token stream.
WS
: ( ' ' | '\t' | '\r' | '\n' )+ -> skip
;
// A single whitespace character (space, tab, CR or LF), as allowed after
// the backslash of an escape sequence.
fragment WS_CHAR
: ' ' | '\t' | '\r' | '\n'
;
// One unit of literal text:
//   - any character other than '{', or
//   - a run of '{' followed by a character that is neither '{' nor '['.
// The follower excludes '{' as well as '[': if the pair "{{" could be
// consumed as text, the input "{{[" would lose its OPEN.
fragment TEXT_CHAR
: ~'{' // any except '{'
| '{'+ ~( '{' | '[' ) // braces that do not lead into OPEN
;
// A '{' that TEXT_CHAR cannot absorb (it sits directly before "{[" or at the
// end of the input) is still literal text. The rule is declared last and is
// one character long, so OPEN wins whenever "{[" is actually present.
TEXT_LBRACE
: '{' -> type(TEXT)
;