feat(package): hemar: antlr grammar, but still does not work

This commit is contained in:
2025-11-26 14:48:51 +00:00
parent 8a08272f68
commit 777d48bf3d
16 changed files with 282 additions and 7 deletions

View File

@@ -142,7 +142,8 @@ in {
onlinepubs2man = pkgs.callPackage ./onlinepubs2man {};
migrator = pkgs.callPackage ./migrator {};
nbt2json = pkgs.callPackage ./nbt2json {};
hemar = pkgs.callPackage ./hemar {};
hemar-parser = pkgs.callPackage ./hemar/parser {};
hemar-grammar = pkgs.callPackage ./hemar/grammar {};
pg-17-ext-http = buildHttpExt pkgs "17";
pg-17-ext-smtp-client = buildSmtpExt pkgs "17";
pg-17-ext-plhaskell = buildPlHaskellExt pkgs "17";

View File

@@ -0,0 +1,28 @@
grammar Hemar;

// ================= parser rules =================

// Whole input: an optional run of elements, then end of input.
hemar
    : elements? EOF
    ;

// One or more consecutive elements.
elements
    : element+
    ;

// An element is either a for/end segment or an interpolation.
element
    : segment
    | interpoltion
    ;

// A segment is a `for` header, an optional nested body, and an `end`.
segment
    : for elements? end
    ;

// Loop header: the two keywords only, with nothing between them.
for
    : 'for' 'in'
    ;

// Segment terminator keyword.
end
    : 'end'
    ;

// NOTE(review): 'mcha' looks like a placeholder literal -- confirm what an
// interpolation is supposed to contain.
interpoltion
    : 'mcha'
    ;

// ================= lexer rules =================

// Tag delimiters.
OPEN
    : '{['
    ;

CLOSE
    : ']}'
    ;

// Whitespace runs are dropped.
WS
    : [ \t\n\r]+ -> skip
    ;

// The three rules below discard template text together with the delimiter(s)
// around it. The repeated sub-pattern (~'{'|'{'~'[')* accepts any character
// other than '{', or a '{' followed by a character other than '['.

// Text up to and including an OPEN, guarded by a column-0 predicate.
// NOTE(review): column 0 occurs at the start of every line, not only at the
// start of the input -- confirm this is the intended condition.
LEADING_TEXT
    : { getCharPositionInLine() == 0 }? (~'{'|'{'~'[')* OPEN -> skip
    ;

// Text between a CLOSE and the next OPEN, delimiters included.
MIDLE_TEXT
    : CLOSE (~'{'|'{'~'[')* OPEN -> skip
    ;

// Text from a CLOSE to the end of input.
ENDING_TEXT
    : CLOSE (~'{'|'{'~'[')* EOF -> skip
    ;

View File

@@ -0,0 +1,49 @@
lexer grammar HemarLexer;

// ---------- default mode: plain text ----------
//
// Everything outside "{[ ... ]}" is template text and is emitted as TEXT
// tokens. Whitespace is part of that text: no skip rule exists in this mode,
// because TEXT is listed first and already matches every whitespace run.

// A run of characters containing no '{'.
// The rule must not look past a '{': consuming "the character after '{'"
// would swallow the second '{' of "{{[" and hide the tag that follows.
TEXT
    : ( ~'{' )+
    ;

// "{[" opens a tag: emit LeftBrace and enter TAG mode.
// It is two characters long, so longest-match prefers it over TEXT_BRACE.
LeftBrace
    : '{[' -> pushMode(TAG)
    ;

// A '{' that does not start "{[" is ordinary text, including a '{' that is
// the very last character of the input. It is re-typed as TEXT so the parser
// only ever sees TEXT tokens for template text.
TEXT_BRACE
    : '{' -> type(TEXT)
    ;

// ---------- TAG mode: inside {[ ... ]} ----------
mode TAG;

fragment WS: [ \t\r\n] ;

// Keywords are declared before Path so that an exact "for" / "in" / "end"
// wins the equal-length tie against Path.
For : 'for';
In : 'in';
End : 'end';

// Dotted identifier inside a tag: one or more String pieces joined by '.'.
// Path is declared before String and matches everything String matches, so
// an equal-length tie always resolves to Path.
Path
    : String
    | String '.' Path
    ;

// One path component: either a bare run of characters that stops at '.',
// whitespace or the closing "]}", or a double-quoted string in which \" is
// accepted.
String
    : ( ~[.\] \t\r\n] | ']' ~[}. \t\r\n] )+
    | '"' ( ~'"' | '\\' '"' )+ '"'
    ;

// Closing "]}": emit RightBrace and return to the previous mode.
RightBrace
    : ']}' -> popMode
    ;

// Whitespace inside a tag only separates tokens.
SKIP_TAG_WS
    : WS+ -> skip
    ;

View File

@@ -0,0 +1,18 @@
parser grammar HemarParser;

options {
    tokenVocab = HemarLexer;
}

// Whole template: any number of elements, then end of input.
hemar
    : element*? EOF
    ;

// An element is literal text, a for/end segment, or an interpolation tag.
element
    : TEXT
    | segment
    | interpoltion
    ;

// Loop segment: opening `for` tag, nested elements, closing `end` tag.
segment
    : for element*? end
    ;

// Opening tag of a segment: {[ for <Path> in <Path> ]}
for
    : LeftBrace For Path In Path RightBrace
    ;

// Closing tag of a segment: {[ end ]}
end
    : LeftBrace End RightBrace
    ;

// Interpolation tag: {[ <Path> ]}
interpoltion
    : LeftBrace Path RightBrace
    ;

View File

@@ -0,0 +1,149 @@
grammar Hemar;
// Combined (parser + lexer) grammar for Hemar templates: literal text
// interleaved with "{[ ... ]}" tags.
// ----------------- parser rules -----------------
// Whole input: optional elements followed by end of input.
hemar
: elements? EOF
;
// One or more consecutive elements.
elements
: element+
;
// An element is either a tag or a run of literal text.
element
: tag
| TEXT
;
// tag: everything between OPEN and CLOSE; one alternative per tag kind
tag
: OPEN path CLOSE
| OPEN loopStatement CLOSE
| OPEN includeHeader CLOSE
| OPEN 'end' CLOSE
| OPEN function CLOSE
| OPEN OPEN CLOSE // literal "{[" output
;
// loop tag: "for" string "in" path
loopStatement
: 'for' STRING 'in' path
;
// include tag: "include" path
includeHeader
: 'include' path
;
// function tag
function
: 'compute' language functionBody? // "compute" language body
| 'compute' '-' functionBody? // "compute" - body
;
// languages accepted after "compute"
language
: 'dash'
| 'plpgsql'
;
// everything up to (but not including) "]}"
// (raw body, including "{[" etc, at *token* level)
functionBody
: ( ~CLOSE )*
;
// path: a lone "." or a dot-separated list of segments
path
: '.'
| segmentedPath
;
segmentedPath
: segment ('.' segment)*
;
// a path segment is a quoted STRING or a backslash-introduced index
segment
: STRING
| index
;
// index: \0 .. \9, \1.. \9\d*, and negative forms
// NOTE(review): these alternatives rely on the lexer emitting DIGIT and
// ONENINE tokens; see the note on the digit rules below -- confirm they can
// actually be produced.
index
: '\\' DIGIT
| '\\' ONENINE DIGITS?
| '\\' '-' DIGIT
| '\\' '-' ONENINE DIGITS?
;
// ----------------- lexer rules -----------------
// tag delimiters
OPEN : '{[';
CLOSE : ']}';
// text outside tags: anything except the "{[" sequence
// NOTE(review): a combined grammar has no lexer modes, so TEXT is also a
// candidate inside a tag. It accepts every character that does not start
// "{[" and ANTLR prefers the longest match, so it looks like TEXT would
// swallow the tag contents (keywords, STRING, CLOSE). Verify; this may need
// a separate lexer grammar with a tag mode.
TEXT
: TEXT_CHAR+
;
/*
* Strings used in paths/loop variables:
* "..." with escapes similar to your spec.
*/
STRING
: '"' ( ESC | STRING_CHAR )* '"'
;
// any character except double quote, backslash, CR and LF
fragment STRING_CHAR
: ~["\\\r\n]
;
/*
* Escapes:
* . (literal dot)
* ]} (literal "]}") -- note this is two chars after '\'
* " \"
* \ \\
* / \/
* b f n r t
* uXXXX (hex)
* whitespace after backslash (your ws-in-escape)
*/
fragment ESC
: '\\'
(
'.'
| ']}'
| '"'
| '\\'
| '/'
| 'b'
| 'f'
| 'n'
| 'r'
| 't'
| 'u' HEX HEX HEX HEX
| WS_CHAR
)
;
// digits / hex
// NOTE(review): DIGITS is declared first and also matches a single digit.
// ANTLR resolves equal-length matches in favour of the earlier rule, so
// DIGIT and ONENINE would presumably never be emitted -- confirm.
// NOTE(review): HEX is referenced only from the ESC fragment in this file;
// presumably it should be a fragment too -- confirm.
DIGITS : DIGIT+ ;
DIGIT : [0-9] ;
ONENINE: [1-9] ;
HEX : [0-9a-fA-F] ;
// whitespace for normal lexing
WS
: [ \t\r\n]+ -> skip
;
// whitespace used inside escapes
fragment WS_CHAR
: [ \t\r\n]
;
// one character of literal text
fragment TEXT_CHAR
: ~'{' // any except '{'
| '{' ~'[' // '{' only if not starting OPEN
;

View File

@@ -0,0 +1,36 @@
# Builds the Hemar ANTLR grammar into Java classes and installs a
# `hemar-grammar` launcher that runs ANTLR's TestRig on the `hemar` rule.
{ stdenv, symlinkJoin, jre, antlr4, runtimeShell, jdk }:
let
  hemar-grammar = stdenv.mkDerivation {
    pname = "hemar-grammar";
    version = "0.1.0";
    src = ./.; # directory with HemarLexer.g4 and HemarParser.g4

    nativeBuildInputs = [
      antlr4
      jdk
    ];

    # Generate the lexer/parser sources, then compile them.
    # The pre/post hooks are run explicitly because overriding a phase
    # replaces the default implementation that would otherwise run them.
    buildPhase = ''
      runHook preBuild
      antlr4 HemarLexer.g4 HemarParser.g4
      javac *.java
      runHook postBuild
    '';

    # Install the compiled classes and token vocabularies, and write the
    # launcher script. The heredoc is unquoted on purpose: $out is expanded
    # at build time, while \$CLASSPATH and \$@ are escaped so they are
    # expanded only when the launcher itself runs.
    installPhase = ''
      runHook preInstall
      mkdir -p "$out/lib" "$out/bin"
      cp *.class *.tokens "$out/lib"
      cat > "$out/bin/hemar-grammar" <<EOF
      #!${runtimeShell}
      CLASSPATH="$out/lib:${antlr4}/share/java/*"
      exec ${jre}/bin/java -cp "\$CLASSPATH" org.antlr.v4.gui.TestRig Hemar hemar "\$@"
      EOF
      chmod +x "$out/bin/hemar-grammar"
      runHook postInstall
    '';
  };
in
symlinkJoin {
  name = "hemar-grammar";
  paths = [ hemar-grammar ];
}

View File

@@ -310,12 +310,6 @@ find_close_pattern() {
return 1
fi
;;
'.'|'/'|b|f|n|r|t)
;;
']')
;;
u)
;;
*)
if [ "${TAG_escape_flag+x}" ]; then
if is_ws "$char"; then