From 777d48bf3dbc9c16511f01ba6896117fafcac4e1 Mon Sep 17 00:00:00 2001
From: yukkop
Date: Wed, 26 Nov 2025 14:48:51 +0000
Subject: [PATCH] feat(package): `hemar`: antlr grammar, but still does not work

---
Review notes (ignored by `git am`; not part of the commit message):
- `hemar-grammar` is wired to ./hemar/grammar/antlr, the directory in which
  this patch creates default.nix.
- HemarParser.g4 calls the loop-header rule `forTag`: `for` is a Java keyword
  and the build compiles the generated Java parser with javac.
- GoodTry.g4 and _Hemar.g4 are not passed to antlr4 by buildPhase and are
  left untouched.
- Edits were made in-line, so hunk line counts are unchanged; the blob ids on
  the `index` lines were not recomputed.

 package/default.nix                           |   3 +-
 package/hemar/grammar/antlr/GoodTry.g4        |  28 ++++
 package/hemar/grammar/antlr/HemarLexer.g4     |  49 ++++++
 package/hemar/grammar/antlr/HemarParser.g4    |  18 +++
 package/hemar/grammar/antlr/_Hemar.g4         | 149 ++++++++++++++++++
 package/hemar/grammar/antlr/default.nix       |  36 +++++
 package/hemar/{ => parser}/default.nix        |   0
 package/hemar/{ => parser}/hemar.sh           |   6 -
 .../{ => parser}/src/plex/backend/env_var.sh  |   0
 .../{ => parser}/src/plex/backend/file.sh     |   0
 .../{ => parser}/src/plex/backend/yq-go.sh    |   0
 package/hemar/{ => parser}/src/plex/plex.sh   |   0
 package/hemar/{ => parser}/test.sh            |   0
 .../test/plex/env_backend_time.sh             |   0
 .../{ => parser}/test/plex/jq_backend.sh      |   0
 .../{ => parser}/test/plex/jq_backend_time.sh |   0
 16 files changed, 282 insertions(+), 7 deletions(-)
 create mode 100644 package/hemar/grammar/antlr/GoodTry.g4
 create mode 100644 package/hemar/grammar/antlr/HemarLexer.g4
 create mode 100644 package/hemar/grammar/antlr/HemarParser.g4
 create mode 100644 package/hemar/grammar/antlr/_Hemar.g4
 create mode 100644 package/hemar/grammar/antlr/default.nix
 rename package/hemar/{ => parser}/default.nix (100%)
 rename package/hemar/{ => parser}/hemar.sh (99%)
 rename package/hemar/{ => parser}/src/plex/backend/env_var.sh (100%)
 rename package/hemar/{ => parser}/src/plex/backend/file.sh (100%)
 rename package/hemar/{ => parser}/src/plex/backend/yq-go.sh (100%)
 rename package/hemar/{ => parser}/src/plex/plex.sh (100%)
 rename package/hemar/{ => parser}/test.sh (100%)
 rename package/hemar/{ => parser}/test/plex/env_backend_time.sh (100%)
 rename package/hemar/{ => parser}/test/plex/jq_backend.sh (100%)
 rename package/hemar/{ => parser}/test/plex/jq_backend_time.sh (100%)

diff --git a/package/default.nix b/package/default.nix
index 528d800..57fdd1a 100644
--- a/package/default.nix
+++ b/package/default.nix
@@ -142,7 +142,8 @@ in {
   onlinepubs2man = pkgs.callPackage ./onlinepubs2man {};
   migrator = pkgs.callPackage ./migrator {};
   nbt2json = pkgs.callPackage ./nbt2json {};
-  hemar = pkgs.callPackage ./hemar {};
+  hemar-parser = pkgs.callPackage ./hemar/parser {};
+  hemar-grammar = pkgs.callPackage ./hemar/grammar/antlr {};
   pg-17-ext-http = buildHttpExt pkgs "17";
   pg-17-ext-smtp-client = buildSmtpExt pkgs "17";
   pg-17-ext-plhaskell = buildPlHaskellExt pkgs "17";
diff --git a/package/hemar/grammar/antlr/GoodTry.g4 b/package/hemar/grammar/antlr/GoodTry.g4
new file mode 100644
index 0000000..8db8c6c
--- /dev/null
+++ b/package/hemar/grammar/antlr/GoodTry.g4
@@ -0,0 +1,28 @@
+grammar Hemar;
+
+// ----------------- parser rules -----------------
+
+hemar: elements? EOF ;
+
+elements: element+ ;
+
+element
+  : segment
+  | interpoltion
+  ;
+
+segment : for elements? end ;
+
+for : 'for' 'in' ;
+
+end: 'end' ;
+
+interpoltion : 'mcha' ;
+
+OPEN : '{[' ;
+CLOSE : ']}' ;
+
+WS : [ \t\n\r]+ -> skip ;
+LEADING_TEXT : { getCharPositionInLine() == 0 }? (~'{'|'{'~'[')* OPEN -> skip;
+MIDLE_TEXT : CLOSE (~'{'|'{'~'[')* OPEN -> skip;
+ENDING_TEXT : CLOSE (~'{'|'{'~'[')* EOF -> skip ;
diff --git a/package/hemar/grammar/antlr/HemarLexer.g4 b/package/hemar/grammar/antlr/HemarLexer.g4
new file mode 100644
index 0000000..f7abdcc
--- /dev/null
+++ b/package/hemar/grammar/antlr/HemarLexer.g4
@@ -0,0 +1,49 @@
+lexer grammar HemarLexer;
+
+// ---------- default mode: plain text ----------
+
+// Everything that is not the start of "{[" is TEXT
+TEXT
+  : ( ~'{' | '{' ~'[' )+
+  ;
+
+// When we see "{[", emit LeftBrace and enter TAG mode
+LeftBrace
+  : '{[' -> pushMode(TAG)
+  ;
+
+// skip whitespace in plain text if you want
+SKIP_WS
+  : [ \t\r\n]+ -> skip
+  ;
+
+// ---------- TAG mode: inside {[ ... ]} ----------
+
+mode TAG;
+
+fragment WS: [ \t\r\n] ;
+
+For : 'for';
+In : 'in';
+End : 'end';
+
+// identifier inside tag
+Path
+  : String
+  | String '.' Path
+  ;
+
+String
+  : ( ~[.\] \t\r\n] | ']' ~[}. \t\r\n] )+
+  | '"' ( ~'"' | '\\' '"' )+ '"'
+  ;
+
+// closing "]}": emit RightBrace and go back to default mode
+RightBrace
+  : ']}' -> popMode
+  ;
+
+// skip whitespace inside tag
+SKIP_TAG_WS
+  : WS+ -> skip
+  ;
diff --git a/package/hemar/grammar/antlr/HemarParser.g4 b/package/hemar/grammar/antlr/HemarParser.g4
new file mode 100644
index 0000000..319080c
--- /dev/null
+++ b/package/hemar/grammar/antlr/HemarParser.g4
@@ -0,0 +1,18 @@
+parser grammar HemarParser;
+
+options { tokenVocab=HemarLexer; }
+
+hemar : element*? EOF ;
+
+element
+  : TEXT
+  | segment
+  | interpoltion
+  ;
+
+segment : forTag element*? end;
+
+forTag : LeftBrace For Path In Path RightBrace;
+end : LeftBrace End RightBrace;
+
+interpoltion : LeftBrace Path RightBrace;
diff --git a/package/hemar/grammar/antlr/_Hemar.g4 b/package/hemar/grammar/antlr/_Hemar.g4
new file mode 100644
index 0000000..d7a7e5f
--- /dev/null
+++ b/package/hemar/grammar/antlr/_Hemar.g4
@@ -0,0 +1,149 @@
+grammar Hemar;
+
+// ----------------- parser rules -----------------
+
+hemar
+  : elements? EOF
+  ;
+
+elements
+  : element+
+  ;
+
+element
+  : tag
+  | TEXT
+  ;
+
+// tag
+tag
+  : OPEN path CLOSE
+  | OPEN loopStatement CLOSE
+  | OPEN includeHeader CLOSE
+  | OPEN 'end' CLOSE
+  | OPEN function CLOSE
+  | OPEN OPEN CLOSE // literal "{[" output
+  ;
+
+// loop tag: "for" string "in" path
+loopStatement
+  : 'for' STRING 'in' path
+  ;
+
+// include tag: "include" path
+includeHeader
+  : 'include' path
+  ;
+
+// function tag
+function
+  : 'compute' language functionBody? // "compute" language body
+  | 'compute' '-' functionBody? // "compute" - body
+  ;
+
+language
+  : 'dash'
+  | 'plpgsql'
+  ;
+
+// everything up to (but not including) "]}"
+// (raw body, including "{[" etc, at *token* level)
+functionBody
+  : ( ~CLOSE )*
+  ;
+
+// path
+path
+  : '.'
+  | segmentedPath
+  ;
+
+segmentedPath
+  : segment ('.' segment)*
+  ;
+
+segment
+  : STRING
+  | index
+  ;
+
+// index: \0 .. \9, \1.. \9\d*, and negative forms
+index
+  : '\\' DIGIT
+  | '\\' ONENINE DIGITS?
+  | '\\' '-' DIGIT
+  | '\\' '-' ONENINE DIGITS?
+  ;
+
+// ----------------- lexer rules -----------------
+
+OPEN : '{[';
+CLOSE : ']}';
+
+// text outside tags: anything except the "{[" sequence
+TEXT
+  : TEXT_CHAR+
+  ;
+
+/*
+ * Strings used in paths/loop variables:
+ * "..." with escapes similar to your spec.
+ */
+STRING
+  : '"' ( ESC | STRING_CHAR )* '"'
+  ;
+
+fragment STRING_CHAR
+  : ~["\\\r\n]
+  ;
+
+/*
+ * Escapes:
+ * . (literal dot)
+ * ]} (literal "]}") -- note this is two chars after '\'
+ * " \"
+ * \ \\
+ * / \/
+ * b f n r t
+ * uXXXX (hex)
+ * whitespace after backslash (your ws-in-escape)
+ */
+fragment ESC
+  : '\\'
+    (
+      '.'
+    | ']}'
+    | '"'
+    | '\\'
+    | '/'
+    | 'b'
+    | 'f'
+    | 'n'
+    | 'r'
+    | 't'
+    | 'u' HEX HEX HEX HEX
+    | WS_CHAR
+    )
+  ;
+
+// digits / hex
+DIGITS : DIGIT+ ;
+DIGIT : [0-9] ;
+ONENINE: [1-9] ;
+HEX : [0-9a-fA-F] ;
+
+// whitespace for normal lexing
+WS
+  : [ \t\r\n]+ -> skip
+  ;
+
+// whitespace used inside escapes
+fragment WS_CHAR
+  : [ \t\r\n]
+  ;
+
+
+fragment TEXT_CHAR
+  : ~'{' // any except '{'
+  | '{' ~'[' // '{' only if not starting OPEN
+  ;
diff --git a/package/hemar/grammar/antlr/default.nix b/package/hemar/grammar/antlr/default.nix
new file mode 100644
index 0000000..a3fcb89
--- /dev/null
+++ b/package/hemar/grammar/antlr/default.nix
@@ -0,0 +1,36 @@
+{ stdenv, symlinkJoin, jre, antlr4, runtimeShell, jdk }:
+
+let
+  hemar-grammar = stdenv.mkDerivation {
+    pname = "hemar-grammar";
+    version = "0.1.0";
+
+    src = ./.; # directory with HemarLexer.g4 and HemarParser.g4
+
+    nativeBuildInputs = [
+      antlr4
+      jdk
+    ];
+
+    buildPhase = ''
+      antlr4 HemarLexer.g4 HemarParser.g4
+      javac *.java
+    '';
+
+    installPhase = ''
+      mkdir -p "$out/lib" "$out/bin"
+      cp *.class *.tokens "$out/lib"
+
+      cat > "$out/bin/hemar-grammar" <