From 4729770b59a68255c6d1b0acac5a00283af48b53 Mon Sep 17 00:00:00 2001 From: yukkop Date: Tue, 25 Nov 2025 13:19:45 +0000 Subject: [PATCH] feat(package): `hemar`: some stages of parsing --- package/hemar/default.nix | 2 + package/hemar/hemar.sh | 276 +++++++++++++++++---- package/hemar/src/plex/backend/yq-go.sh | 16 +- package/hemar/test/plex/jq_backend.sh | 16 +- package/hemar/test/plex/jq_backend_time.sh | 1 - test/package/default.nix | 4 +- test/package/hemar/default.nix | 48 ++++ test/package/hemar/lauch.sh | 24 ++ test/package/hemar/test/first.sh | 32 +++ 9 files changed, 359 insertions(+), 60 deletions(-) create mode 100644 test/package/hemar/default.nix create mode 100644 test/package/hemar/lauch.sh create mode 100644 test/package/hemar/test/first.sh diff --git a/package/hemar/default.nix b/package/hemar/default.nix index 0696d25..4bf0b5e 100644 --- a/package/hemar/default.nix +++ b/package/hemar/default.nix @@ -12,6 +12,7 @@ let runtimeInputs = [ ]; text = '' + # shellcheck disable=SC2034 WORKSPACE=${./.} ${builtins.readFile hectic.helpers.posix-shell.log} ${builtins.readFile ./test.sh} @@ -24,6 +25,7 @@ let runtimeInputs = [ ]; text = '' + # shellcheck disable=SC2034 WORKSPACE=${./.} ${builtins.readFile hectic.helpers.posix-shell.log} ${builtins.readFile ./hemar.sh} diff --git a/package/hemar/hemar.sh b/package/hemar/hemar.sh index 43c7566..722bda0 100644 --- a/package/hemar/hemar.sh +++ b/package/hemar/hemar.sh @@ -1,8 +1,6 @@ #!/bin/dash -# shellcheck disable=SC1091 -. "${WORKSPACE:?}/src/plex/plex.sh" -init_plex jq +log notice "running" # Syntax scheme: # @@ -118,10 +116,10 @@ init_plex jq # # paterns # ws # "" -# '0020' ws -# '000A' ws -# '000D' ws -# '0009' ws +# '\x20' ws +# '\x0a' ws +# '\x0d' ws +# '\x09' ws # # nopatern # '{' '0020' . '10FFFF' - '[' @@ -167,7 +165,12 @@ init_plex jq # AbstarctSyntaxTree (ATS) = { # e = [Element] # elements array # } -#AST='' +AST=$(mktemp) +trap 'rm -f "$AST"' EXIT INT HUP + +yq -o j -i '.' 
"$AST" + +log debug "AST path: ${WHITE}${AST}" # 0 - text # 1 - deside tag type @@ -176,44 +179,156 @@ init_plex jq # 4 - include # 5 - compute STAGE=0 -STAGE_BUFFER="$(mktemp)" -open_tag_flag=0 -# finds close pattern and store the char to the STAGE_BUFFER -find_close_pattern() { - char="${1:?}" - if [ "${close_tag_flag:?}" -eq 0 ] && [ "$char" = ']' ]; then - close_tag_flag=1 - elif [ "${close_tag_flag:?}" -eq 1 ] && [ "$char" = '}' ]; then - close_tag_flag=0 +# is_ws(char) -> bool +is_ws() { + ord=$(printf '%d' "'$1") + case $ord in + 32|10|13|9) # <-> \x20 | \x0a | \x0d | \x09 <-> space | \n | \r | \t + return 0 + ;; + esac + return 1 +} - # removes first and last white spaces from the buffer - sed -i 's/[[:space:]]$//g' "$STAGE_BUFFER" - sed -i 's/^[[:space:]]//g' "$STAGE_BUFFER" - - # removes last char from buffer (]) is part of close pattern - truncate -s -1 "$STAGE_BUFFER" - return 0 +# remove_last_double_quote(text) -> text +remove_last_double_quote() { + printf '%s' "$1" | sed 's/\(.*\)"\(.*\)/\1\2/' +} + +#buf_read(buf?) 
-> text
+buf_read() {
+  local buf
+  if [ ${1+x} ]; then
+    buf=${1}
   else
-    printf '%s' "$char" >> "$STAGE_BUFFER"
+    buf=${CURRENT_STAGE_BUFFER}
+  fi
+
+  cat "$buf"
+}
+
+#buf_next() - make the next of the four stage buffers current, wrapping from 4 back to 1
+buf_next() {
+  case "$CURRENT_STAGE_BUFFER" in
+    "$STAGE_BUFFER_1")
+      CURRENT_STAGE_BUFFER="$STAGE_BUFFER_2"
+      ;;
+    "$STAGE_BUFFER_2")
+      CURRENT_STAGE_BUFFER="$STAGE_BUFFER_3"
+      ;;
+    "$STAGE_BUFFER_3")
+      CURRENT_STAGE_BUFFER="$STAGE_BUFFER_4"
+      ;;
+    "$STAGE_BUFFER_4")
+      CURRENT_STAGE_BUFFER="$STAGE_BUFFER_1"
+      ;;
+  esac
+}
+
+buf_reset() { # empty all four stage buffers and make buffer 1 current again
+  : > "$STAGE_BUFFER_1"
+  : > "$STAGE_BUFFER_2"
+  : > "$STAGE_BUFFER_3"
+  : > "$STAGE_BUFFER_4"
+
+  CURRENT_STAGE_BUFFER="$STAGE_BUFFER_1"
+}
+
+STAGE_BUFFER_1="$(mktemp)"
+STAGE_BUFFER_2="$(mktemp)"
+STAGE_BUFFER_3="$(mktemp)"
+STAGE_BUFFER_4="$(mktemp)"
+CURRENT_STAGE_BUFFER=$STAGE_BUFFER_1
+trap 'rm -f "$AST" "$STAGE_BUFFER_1" "$STAGE_BUFFER_2" "$STAGE_BUFFER_3" "$STAGE_BUFFER_4"' EXIT INT HUP # replaces the earlier trap, so $AST must be listed here too
+log debug "stage buffer 1: ${WHITE}$STAGE_BUFFER_1"
+log debug "stage buffer 2: ${WHITE}$STAGE_BUFFER_2"
+log debug "stage buffer 3: ${WHITE}$STAGE_BUFFER_3"
+log debug "stage buffer 4: ${WHITE}$STAGE_BUFFER_4"
+
+# json_escape(value) -> str
+json_escape() {
+  # backslashes first, then double quotes; TODO: control chars (newline, tab, ...) are still not escaped
+  printf '%s' "${1}" | sed 's/\\/\\\\/g; s/"/\\"/g'
+}
+
+# find_close_pattern(char) -> 0 once "]}" is complete, 1 otherwise; other chars go to the stage buffers, one buffer per whitespace-separated word
+find_close_pattern() {
+  local char="${1:?}"
+
+  regular_char() {
+    [ ${TAG_ws_started+x} ] && {
+      log debug 'b?'
+      unset TAG_ws_started
+      # switch buffers only when a word is already stored: leading whitespace
+      # right after "{[" separates nothing
+      if [ -s "$CURRENT_STAGE_BUFFER" ]; then
+        buf_next
+      fi
+    }
+    printf '%s' "$1" >> "$CURRENT_STAGE_BUFFER"
+  }
+
+  if [ ! "${TAG_close_tag_flag+x}" ] && [ "$char" = ']' ]; then
+    TAG_close_tag_flag=1
+  elif [ "${TAG_close_tag_flag+x}" ]; then
+    unset TAG_close_tag_flag
+    if [ "$char" = '}' ]; then
+      # removes first and last white spaces from the buffer
+      sed -i 's/[[:space:]]$//g' "$CURRENT_STAGE_BUFFER"
+      sed -i 's/^[[:space:]]//g' "$CURRENT_STAGE_BUFFER"
+
+      return 0
+    else
+      # the pending ']' was plain content: store it, then run the current char through the normal rules
+      regular_char ']'
+      find_close_pattern "$char" || :
+    fi
+  else
+    # a backslash escapes only the single char that follows it
+    # shellcheck disable=SC1003
+    if [ "${TAG_escape_flag+x}" ]; then
+      unset TAG_escape_flag
+    elif [ "$char" = '\' ]; then
+      TAG_escape_flag=1
+    elif [ "$char" = '"' ]; then
+      if [ "${TAG_double_quote_flag+x}" ]; then
+        unset TAG_double_quote_flag
+      else
+        TAG_double_quote_flag=1
+      fi
+    fi
+
+    if is_ws "$char"; then
+      if [ "${TAG_double_quote_flag+x}" ]; then
+        regular_char "$char"
+      else
+        TAG_ws_started=1
+      fi
+    else
+      regular_char "$char"
+    fi
   fi
   return 1
 }
-# finds open pattern and stores the char to the STAGE_BUFFER
+# finds open pattern "{[" and stores every other char to the current stage buffer
 find_open_pattern() {
-  char="${1:?}"
-  if [ "${open_tag_flag:?}" -eq 0 ] && [ "$char" = '{' ]; then
+  local char="${1:?}"
+  if [ ! "${open_tag_flag+x}" ] && [ "$char" = '{' ]; then
     open_tag_flag=1
-  elif [ "${open_tag_flag:?}" -eq 1 ] && [ "$char" = '[' ]; then
-    open_tag_flag=0
-
-    # removes last char from buffer ({) is part of open pattern
-    truncate -s -1 "$STAGE_BUFFER"
-    return 0
+  elif [ "${open_tag_flag+x}" ]; then
+    unset open_tag_flag
+    if [ "$char" = '[' ]; then
+      # the pending '{' was never written to the buffer, so there is nothing to strip from the text
+      return 0
+    else
+      printf '{' >> "$CURRENT_STAGE_BUFFER" # flush the pending '{', then re-examine char (it may be another '{')
+      find_open_pattern "$char" || :
+    fi
   else
-    printf '%s' "$char" >> "$STAGE_BUFFER"
+    printf '%s' "$char" >> "$CURRENT_STAGE_BUFFER"
   fi
   return 1
@@ -222,39 +337,86 @@ find_open_pattern() {
 parse() {
   char="$1"
-  data_pointer=
-
   case "$STAGE" in
+    # Text Stage - save char in STAGE_BUFFER_1 until next tag opens
     0)
       if find_open_pattern "$char"; then
-        plex_set "$data_pointer"''
+        log debug "open pattern founded"
+        buf=$(cat "$CURRENT_STAGE_BUFFER")
+        yq -o j -i ". += [{
+          \"type\": \"text\",
+          \"value\": \"$(json_escape "$buf")\"
+        }]" "$AST"
+
+        buf_reset
         STAGE=1
       fi
-    ;;
+      ;;
     1)
       if find_close_pattern "$char"; then
-        STAGE=0
-      fi
-    ;;
-    2)
+        case "$(cat "$STAGE_BUFFER_1")" in # match the keyword stored in buffer 1, not the buffer's file name
+          compute)
+            ;;
+          include)
+            ;;
+          for)
+            ;;
+          end)
+            ;;
+          *) # interpolation tag
+            buf=$(cat "$STAGE_BUFFER_1")
+            yq -o j -i ". 
+= [{
+              \"type\": \"interpolation\",
+              \"path\": \"$(json_escape "$buf")\"
+            }]" "$AST"
+            ;;
+        esac
-    ;;
+        # tag is complete: clear the per-tag tokenizer flags and return to the text stage
+        unset TAG_ws_started TAG_double_quote_flag TAG_escape_flag TAG_first_ws_handled TAG_close_tag_flag
+
+        buf_reset
+        STAGE=0
+      fi
+      ;;
+    2)
+
+      ;;
     3)
-    ;;
+      ;;
     4)
-    ;;
+      ;;
     *)
-
-    ;;
+      log error "error: ${WHITE}impossible stage"
+      exit 13
+      ;;
   esac
 }
+while [ $# -gt 0 ]; do # command-line options; anything unknown is rejected with exit code 9
+  case $1 in
+    -c|--compact-output)
+      OUTPUT_ARGS="${OUTPUT_ARGS+$OUTPUT_ARGS }-I=0"
+      shift
+      ;;
+    --*|-*)
+      log error "argument $1 does not exists"
+      exit 9
+      ;;
+    *)
+      log error "subcommand $1 does not exists"
+      exit 9
+      ;;
+  esac
+done
+
 # Using dd to read one character at a time
 input=$(cat)
 i=1
 while :; do
+  # NOTE(review): $(...) strips a trailing newline, so a newline char reads back as empty and the loop stops at the first line break
   char=$(printf '%s' "$input" | dd bs=1 skip=$((i-1)) count=1 2>/dev/null)
   [ -z "$char" ] && break
@@ -262,3 +424,21 @@ while :; do
   i=$((i+1))
 done
+
+# finish TEXT tag if file ends on it
+if [ "$STAGE" -eq 0 ]; then
+  if [ "${open_tag_flag+x}" ]; then
+    unset open_tag_flag
+    printf '{' >> "$STAGE_BUFFER_1"
+  fi
+
+  buf=$(cat "$STAGE_BUFFER_1")
+  yq -o j -i ". 
+= [{
+    \"type\": \"text\",
+    \"value\": \"$(json_escape "$buf")\"
+  }]" "$AST"
+fi
+
+# return the output
+# shellcheck disable=SC2086
+yq ${OUTPUT_ARGS:-} -o j "$AST"
diff --git a/package/hemar/src/plex/backend/yq-go.sh b/package/hemar/src/plex/backend/yq-go.sh
index 9386ba7..6787b4e 100644
--- a/package/hemar/src/plex/backend/yq-go.sh
+++ b/package/hemar/src/plex/backend/yq-go.sh
@@ -6,35 +6,35 @@ trap 'rm -rf $PLEX_TEMP' EXIT
 #plex_set(name, key, value)
 plex_set() {
   local plexfile key val
-  plexfile="${PLEX_TEMP:?}/${1:?}" key="${2:?}" val="${3:?}"
+  plexfile="${PLEX_TEMP:?}/${1:?}.json" key="${2:?}" val="${3:?}"
   touch "$plexfile"
-  yq -i ".$key += \"$val\"" "$plexfile"
+  val="$val" yq -i ".$key = strenv(val)" "$plexfile" # value goes through the environment so quotes/backslashes in it cannot alter the expression
 }
-#plex_child(plexfile, key)
+#plex_child(name, key)
 plex_child() {
   plex_fetch "${1:?}" "${2:?}"
 }
-#plex_val(plexfile, key)
+#plex_val(name, key)
 plex_val() {
   plex_fetch "${1:?}" "${2:?}"
 }
-#plex_val(plexfile, key)
+#plex_fetch(name, key)
 plex_fetch() {
   local plexfile key
-  plexfile="${PLEX_TEMP:?}/${1:?}" key="${2:?}"
+  plexfile="${PLEX_TEMP:?}/${1:?}.json" key="${2:?}"
-  yq -i ".$key" "$plexfile"
+  yq -r ".$key" "$plexfile"
 }
 #plex_push(name, prefix, val)
 plex_push() {
   local plexfile prefix val
-  plexfile="${PLEX_TEMP:?}/${1:?}" prefix="${2:?}" val="${3:?}"
+  plexfile="${PLEX_TEMP:?}/${1:?}.json" prefix="${2:?}" val="${3:?}"
-  yq -i ".$prefix += [\"$val\"]" "$plexfile"
+  val="$val" yq -i ".$prefix += [strenv(val)]" "$plexfile" # same environment hand-off as plex_set
 }
diff --git a/package/hemar/test/plex/jq_backend.sh b/package/hemar/test/plex/jq_backend.sh
index 4e76707..c4c02b3 100644
--- a/package/hemar/test/plex/jq_backend.sh
+++ b/package/hemar/test/plex/jq_backend.sh
@@ -2,6 +2,18 @@ init_plex yq-go
 plex_set ZALUPA zalupa apulaz
-log error "struct: $WHITE$(yq . "$PLEX_TEMP/ZALUPA")$NC"
+log error "struct:\n$WHITE$(yq . 
"$PLEX_TEMP/ZALUPA.json")$NC" + +plex_set ZALUPA zalupa apulaz + +log error "struct:\n$WHITE$(yq . "$PLEX_TEMP/ZALUPA.json")$NC" + +plex_val ZALUPA zalupa + +plex_child ZALUPA kek + +plex_fetch ZALUPA kek diff --git a/package/hemar/test/plex/jq_backend_time.sh b/package/hemar/test/plex/jq_backend_time.sh index 0b7b0de..02b83df 100644 --- a/package/hemar/test/plex/jq_backend_time.sh +++ b/package/hemar/test/plex/jq_backend_time.sh @@ -74,7 +74,6 @@ bench_set() { set +e plex_set 'MY_STRUCT' "$key" "$i" error_code=$? - log warning "error_code: $error_code" set -e if [ $error_code != 0 ]; then log error "key: $WHITE$key$NC, i: $WHITE$i$NC, struct: $WHITE$(jq . "$PLEX_TEMP/MY_STRUCT")$NC" diff --git a/test/package/default.nix b/test/package/default.nix index 0436afa..e786876 100644 --- a/test/package/default.nix +++ b/test/package/default.nix @@ -1 +1,3 @@ -{ system, inputs, self, pkgs }: (import ./migrator { inherit system inputs self pkgs; }) +{ system, inputs, self, pkgs }: + (import ./migrator { inherit system inputs self pkgs; }) // + (import ./hemar { inherit system inputs self pkgs; }) diff --git a/test/package/hemar/default.nix b/test/package/hemar/default.nix new file mode 100644 index 0000000..e6a2b09 --- /dev/null +++ b/test/package/hemar/default.nix @@ -0,0 +1,48 @@ +{ inputs, self, pkgs, system, ... }: let + lib = inputs.nixpkgs.lib; + + # turn anything under ./test into a derivation that exposes $out/run.sh + mkTestDrv = name: type: + if type == "directory" then + pkgs.runCommand "test-${name}" {} '' + if ! 
[ -f ${./test + "/${name}" + /run.sh} ]; then + echo no run.sh in test/${name} + exit 1 + fi + + mkdir -p "$out" + cp -r ${./test + "/${name}"}/* "$out/" + chmod +x "$out/run.sh" + '' + else if lib.hasSuffix ".sh" name then + pkgs.runCommand "test-${lib.removeSuffix ".sh" name}" {} '' + mkdir -p "$out" + install -Dm755 ${./test + "/${name}"} "$out/run.sh" + '' + else + null; + + testDir = builtins.readDir ./test; + + # attrset: testName -> drv with run.sh + testDrvs = + lib.mapAttrs' (n: v: + lib.nameValuePair (lib.removeSuffix ".sh" n) v + ) (lib.filterAttrs (_: v: v != null) + (lib.mapAttrs (n: t: mkTestDrv n t) testDir)); + + hemar = self.packages.${system}.hemar; + mkPgTest = testName: testDrv: pkgs.runCommand "hemar-test-${testName}" + { + nativeBuildInputs = [ pkgs.coreutils pkgs.gnugrep pkgs.gnused ]; + buildInputs = [ hemar pkgs.yq-go ]; + } '' + ${builtins.readFile self.legacyPackages.${system}.helpers.posix-shell.log} + test=${testDrv} + ${builtins.readFile ./lauch.sh} + + # success marker for Nix + # shellcheck disable=SC2154 + mkdir -p "$out" + ''; +in lib.mapAttrs (name: drv: mkPgTest name drv) testDrvs diff --git a/test/package/hemar/lauch.sh b/test/package/hemar/lauch.sh new file mode 100644 index 0000000..0bd2ed2 --- /dev/null +++ b/test/package/hemar/lauch.sh @@ -0,0 +1,24 @@ +#!/bin/dash + +# $test - test and assertion file + +json_diff() { + temp1=$(mktemp) + temp2=$(mktemp) + + yq -I=0 -o=j -n "$1" >"$temp1" + yq -I=0 -o=j -n "$2" >"$temp2" + + if ! diff -q "$temp1" "$temp2"; then + log error "$(yq -o=j -n "$1")" and "$(yq -o=j -n "$2")" + exit 1 + fi +} + +# run test +mkdir './test' +# shellcheck disable=SC2154 +cp -r "$test"/* './test/' +# shellcheck disable=SC2164 +cd './test' +. 
'./run.sh' diff --git a/test/package/hemar/test/first.sh b/test/package/hemar/test/first.sh new file mode 100644 index 0000000..a536ad8 --- /dev/null +++ b/test/package/hemar/test/first.sh @@ -0,0 +1,32 @@ +answer="$(echo 'some text' | hemar -c)" + +expected="$(printf '[ + { + "type": "text", + "value": "some text" + } +]')" + +json_diff "$answer" "$expected" + +answer="$(echo 'some [] {} text' | hemar -c)" + +expected="$(printf '[ + { + "type": "text", + "value": "some [] {} text" + } +]')" + +json_diff "$answer" "$expected" + +answer="$(echo 'some {' | hemar -c)" + +expected="$(printf '[ + { + "type": "text", + "value": "some {" + } +]')" + +json_diff "$answer" "$expected"