feat(package): hemar: some stages of parsing

This commit is contained in:
2025-11-25 13:19:45 +00:00
parent f5d412997e
commit 4729770b59
9 changed files with 359 additions and 60 deletions

View File

@@ -12,6 +12,7 @@ let
runtimeInputs = [ ]; runtimeInputs = [ ];
text = '' text = ''
# shellcheck disable=SC2034
WORKSPACE=${./.} WORKSPACE=${./.}
${builtins.readFile hectic.helpers.posix-shell.log} ${builtins.readFile hectic.helpers.posix-shell.log}
${builtins.readFile ./test.sh} ${builtins.readFile ./test.sh}
@@ -24,6 +25,7 @@ let
runtimeInputs = [ ]; runtimeInputs = [ ];
text = '' text = ''
# shellcheck disable=SC2034
WORKSPACE=${./.} WORKSPACE=${./.}
${builtins.readFile hectic.helpers.posix-shell.log} ${builtins.readFile hectic.helpers.posix-shell.log}
${builtins.readFile ./hemar.sh} ${builtins.readFile ./hemar.sh}

View File

@@ -1,8 +1,6 @@
#!/bin/dash #!/bin/dash
# shellcheck disable=SC1091 log notice "running"
. "${WORKSPACE:?}/src/plex/plex.sh"
init_plex jq
# Syntax scheme: # Syntax scheme:
# #
@@ -118,10 +116,10 @@ init_plex jq
# # paterns # # paterns
# ws # ws
# "" # ""
# '0020' ws # '\x20' ws
# '000A' ws # '\x0a' ws
# '000D' ws # '\x0d' ws
# '0009' ws # '\x09' ws
# #
# nopatern # nopatern
# '{' '0020' . '10FFFF' - '[' # '{' '0020' . '10FFFF' - '['
@@ -167,7 +165,12 @@ init_plex jq
# AbstarctSyntaxTree (ATS) = { # AbstarctSyntaxTree (ATS) = {
# e = [Element] # elements array # e = [Element] # elements array
# } # }
#AST='' AST=$(mktemp)
trap 'rm -f "$AST"' EXIT INT HUP
yq -o j -i '.' "$AST"
log debug "AST path: ${WHITE}${AST}"
# 0 - text # 0 - text
# 1 - deside tag type # 1 - deside tag type
@@ -176,44 +179,156 @@ init_plex jq
# 4 - include # 4 - include
# 5 - compute # 5 - compute
STAGE=0 STAGE=0
STAGE_BUFFER="$(mktemp)"
open_tag_flag=0
# finds close pattern and store the char to the STAGE_BUFFER # is_ws(char) -> bool
find_close_pattern() { is_ws() {
char="${1:?}" ord=$(printf '%d' "'$1")
if [ "${close_tag_flag:?}" -eq 0 ] && [ "$char" = ']' ]; then case $ord in
close_tag_flag=1 32|10|13|9) # <-> \x20 | \x0a | \x0d | \x09 <-> space | \n | \r | \t
elif [ "${close_tag_flag:?}" -eq 1 ] && [ "$char" = '}' ]; then return 0
close_tag_flag=0 ;;
esac
return 1
}
# removes first and last white spaces from the buffer # remove_last_double_quote(text) -> text
sed -i 's/[[:space:]]$//g' "$STAGE_BUFFER" remove_last_double_quote() {
sed -i 's/^[[:space:]]//g' "$STAGE_BUFFER" printf '%s' "$1" | sed 's/\(.*\)"\(.*\)/\1\2/'
}
# removes last char from buffer (]) is part of close pattern
truncate -s -1 "$STAGE_BUFFER" #buf_read(buf?) -> text
return 0 buf_read() {
local buf
if [ ${1+x} ]; then
buf=${1}
else else
printf '%s' "$char" >> "$STAGE_BUFFER" buf=${CURRENT_STAGE_BUFFER}
fi
cat "$buf"
}
#buf_next()
buf_next() {
  # Advance CURRENT_STAGE_BUFFER to the next stage buffer in the ring
  # 1 -> 2 -> 3 -> 4 -> 1. If the current value is none of the four known
  # buffers, it is left untouched.
  if [ "$CURRENT_STAGE_BUFFER" = "$STAGE_BUFFER_1" ]; then
    CURRENT_STAGE_BUFFER="$STAGE_BUFFER_2"
  elif [ "$CURRENT_STAGE_BUFFER" = "$STAGE_BUFFER_2" ]; then
    CURRENT_STAGE_BUFFER="$STAGE_BUFFER_3"
  elif [ "$CURRENT_STAGE_BUFFER" = "$STAGE_BUFFER_3" ]; then
    CURRENT_STAGE_BUFFER="$STAGE_BUFFER_4"
  elif [ "$CURRENT_STAGE_BUFFER" = "$STAGE_BUFFER_4" ]; then
    CURRENT_STAGE_BUFFER="$STAGE_BUFFER_1"
  fi
}
buf_reset() {
  # Empty all four stage buffer files and make the first one current again.
  local stage_file
  for stage_file in \
    "$STAGE_BUFFER_1" \
    "$STAGE_BUFFER_2" \
    "$STAGE_BUFFER_3" \
    "$STAGE_BUFFER_4"
  do
    : > "$stage_file"
  done
  CURRENT_STAGE_BUFFER="$STAGE_BUFFER_1"
}
# Scratch files that collect the whitespace-separated parts of the tag that is
# currently being parsed; CURRENT_STAGE_BUFFER points at the one being filled.
STAGE_BUFFER_1="$(mktemp)"
STAGE_BUFFER_2="$(mktemp)"
STAGE_BUFFER_3="$(mktemp)"
STAGE_BUFFER_4="$(mktemp)"
CURRENT_STAGE_BUFFER=$STAGE_BUFFER_1

# A shell keeps only ONE handler per signal, so this trap replaces the earlier
# one that removed "$AST". The AST file therefore has to be cleaned up here as
# well, otherwise it is leaked on every run.
trap 'rm -f "${AST-}" "$STAGE_BUFFER_1" "$STAGE_BUFFER_2" "$STAGE_BUFFER_3" "$STAGE_BUFFER_4"' EXIT INT HUP

log debug "stage buffer 1: ${WHITE}$STAGE_BUFFER_1"
log debug "stage buffer 2: ${WHITE}$STAGE_BUFFER_2"
log debug "stage buffer 3: ${WHITE}$STAGE_BUFFER_3"
log debug "stage buffer 4: ${WHITE}$STAGE_BUFFER_4"
# json_escape(value) -> str
json_escape() {
  # Escape a value so it can be embedded between double quotes in JSON.
  #   $1 - raw text
  #   stdout - text with `\` turned into `\\` and `"` turned into `\"`
  #
  # Backslashes MUST be escaped before quotes: otherwise an input such as
  # `\"` becomes `\\"`, i.e. an escaped backslash followed by a bare quote
  # that terminates the string early.
  #
  # TODO: control characters (newline, tab, CR, ...) are still passed through
  # unescaped.
  printf '%s' "${1}" | sed 's/\\/\\\\/g; s/"/\\"/g'
}
# finds the close pattern (]}); any other char is stored in the stage buffers, which are switched on unquoted whitespace
find_close_pattern() {
  # Feed one character of a tag body to the close-pattern scanner.
  #   $1 - the character (must be non-empty)
  #   returns 0 when this character completes the close pattern `]}`,
  #   returns 1 otherwise (the character has been consumed/buffered).
  #
  # State is kept in TAG_* variables that are either set or unset:
  #   TAG_close_tag_flag    - a `]` was seen and is being held back
  #   TAG_escape_flag       - previous character was an unescaped backslash
  #   TAG_double_quote_flag - currently inside a double-quoted string
  #   TAG_ws_started        - an unquoted whitespace run is pending
  #   TAG_first_ws_handled  - the first whitespace run has already been seen
  local char="${1:?}"

  # regular_char(text): append text to the current stage buffer. A pending
  # whitespace run is resolved first: the first run of a tag is only recorded,
  # every later run switches to the next stage buffer.
  regular_char() {
    [ ${TAG_ws_started+x} ] && {
      log debug 'b?'
      unset TAG_ws_started
      if [ "${TAG_first_ws_handled+x}" ]; then
        buf_next
      else
        TAG_first_ws_handled=1
      fi
    }
    printf '%s' "$1" >> "$CURRENT_STAGE_BUFFER"
  }

  # A `]` is being held back: either this char completes `]}` or the `]` was
  # ordinary content and must be flushed before the char is processed.
  if [ "${TAG_close_tag_flag+x}" ]; then
    unset TAG_close_tag_flag
    if [ "$char" = '}' ]; then
      # removes first and last white spaces from the buffer
      sed -i 's/[[:space:]]$//g' "$CURRENT_STAGE_BUFFER"
      sed -i 's/^[[:space:]]//g' "$CURRENT_STAGE_BUFFER"
      return 0
    fi
    regular_char ']'
    # No return here: the current char still goes through the normal handling
    # below, so quotes/whitespace right after a stray `]` are not skipped and
    # a second `]` (as in `]]}`) can still start the close pattern.
  fi

  # Hold every `]` back until the next char tells whether it starts `]}`.
  if [ "$char" = ']' ]; then
    unset TAG_escape_flag
    TAG_close_tag_flag=1
    return 1
  fi

  # A backslash escapes exactly the ONE following character; the flag must
  # not survive past it, otherwise a much later quote would be taken as
  # escaped.
  # shellcheck disable=SC1003
  if [ "${TAG_escape_flag+x}" ]; then
    unset TAG_escape_flag
  elif [ "$char" = '\' ]; then
    TAG_escape_flag=1
  elif [ "$char" = '"' ]; then
    if [ "${TAG_double_quote_flag+x}" ]; then
      unset TAG_double_quote_flag
    else
      TAG_double_quote_flag=1
    fi
  fi

  # Unquoted whitespace only marks a separator; everything else is content.
  if is_ws "$char" && [ ! "${TAG_double_quote_flag+x}" ]; then
    TAG_ws_started=1
  else
    regular_char "$char"
  fi
  return 1
}
# finds open pattern and stores the char to the STAGE_BUFFER # finds open pattern and stores the char to the STAGE_BUFFER_1
find_open_pattern() { find_open_pattern() {
char="${1:?}" local char="${1:?}"
if [ "${open_tag_flag:?}" -eq 0 ] && [ "$char" = '{' ]; then if [ ! "${open_tag_flag+x}" ] && [ "$char" = '{' ]; then
open_tag_flag=1 open_tag_flag=1
elif [ "${open_tag_flag:?}" -eq 1 ] && [ "$char" = '[' ]; then elif [ "${open_tag_flag+x}" ]; then
open_tag_flag=0 unset open_tag_flag
if [ "$char" = '[' ]; then
# removes last char from buffer ({) is part of open pattern # removes last char from buffer ({) is part of open pattern
truncate -s -1 "$STAGE_BUFFER" truncate -s -1 "$CURRENT_STAGE_BUFFER"
return 0 return 0
else
printf '{%s' "$char" >> "$CURRENT_STAGE_BUFFER"
fi
else else
printf '%s' "$char" >> "$STAGE_BUFFER" printf '%s' "$char" >> "$CURRENT_STAGE_BUFFER"
fi fi
return 1 return 1
@@ -222,39 +337,86 @@ find_open_pattern() {
parse() { parse() {
char="$1" char="$1"
data_pointer=
case "$STAGE" in case "$STAGE" in
# Text Stage - save char in STAGE_BUFFER_1 until next tag opens
0) 0)
if find_open_pattern "$char"; then if find_open_pattern "$char"; then
plex_set "$data_pointer"'' log debug "open pattern founded"
buf=$(cat "$CURRENT_STAGE_BUFFER")
yq -o j -i ". += [{
\"type\": \"text\",
\"value\": \"$(json_escape "$buf")\"
}]" "$AST"
buf_reset
STAGE=1 STAGE=1
fi fi
;; ;;
1) 1)
if find_close_pattern "$char"; then if find_close_pattern "$char"; then
STAGE=0 case "$STAGE_BUFFER_1" in
fi compute)
;; ;;
2) include)
;;
for)
;;
end)
;;
*) # interpolation tag
buf=$(cat "$STAGE_BUFFER_1")
yq -o j -i ". += [{
\"type\": \"interpolation\",
\"path\": \"$(json_escape "$buf")\"
}]" "$AST"
;;
esac
;; # zero-initialization
unset TAG_ws_started TAG_double_quote_flag TAG_escape_flag TAG_first_ws_handled TAG_close_tag_flag
buf_reset
STAGE=1
fi
;;
2)
;;
3) 3)
;; ;;
4) 4)
;; ;;
*) *)
log error "error: ${WHITE}impossible stage"
;; exit 13
;;
esac esac
} }
# Command-line handling: consume arguments one by one until none are left.
while [ $# -gt 0 ]; do
case $1 in
# Compact output: append -I=0 to OUTPUT_ARGS, which is later passed to the
# final yq call; an already set OUTPUT_ARGS is kept, separated by a space.
-c|--compact-output)
OUTPUT_ARGS="${OUTPUT_ARGS+$OUTPUT_ARGS }-I=0"
shift
;;
# Any other dash-prefixed word is an unknown option.
--*|-*)
log error "argument $1 does not exists"
exit 9
;;
# Bare words are treated as subcommands; none are accepted here.
*)
log error "subcommand $1 does not exists"
exit 9
;;
esac
done
# Using dd to read one character at a time # Using dd to read one character at a time
input=$(cat) input=$(cat)
i=1 i=1
while :; do while :; do
#log trace "loop"
char=$(printf '%s' "$input" | dd bs=1 skip=$((i-1)) count=1 2>/dev/null) char=$(printf '%s' "$input" | dd bs=1 skip=$((i-1)) count=1 2>/dev/null)
[ -z "$char" ] && break [ -z "$char" ] && break
@@ -262,3 +424,21 @@ while :; do
i=$((i+1)) i=$((i+1))
done done
# finish TEXT tag if file ends on it
# (STAGE 0 is the text stage, so the input ended outside of any tag)
if [ "$STAGE" -eq 0 ]; then
# A set open_tag_flag means a '{' was read last and is still pending;
# presumably it was held back while waiting for '[', so it is appended here.
if [ "${open_tag_flag+x}" ]; then
unset open_tag_flag
printf '{' >> "$STAGE_BUFFER_1"
fi
# Append the remaining text as a final {"type": "text"} element of the AST.
buf=$(cat "$STAGE_BUFFER_1")
yq -o j -i ". += [{
\"type\": \"text\",
\"value\": \"$(json_escape "$buf")\"
}]" "$AST"
fi
# return the output
# OUTPUT_ARGS is intentionally unquoted so that it splits into separate
# yq arguments; when unset it expands to nothing.
# shellcheck disable=SC2086
yq ${OUTPUT_ARGS:-} -o j "$AST"

View File

@@ -6,35 +6,35 @@ trap 'rm -rf $PLEX_TEMP' EXIT
#plex_set(name, key, value) #plex_set(name, key, value)
plex_set() { plex_set() {
local plexfile key val local plexfile key val
plexfile="${PLEX_TEMP:?}/${1:?}" key="${2:?}" val="${3:?}" plexfile="${PLEX_TEMP:?}/${1:?}.json" key="${2:?}" val="${3:?}"
touch "$plexfile" touch "$plexfile"
yq -i ".$key += \"$val\"" "$plexfile" yq -i ".$key = \"$val\"" "$plexfile"
} }
#plex_child(plexfile, key) #plex_child(name, key)
plex_child() { plex_child() {
plex_fetch "${1:?}" "${2:?}" plex_fetch "${1:?}" "${2:?}"
} }
#plex_val(plexfile, key) #plex_val(name, key)
plex_val() { plex_val() {
plex_fetch "${1:?}" "${2:?}" plex_fetch "${1:?}" "${2:?}"
} }
#plex_val(plexfile, key) #plex_val(name, key)
plex_fetch() { plex_fetch() {
local plexfile key local plexfile key
plexfile="${PLEX_TEMP:?}/${1:?}" key="${2:?}" plexfile="${PLEX_TEMP:?}/${1:?}.json" key="${2:?}"
yq -i ".$key" "$plexfile" yq -r ".$key" "$plexfile"
} }
#plex_push(name, prefix, val) #plex_push(name, prefix, val)
plex_push() { plex_push() {
local plexfile prefix val local plexfile prefix val
plexfile="${PLEX_TEMP:?}/${1:?}" prefix="${2:?}" val="${3:?}" plexfile="${PLEX_TEMP:?}/${1:?}.json" prefix="${2:?}" val="${3:?}"
yq -i ".$prefix += [\"$val\"]" "$plexfile" yq -i ".$prefix += [\"$val\"]" "$plexfile"
} }

View File

@@ -2,6 +2,18 @@
init_plex yq-go init_plex yq-go
plex_set ZALUPA zalupa apulaz plex_set ZALUPA zalupa apulaz
log error "struct: $WHITE$(yq . "$PLEX_TEMP/ZALUPA")$NC" log error "struct:\n$WHITE$(yq . "$PLEX_TEMP/ZALUPA.json")$NC"
plex_set ZALUPA zalupa.zalupa apulaz plex_set ZALUPA kek.zalupa apulaz
log error "struct:\n$WHITE$(yq . "$PLEX_TEMP/ZALUPA.json")$NC"
plex_set ZALUPA zalupa apulaz
log error "struct:\n$WHITE$(yq . "$PLEX_TEMP/ZALUPA.json")$NC"
plex_val ZALUPA zalupa
plex_child ZALUPA kek
plex_fetch ZALUPA kek

View File

@@ -74,7 +74,6 @@ bench_set() {
set +e set +e
plex_set 'MY_STRUCT' "$key" "$i" plex_set 'MY_STRUCT' "$key" "$i"
error_code=$? error_code=$?
log warning "error_code: $error_code"
set -e set -e
if [ $error_code != 0 ]; then if [ $error_code != 0 ]; then
log error "key: $WHITE$key$NC, i: $WHITE$i$NC, struct: $WHITE$(jq . "$PLEX_TEMP/MY_STRUCT")$NC" log error "key: $WHITE$key$NC, i: $WHITE$i$NC, struct: $WHITE$(jq . "$PLEX_TEMP/MY_STRUCT")$NC"

View File

@@ -1 +1,3 @@
{ system, inputs, self, pkgs }: (import ./migrator { inherit system inputs self pkgs; }) { system, inputs, self, pkgs }:
(import ./migrator { inherit system inputs self pkgs; }) //
(import ./hemar { inherit system inputs self pkgs; })

View File

@@ -0,0 +1,48 @@
{ inputs, self, pkgs, system, ... }: let
lib = inputs.nixpkgs.lib;
# Turn one entry of ./test into a derivation that exposes $out/run.sh.
#   name - entry name as returned by builtins.readDir
#   type - entry type as returned by builtins.readDir
# Directories are copied whole and must contain run.sh; a plain "*.sh" file
# is installed as $out/run.sh; every other entry yields null.
mkTestDrv = name: type:
if type == "directory" then
# directory test: the derivation name is "test-<dir>"
pkgs.runCommand "test-${name}" {} ''
if ! [ -f ${./test + "/${name}" + /run.sh} ]; then
echo no run.sh in test/${name}
exit 1
fi
mkdir -p "$out"
cp -r ${./test + "/${name}"}/* "$out/"
chmod +x "$out/run.sh"
''
else if lib.hasSuffix ".sh" name then
# single-file test: the ".sh" suffix is dropped from the derivation name
pkgs.runCommand "test-${lib.removeSuffix ".sh" name}" {} ''
mkdir -p "$out"
install -Dm755 ${./test + "/${name}"} "$out/run.sh"
''
else
# not a test entry
null;
testDir = builtins.readDir ./test;
# attrset: testName -> drv with run.sh
# Built inside-out: map every ./test entry through mkTestDrv, drop the entries
# that produced null, then strip a trailing ".sh" from the attribute names.
testDrvs =
lib.mapAttrs' (n: v:
lib.nameValuePair (lib.removeSuffix ".sh" n) v
) (lib.filterAttrs (_: v: v != null)
(lib.mapAttrs (n: t: mkTestDrv n t) testDir));
hemar = self.packages.${system}.hemar;
# Build the check derivation "hemar-test-<testName>" for one test.
#   testName - attribute name of the test
#   testDrv  - derivation produced by mkTestDrv (contains run.sh)
# The build script is assembled from the shared logging helper, a `test`
# shell variable pointing at testDrv, and the contents of ./lauch.sh; it ends
# by creating $out.
mkPgTest = testName: testDrv: pkgs.runCommand "hemar-test-${testName}"
{
# tools made available to the build script
nativeBuildInputs = [ pkgs.coreutils pkgs.gnugrep pkgs.gnused ];
# the program under test plus yq
buildInputs = [ hemar pkgs.yq-go ];
} ''
${builtins.readFile self.legacyPackages.${system}.helpers.posix-shell.log}
test=${testDrv}
${builtins.readFile ./lauch.sh}
# success marker for Nix
# shellcheck disable=SC2154
mkdir -p "$out"
'';
in lib.mapAttrs (name: drv: mkPgTest name drv) testDrvs

View File

@@ -0,0 +1,24 @@
#!/bin/dash
# $test - test and assertion file
json_diff() {
  # Compare two JSON documents after normalising both through yq.
  #   $1 - actual JSON text
  #   $2 - expected JSON text
  # Each argument is evaluated by `yq -n` and printed as compact JSON into a
  # temp file; the files are compared with diff. On mismatch both values are
  # logged and the script exits with status 1.
  temp1=$(mktemp)
  temp2=$(mktemp)
  yq -I=0 -o=j -n "$1" >"$temp1"
  yq -I=0 -o=j -n "$2" >"$temp2"
  if diff -q "$temp1" "$temp2"; then
    # equal: only the temp files need to be removed
    rm -f "$temp1" "$temp2"
  else
    # remove the temp files before exiting so they are not leaked either
    rm -f "$temp1" "$temp2"
    log error "$(yq -o=j -n "$1")" and "$(yq -o=j -n "$2")"
    exit 1
  fi
}
# run test
# Copy the test files into a fresh ./test directory, change into it and
# source run.sh in the current shell, so that run.sh can call the functions
# defined above (e.g. json_diff).
mkdir './test'
# $test is not assigned in this file; it has to be set by whoever runs it.
# shellcheck disable=SC2154
cp -r "$test"/* './test/'
# shellcheck disable=SC2164
cd './test'
. './run.sh'

View File

@@ -0,0 +1,32 @@
# Each case pipes a template into `hemar -c` and compares the result with the
# expected JSON via json_diff (not defined in this file; presumably provided
# by the script that sources it — confirm).

# Case 1: plain text becomes a single "text" element.
answer="$(echo 'some text' | hemar -c)"
expected="$(printf '[
{
"type": "text",
"value": "some text"
}
]')"
json_diff "$answer" "$expected"
# Case 2: brackets and braces that do not form the "{[" open pattern stay
# part of the text.
answer="$(echo 'some [] {} text' | hemar -c)"
expected="$(printf '[
{
"type": "text",
"value": "some [] {} text"
}
]')"
json_diff "$answer" "$expected"
# Case 3: a lone "{" at the very end of the input is kept in the text.
answer="$(echo 'some {' | hemar -c)"
expected="$(printf '[
{
"type": "text",
"value": "some {"
}
]')"
json_diff "$answer" "$expected"