#!/bin/dash
# segmented-path
#     segment
# Syntax scheme:
#
# hemar
#     elements
#
# elements
#     element
#     element elements
#
# element
#     tag
#     text
#
# text
#     text-item
#     text-item text
#
# text-item
#     '0020' . '10FFFF' - '{'
#     nopattern
#
# tag
#     '{[' ws path ws ']}'
#     '{[' ws for ws ']}'
#     '{[' ws "done" ws ']}'
#     '{[' ws '{[' ws ']}'
#
# # loop tag
# for
#     "for" ws string ws "in" ws path
#
# # path
# path
#     '.'
#     segmented-path
#
# segmented-path
#     segment
#     segment '.' segmented-path
#
# segment
#     string
#     index
#
# index
#     '[' digit ']'
#     '[' onenine digits ']'
#     '[' '-' onenine ']'
#     '[' '-' onenine digits ']'
#
# # types
# string
#     unquoted-string
#     quoted-string
#
# unquoted-string
#     unquoted-character
#     unquoted-character quoted-string
#
# unquoted-character
#     '0020' . '10FFFF' - '"' - '\' - '.' - '[' - ']' - '{' - '}'
#
# quoted-string
#     unquoted-character
#     unquoted-character string
#
# quoted-character
#     '0000' . '10FFFF' - '"'
#     '"' '"'
#
# digits
#     digit
#     digit digits
#
# digit
#     '0'
#     onenine
#
# onenine
#     '1' . '9'
#
# # patterns
# ws
#     ''
#     '\x20' ws
#     '\x0a' ws
#     '\x0d' ws
#     '\x09' ws
#
# nopattern
#     '{' '0020' . '10FFFF' - '['

# AST Structure:
#
# The parser outputs a JSON array of elements directly (not wrapped in an object).
#
# Element types (currently implemented):
#
# Text = {
#     "type": "text",
#     "value": string  # text content
# }
#
# Interpolation = {
#     "type": "interpolation",
#     "path": string  # path to variable in data model
# }
#
# Element types (planned for MVP):
#
# Section = {
#     "type": "section",
#     "variable": string  # item variable name for loop
#     "path": string  # path to array for iteration
#     "body": [Element]  # section body (nested elements)
# }
#
# Element types (planned for future, not MVP):
#
# Include = {
#     "type": "include",
#     "path": string  # path to template file to include
# }
#
# Compute = {
#     "type": "compute",
#     "language": string  # programming language (dash, plpgsql, etc.)
#     "body": string  # function body
# }
#
# AbstractSyntaxTree = [Element, ...]  # array of elements

# is_ws(char) -> bool
# Succeeds (status 0) when char is space, \n, \r or \t; fails (status 1) otherwise.
is_ws() {
    # Kept local so the helper does not leak a global into the caller.
    local ord
    ord=$(printf '%d' "'$1")
    case $ord in
        32|10|13|9) # <-> \x20 | \x0a | \x0d | \x09 <-> space | \n | \r | \t
            return 0
            ;;
    esac
    return 1
}

# log_buffers()
# Writes the current content of STAGE_BUFFER_1 to the debug log.
log_buffers() {
    log debug "buff 1: $WHITE$(cat "$STAGE_BUFFER_1")"
}

# remove_last_double_quote(text) -> text
# Prints text with its last double quote removed (sed works per line).
remove_last_double_quote() {
    printf '%s' "$1" | sed 's/\(.*\)"\(.*\)/\1\2/'
}

# buf_read(buf?) -> text
# Prints the content of the given buffer file, or of CURRENT_STAGE_BUFFER
# when no argument is passed.
buf_read() {
    local buf
    if [ ${1+x} ]; then
        buf=${1}
    else
        buf=${CURRENT_STAGE_BUFFER}
    fi
    cat "$buf"
}

# buf_reset()
# Truncates STAGE_BUFFER_1 and makes it the current stage buffer.
buf_reset() {
    : > "$STAGE_BUFFER_1"
    CURRENT_STAGE_BUFFER="$STAGE_BUFFER_1"
}

# json_escape(value) -> str
# Prints value escaped for use inside a double-quoted JSON string:
# `"` and `\` get a backslash, common control characters use their short
# escapes, remaining characters below 0x20 are written as \u00XX.
json_escape() {
    local input="${1}"
    local output=""
    local char hex

    while [ -n "$input" ]; do
        char="${input%"${input#?}"}" # Get first character
        input="${input#?}"           # Remove first character

        hex=$(printf '%d' "'$char")

        case "$hex" in
            34) output="${output}\\\"" ;; # "
            92) output="${output}\\\\" ;; # \
            10) output="${output}\\n" ;;  # \n (newline)
            13) output="${output}\\r" ;;  # \r (carriage return)
            9)  output="${output}\\t" ;;  # \t (tab)
            8)  output="${output}\\b" ;;  # \b (backspace)
            12) output="${output}\\f" ;;  # \f (form feed)
            *)
                # NOTE(yukkop): escape control characters if they are not in the range 0x20-0x7E
                if [ "$hex" -lt 32 ]; then
                    output="${output}\\u$(printf '%04x' "$hex")"
                else
                    output="${output}${char}"
                fi
                ;;
        esac
    done

    printf '%s' "$output"
}

# parse_tag(char) -> bool
# Consumes one character of a tag body (everything after `{[`).
# Returns 0 once the closing `]}` has been seen and the tag has been
# finished; returns 1 while the tag is still open. Argument characters are
# appended to CURRENT_STAGE_BUFFER; syntax errors are logged and exit the
# script with status 1, internal inconsistencies exit with status 13.
#
# State shared between calls (reset by the caller after a tag is closed):
#   TAG_seen_first_ws     - a character of the first argument has already been
#                           written, so the next whitespace run separates
#                           arguments instead of being leading padding
#   TAG_in_ws_run         - a run of unquoted whitespace was just skipped
#   TAG_pending_close     - we saw `]` and are checking if the next char is `}`
#   TAG_in_quoted_string  - we are between the quotes of a quoted string
#   TAG_end_quote_pending - we saw `"` inside a quoted string; the next char
#                           tells whether it closed the string or was `""`
#   TAG_dote              - the previous character was an unquoted `.`
#   TAG_grammar_mode      - grammar of the current argument (unknown|path|string|kw_in)
parse_tag() {
    local char="${1:?}"

    # string_grammar() -> bool
    # Validates $char against the string grammar.
    # Returns 0 when the caller should write $char to the buffer and 1 when
    # the character was fully handled here and must be skipped.
    string_grammar() {
        if [ "${TAG_in_quoted_string+x}" ]; then
            if [ "${TAG_end_quote_pending+x}" ]; then
                case "$char" in
                    '"')
                        # Escaped quote: "" -> write single quote and continue in quoted string.
                        # Goes through write_char so argument bookkeeping stays consistent.
                        unset TAG_end_quote_pending
                        write_char '"'
                        return 1
                        ;;
                    '.')
                        # Closing quote followed by dot: exit quoted string.
                        # The dot itself is a path separator and must be written,
                        # so report it as a regular character.
                        unset TAG_end_quote_pending
                        unset TAG_in_quoted_string
                        TAG_dote=1
                        return 0
                        ;;
                    ']'|'}')
                        # Closing quote followed by bracket: exit quoted string, let bracket be handled
                        unset TAG_end_quote_pending
                        unset TAG_in_quoted_string
                        return 1
                        ;;
                    *)
                        if is_ws "$char"; then
                            # Closing quote followed by whitespace: exit quoted string
                            unset TAG_end_quote_pending
                            unset TAG_in_quoted_string
                            TAG_in_ws_run=1
                            return 1
                        else
                            log error "unexpected character $WHITE$char$NC after closing quote on $WHITE$LINE_N$NC:$WHITE$CHAR_N"
                            log error "expected: whitespace, dot, or end of tag"
                            exit 1
                        fi
                        ;;
                esac
            elif [ "$char" = '"' ]; then
                # We see a quote inside quoted string: might be closing or escaped
                TAG_end_quote_pending=1
                return 1
            else
                # Inside quoted string, all other chars are allowed (will be written by write_char)
                return 0
            fi
        else
            # Not in quoted string: validate unquoted characters
            # shellcheck disable=SC1003
            case "$char" in
                ']'|'}'|'"'|'\')
                    log error "not allowed character $WHITE$char$NC on $WHITE$LINE_N$NC:$WHITE$CHAR_N"
                    log error "try to use quoted string"
                    exit 1
                    ;;
                '.')
                    # Remember the separator; it is still written to the
                    # buffer so the stored path keeps its segments apart.
                    TAG_dote=1
                    ;;
            esac
            return 0
        fi
    }

    # write_char(char)
    # Appends char to the current stage buffer. When a whitespace run
    # preceded it and an argument was already started, the previous argument
    # is finalized first and the buffer is reset for the new argument.
    write_char() {
        if [ "${TAG_in_ws_run+x}" ]; then
            unset TAG_in_ws_run
            if [ "${TAG_seen_first_ws+x}" ]; then
                log trace "tag in ws -> type: \`${TAG_type:-}\`"
                case "${TAG_type:-unknown}" in
                    unknown)
                        finalize_first_arg
                        ;;
                    for)
                        # NOTE:
                        #   grammar: for i in key."subkey" ; so we know
                        #   1st argument after `for` - string (name of variable)
                        #   2nd - 'in' (just keyword)
                        #   3rd - path (path to array in Model)
                        case ${TAG_grammar_mode:-1} in
                            string) ;;
                            kw_in) ;;
                            path) ;;
                        esac
                        ;;
                    *)
                        log panic 'unexpected TAG_type'
                        exit 13
                        ;;
                esac
                # NOTE: prepare to next argument
                buf_reset
            fi
        fi

        # Checked after finalization so that the very first character of a
        # surplus argument is rejected, not only the second one.
        [ ${TAG_next_argument_redgect+x} ] && {
            log error "too many argument for tag type $WHITE${TAG_type:?}$NC on $WHITE$LINE_N$NC:$WHITE$CHAR_N$NC"
            exit 1
        }

        # From now on a whitespace run separates arguments, even when the
        # tag had no whitespace right after `{[`.
        TAG_seen_first_ws=1
        printf '%s' "$1" >> "$CURRENT_STAGE_BUFFER"
    }

    # Check if we need to exit quoted string due to pending quote followed by bracket
    if [ "${TAG_in_quoted_string+x}" ] && [ "${TAG_end_quote_pending+x}" ]; then
        if [ "$char" = ']' ] || [ "$char" = '}' ]; then
            # Closing quote followed by bracket: exit quoted string, handle bracket
            unset TAG_end_quote_pending
            unset TAG_in_quoted_string
            # Fall through to bracket handling
        fi
    fi

    if ! [ "${TAG_in_quoted_string+x}" ]; then
        if [ ! "${TAG_pending_close+x}" ] && [ "$char" = ']' ]; then
            TAG_pending_close=1
            # NOTE: skip ']' but remember to check next char for a possible '}'
            return 1
        elif [ "${TAG_pending_close+x}" ]; then
            unset TAG_pending_close
            if [ "$char" = '}' ]; then
                finish
                # NOTE: found `]}` — finish bracket parsing
                return 0
            else
                # NOTE: `]` was not followed by `}`, so emit the `]` we skipped
                printf ']' >> "$CURRENT_STAGE_BUFFER"
            fi
        else
            is_ws "$char" && { TAG_in_ws_run=1; return 1; }
        fi
    fi

    case "${TAG_grammar_mode:-unknown}" in
        unknown)
            # NOTE: we always know grammar mode but first argument
            # just regular parse as string or as path if seen unquoted '.'

            # NOTE: this is after char's checked on ws
            [ "${TAG_dote+x}" ] && { log panic "TAG_dote true in unknown TAG_grammar_mode"; exit 13; }

            # Opening quote is allowed at start of tag (empty buffer) or after
            # whitespace. A quote seen while already inside a quoted string is
            # a closing/escaped quote and belongs to string_grammar.
            if [ "$char" = '"' ] && [ ! "${TAG_in_quoted_string+x}" ]; then
                if [ ! -s "$CURRENT_STAGE_BUFFER" ] || [ "${TAG_in_ws_run+x}" ]; then
                    TAG_in_quoted_string=1
                    return 1
                fi
                # Quote in middle of unquoted string - let string_grammar reject it
            fi

            string_grammar || return 1

            # An unquoted dot turns the first argument into a path.
            if [ ${TAG_dote+x} ]; then
                TAG_grammar_mode=path
            fi
            ;;
        path)
            # Opening quote is allowed after whitespace, after dot or at segment start
            if [ "$char" = '"' ] && [ ! "${TAG_in_quoted_string+x}" ]; then
                if [ "${TAG_in_ws_run+x}" ] || [ "${TAG_dote+x}" ] || [ ! -s "$CURRENT_STAGE_BUFFER" ]; then
                    [ "${TAG_dote+x}" ] && unset TAG_dote
                    TAG_in_quoted_string=1
                    return 1
                fi
                # Quote in middle of unquoted string - let string_grammar reject it
            fi

            [ "${TAG_dote+x}" ] && unset TAG_dote
            string_grammar || return 1
            ;;
        string)
            # Opening quote is allowed after whitespace or at segment start
            if [ "$char" = '"' ] && [ ! "${TAG_in_quoted_string+x}" ]; then
                if [ "${TAG_in_ws_run+x}" ] || [ ! -s "$CURRENT_STAGE_BUFFER" ]; then
                    TAG_in_quoted_string=1
                    return 1
                fi
                # Quote in middle of unquoted string - let string_grammar reject it
            fi

            string_grammar || return 1
            if [ ${TAG_dote+x} ]; then
                log error ". not allowed, use quote to escape it; on $WHITE$LINE_N$NC:$WHITE$CHAR_N$NC"
            fi
            ;;
        kw_in)
            ;;
        *)
            log panic 'unexpected TAG_grammar_mode'
            exit 13
            ;;
    esac

    write_char "$char"
    return 1
}

# finish()
# Called when the closing `]}` of a tag has been read.
# A tag whose type is still unknown has only one argument in the buffer, so
# it is classified by finalize_first_arg (for / done / `{[` / interpolation)
# instead of being treated as an interpolation unconditionally.
finish() {
    case "${TAG_type:-unknown}" in
        unknown)
            finalize_first_arg
            ;;
        done)
            finish_done_tag
            ;;
        '{[')
            finish_bracket_tag
            ;;
        for)
            ;;
        *)
            log panic 'unexpected TAG_type on finish'
            exit 13
            ;;
    esac
}

# finalize_first_arg()
# Decides the tag type from the first argument stored in the current stage
# buffer and runs the matching finish_* handler.
finalize_first_arg() {
    log trace "finalize first arg"
    log trace "buffer: $(cat "$CURRENT_STAGE_BUFFER")"
    case "$(cat "$CURRENT_STAGE_BUFFER")" in
        'for')
            TAG_type='for'
            # NOTE: we know that next argument after `for` is string
            TAG_grammar_mode=string
            log error 'for unimplemented'
            exit 13
            ;;
        'done')
            finish_done_tag
            ;;
        '{[')
            finish_bracket_tag
            ;;
        *) # interpolation tag
            finish_interpolation_tag
            ;;
    esac
}

# finish_done_tag()
# Marks the tag as `done` and forbids further arguments.
finish_done_tag() {
    TAG_type='done'
    TAG_next_argument_redgect=1
    # NOTE: Do not save {[ done ]} to the AST because it is useless there
}

# finish_bracket_tag()
# Handles the `{[ {[ ]}` escape: emits a literal `{[` as text, appending it
# to the last AST element when that element is text, otherwise adding a new
# text element. Forbids further arguments.
finish_bracket_tag() {
    TAG_type='actual bracket'
    TAG_next_argument_redgect=1
    if yq -e "${AST_key}[-1].type == \"text\"" "$AST" > /dev/null; then
        yq -o j -i "${AST_key}[-1].value += \"{[\"" "$AST"
    else
        yq -o j -i "$AST_key += [{ \"type\": \"text\", \"value\": \"{[\" }]" "$AST"
    fi
}
# finish_interpolation_tag()
# Marks the tag as an interpolation, forbids further arguments and appends
# an interpolation element whose path is the content of STAGE_BUFFER_1.
finish_interpolation_tag() {
    log trace 'finish interpolation tag'
    TAG_type='interpolation'
    TAG_next_argument_redgect=1
    buf=$(cat "$STAGE_BUFFER_1")
    yq -o j -i "$AST_key += [{ \"type\": \"interpolation\", \"path\": \"$(json_escape "$buf")\" }]" "$AST"
}

# find_open_pattern(char) -> bool
# Finds the open pattern `{[`; every other character is stored in the
# current stage buffer. Returns 0 when `{[` has just been completed and 1
# otherwise. A `{` that is not followed by `[` is written back together with
# the character that followed it.
find_open_pattern() {
    local char="${1:?}"

    if [ ! "${open_tag_flag+x}" ] && [ "$char" = '{' ]; then
        # Possible tag start: hold the `{` back until the next char is known.
        open_tag_flag=1
    elif [ "${open_tag_flag+x}" ]; then
        unset open_tag_flag
        if [ "$char" = '[' ]; then
            return 0
        else
            printf '{%s' "$char" >> "$CURRENT_STAGE_BUFFER"
        fi
    else
        printf '%s' "$char" >> "$CURRENT_STAGE_BUFFER"
    fi
    return 1
}

# flush_text_buffer()
# Appends the content of the current stage buffer to the AST as a text
# element; nothing is added when the buffer is empty.
flush_text_buffer() {
    local text
    # Command substitution drops trailing newlines, which would silently
    # remove line breaks that precede a tag or end the template. The
    # sentinel character keeps them and is stripped right after.
    text=$(cat "$CURRENT_STAGE_BUFFER"; printf 'x')
    text=${text%x}
    # NOTE: Only add text element if buffer is not empty
    if [ -n "$text" ]; then
        yq -o j -i "$AST_key += [{ \"type\": \"text\", \"value\": \"$(json_escape "$text")\" }]" "$AST"
    fi
}

# parse(char)
# Feeds one input character to the handler of the current STAGE.
parse() {
    char="$1"

    case "$STAGE" in
        # Text Stage - save char in STAGE_BUFFER_1 until next tag opens
        0)
            if find_open_pattern "$char"; then
                log trace "open pattern founded"
                flush_text_buffer
                buf_reset
                STAGE=1
            fi
            ;;
        # Tag Stage - collect tag arguments until the tag closes
        1)
            if parse_tag "$char"; then
                log_buffers
                # zero-initialization of every per-tag state variable
                unset TAG_seen_first_ws TAG_in_ws_run TAG_pending_close TAG_type TAG_next_argument_redgect TAG_grammar_mode TAG_in_quoted_string TAG_end_quote_pending TAG_dote
                buf_reset
                STAGE=0
            fi
            ;;
        2)
            ;;
        3)
            ;;
        4)
            ;;
        *)
            log error "error: ${WHITE}impossible stage"
            exit 13
            ;;
    esac
}

if [ -z "${AS_LIBRARY+x}" ]; then
    log notice "running"

    AST=$(mktemp)
    STAGE_BUFFER_1="$(mktemp)"
    CURRENT_STAGE_BUFFER=$STAGE_BUFFER_1

    # One EXIT trap removes both temp files (a second `trap ... EXIT` would
    # replace the first and leak the AST file). Signals are turned into a
    # regular exit so the EXIT trap runs and the script stops.
    trap 'rm -f "$AST" "$STAGE_BUFFER_1"' EXIT
    trap 'exit 129' HUP
    trap 'exit 130' INT

    yq -o j -i "[]" "$AST"
    AST_key='.'
    yq -o j -i '.' "$AST"
    log debug "AST path: ${WHITE}${AST}"

    # 0 - text
    # 1 - decide tag type
    # 2 - interpolation
    # 3 - section
    # 4 - include
    # 5 - compute
    STAGE=0
    log debug "stage buffer 1: ${WHITE}$STAGE_BUFFER_1"

    while [ $# -gt 0 ]; do
        case $1 in
            -c|--compact-output)
                OUTPUT_ARGS="${OUTPUT_ARGS+$OUTPUT_ARGS }-I=0"
                shift
                ;;
            --*|-*)
                log error "argument $1 does not exists"
                exit 9
                ;;
            *)
                log error "subcommand $1 does not exists"
                exit 9
                ;;
        esac
    done

    CHAR_N=1
    LINE_N=1
    # Read stdin one byte at a time; od turns the byte into its decimal code
    # and an empty result means end of input.
    while :; do
        hex="$(dd bs=1 count=1 2>/dev/null | od -An -t u1)"
        [ -z "$hex" ] && {
            break
        }

        # shellcheck disable=SC2059
        char="$(printf "\\$(printf '%03o' "$hex")")"
        # NOTE: if $char is empty, it is because `dd` returned '\n' but `$(...)`
        # removed it as trailing '\n', so $char is set to '\n' here
        [ -z "$char" ] && {
            LINE_N=$((LINE_N+1))
            char='
'
        }

        log trace "char: $WHITE$char"

        parse "${char:?}"
        CHAR_N=$((CHAR_N+1))
    done

    log debug 'finishing'

    # finish TEXT tag if file ends on it
    if [ "$STAGE" -eq 0 ]; then
        if [ "${open_tag_flag+x}" ]; then
            # Input ended right after a held-back `{`: it is plain text.
            unset open_tag_flag
            printf '{' >> "$STAGE_BUFFER_1"
        fi
        flush_text_buffer
    fi

    # return the output
    # shellcheck disable=SC2086
    yq ${OUTPUT_ARGS:-} -o j "$AST"
fi