fix(nixos): sentinèlla-probe: module args

This commit is contained in:
2025-10-12 03:58:43 +00:00
parent 6dd7d112c5
commit dc6ca7a0a9
6 changed files with 152 additions and 46 deletions

View File

@@ -19,13 +19,15 @@ in {
probe = { probe = {
enable = lib.mkEnableOption "enable sentinèlla probe services, that provides endpoints for server status check"; enable = lib.mkEnableOption "enable sentinèlla probe services, that provides endpoints for server status check";
urls = lib.mkOption { urls = lib.mkOption {
type = lib.types.port; type = with lib.types; listOf str;
default = [];
description = '' description = ''
urls to check urls to check
''; '';
}; };
authFile = lib.mkOption { authFile = lib.mkOption {
type = lib.types.path; type = with lib.types; nullOr path;
default = null;
example = '' example = ''
config.sops.secrets."name-of-service/sentinèlla-probe".path config.sops.secrets."name-of-service/sentinèlla-probe".path
''; '';
@@ -34,19 +36,22 @@ in {
''; '';
}; };
volumes = lib.mkOption { volumes = lib.mkOption {
type = lib.types.port; type = with lib.types; listOf str;
default = [];
description = '' description = ''
volumes to check volumes to check
''; '';
}; };
port = lib.mkOption { port = lib.mkOption {
type = lib.types.port; type = lib.types.port;
default = 5988;
description = '' description = ''
service's port service's port
''; '';
}; };
environmentPath = lib.mkOption { environmentPath = lib.mkOption {
type = lib.types.path; type = with lib.types; nullOr path;
default = null;
example = '' example = ''
config.sops.secrets."name-of-service/environment".path config.sops.secrets."name-of-service/environment".path
''; '';
@@ -77,34 +82,36 @@ in {
}; };
config = lib.mkMerge [ config = lib.mkMerge [
(lib.mkIf cfg.probe.enable { (lib.mkIf cfg.probe.enable {
systemd.services."sentinèlla-probe" = { systemd.services."sentinella-probe" = {
description = "Hectic server health check"; description = "Hectic server health check";
after = [ "network.target" ]; after = [ "network.target" ];
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
serviceConfig = { serviceConfig = lib.mkMerge [
Type = "simple"; {
ExecStart = "${self.packages.${system}."sentinèlla"}/bin/probe"; Type = "simple";
EnvironmentFile = cfg.probe.environmentPath; ExecStart = "${self.packages.${system}."sentinèlla"}/bin/probe";
Environment = (if cfg.probe.urls != null then [ Environment = [
"URLS=${cfg.probe.urls}" "URLS=${lib.concatStringsSep "," cfg.probe.urls}"
] else []) ++ (if cfg.probe.volumes != null then [ "VOLUMES=${lib.concatStringsSep "," cfg.probe.volumes}"
"VOLUMES=${cfg.volumes}" "PORT=${builtins.toString cfg.probe.port}"
] else []) ++ (if cfg.probe.port != null then [ ];
"PORT=${builtins.toString cfg.probe.port}" Restart = "always";
] else []); RestartSec = "5s";
Restart = "always";
RestartSec = "5s";
# Shutdown configuration # Shutdown configuration
TimeoutStopSec = "30s"; TimeoutStopSec = "30s";
KillSignal = "SIGTERM"; KillSignal = "SIGTERM";
KillMode = "mixed"; KillMode = "mixed";
# Security and process management # Security and process management
RemainAfterExit = false; RemainAfterExit = false;
StandardOutput = "journal"; StandardOutput = "journal";
StandardError = "journal"; StandardError = "journal";
}; }
(if cfg.probe.environmentPath != null then {
EnvironmentFile = cfg.probe.environmentPath;
} else {})
];
}; };
}) })
(lib.mkIf cfg.sentinel.enable { (lib.mkIf cfg.sentinel.enable {

View File

@@ -1,3 +1,5 @@
#!/bin/dash
mod="${1:?}" mod="${1:?}"
case "$mod" in case "$mod" in

View File

@@ -1,4 +1,4 @@
{ symlinkJoin, writeShellApplication, socat, dash, hectic, curl }: { symlinkJoin, writeShellApplication, socat, dash, hectic, curl, gawk }:
let let
shell = "${dash}/bin/dash"; shell = "${dash}/bin/dash";
bashOptions = [ bashOptions = [
@@ -13,19 +13,18 @@ let
text = builtins.readFile ./base64.sh; text = builtins.readFile ./base64.sh;
}; };
# TODO: writeDashApplication
probe = hectic.writeShellApplication { probe = hectic.writeShellApplication {
inherit shell bashOptions; inherit shell bashOptions;
name = "probe"; name = "probe";
runtimeInputs = [ socat dash probe-loop ]; runtimeInputs = [ socat dash router ];
text = builtins.readFile ./probe.sh; text = builtins.readFile ./probe.sh;
}; };
probe-loop = hectic.writeShellApplication { router = hectic.writeShellApplication {
inherit shell bashOptions; inherit shell bashOptions;
name = "probe-loop"; name = "router";
runtimeInputs = [ base64 ]; runtimeInputs = [ base64 gawk ];
text = builtins.readFile ./probe-loop.sh; text = builtins.readFile ./router.sh;
}; };
sentinel = hectic.writeShellApplication { sentinel = hectic.writeShellApplication {

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env dash #!/bin/dash
socat -V >/dev/null socat -V >/dev/null
dash -c 'echo ok' >/dev/null dash -c 'echo ok' >/dev/null
socat -T5 -t5 TCP-LISTEN:"${PORT:-5988}",reuseaddr,fork EXEC:"probe-loop" socat -T5 -t5 TCP-LISTEN:"${PORT:-5988}",reuseaddr,fork EXEC:"router"

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env dash #!/bin/dash
# router.sh — POSIX sh HTTP backend (for socat) # router.sh — POSIX sh HTTP backend (for socat)
# usage: socat -T5 -t5 TCP-LISTEN:${port},reuseaddr,fork EXEC:"sh ${currentfile}" # usage: socat -T5 -t5 TCP-LISTEN:${port},reuseaddr,fork EXEC:"dash ${currentfile}"
# Routes: # Routes:
# GET /status -> check $URLS (0/0 if unset) # GET /status -> check $URLS (0/0 if unset)
# GET /disk -> check $VOLUMES (all if unset) # GET /disk -> check $VOLUMES (all if unset)

View File

@@ -1,11 +1,109 @@
#!/bin/dash #!/bin/dash
# sentinel.sh — polls probe backends (/status) and notifies on status change via Telegram
# Env:
# SERVERS="http://host1:8080,http://host2:8080"
# TOKENS="-,b64token2" # CSV aligned with SERVERS; "-" means no auth
# TOKEN="..." # Telegram bot token
# CHAT_ID="..." # Telegram chat id
# TIMEOUT=5 # curl timeout seconds (default 5)
# POLLING_INTERVAL_SEC=3 # default 3
# STATE_DIR=/tmp/sentinel # default /tmp/sentinel
TOKEN=8448534574:AAEvsdQqhUDu3RVRJWDGIVeqRmXlB0Dqn1Q set -eu
CHAT_ID=380055934
TIMEOUT=${TIMEOUT:-5}
POLLING_INTERVAL_SEC=${POLLING_INTERVAL_SEC:-3} POLLING_INTERVAL_SEC=${POLLING_INTERVAL_SEC:-3}
STATE_DIR=${STATE_DIR:-/tmp/sentinel}
SERVERS=${SERVERS:-}
TOKENS=${TOKENS:-}
TOKEN=${TOKEN:-}
CHAT_ID=${CHAT_ID:-}
while true; do [ -n "$SERVERS" ] || { printf >&2 'SERVERS not set\n'; exit 1; }
curl -s -X POST "https://api.telegram.org/bot${TOKEN}/sendMessage" \
-d "chat_id=${CHAT_ID}" \ # If TOKENS unset, synthesize "-" for each server
-d text="your message" if [ -z "$TOKENS" ]; then
n=$(printf '%s\n' "$SERVERS" | tr -cd ',' | wc -c | awk '{print $1+1}')
TOKENS=$(awk -v n="$n" 'BEGIN{for(i=1;i<=n;i++){printf("-"); if(i<n)printf(",")}}')
fi
mkdir -p "$STATE_DIR"
# --- helpers ---
# get_csv VAR idx -> echo idx-th field (1-based) from CSV string VAR
get_csv() {
# shellcheck disable=SC2001
printf '%s' "$1" | sed 's/,/\n/g' | awk -v n="$2" 'NR==n{print; exit}'
}
notify() {
msg=$1
if [ -n "$TOKEN" ] && [ -n "$CHAT_ID" ]; then
curl -sS -m "$TIMEOUT" -X POST "https://api.telegram.org/bot${TOKEN}/sendMessage" \
-d "chat_id=${CHAT_ID}" \
--data-urlencode "text=${msg}" >/dev/null || printf >&2 'notify failed: %s\n' "$msg"
else
printf >&2 '%s\n' "$msg"
fi
}
sid() { printf '%s' "$1" | cksum | awk '{print $1}'; }
parse_summary() {
sed -n 's/.*"summary":{"total":\([0-9][0-9]*\),"ok":\([0-9][0-9]*\)}.*/\1 \2/p'
}
list_failures() {
awk '
BEGIN{FS="\""; u=""; c=""}
/"url":/ {u=$4}
/"code":/ {c=$0; sub(/.*"code":/,"",c); sub(/,.*/,"",c)}
/"ok":false/ { if(u!=""){ printf "%s(%s) ", u, c; u=""; c="" } }
'
}
# --- main loop ---
while :; do
i=1
while :; do
srv=$(get_csv "$SERVERS" "$i") || true
[ -n "${srv:-}" ] || break
tok=$(get_csv "$TOKENS" "$i") || tok="-"
url="${srv%/}/status"
auth_h=""
[ "${tok}" != "-" ] && [ -n "${tok}" ] && auth_h="-H Authorization: Basic\ $tok"
tmpb=$(mktemp) || exit 1
code=$(sh -c "curl -sS -m \"$TIMEOUT\" -w '%{http_code}' -o \"$tmpb\" $auth_h \"$url\"") || code="000"
body=$(cat "$tmpb"); rm -f "$tmpb"
ok="down"; tot=0; good=0
if [ "$code" = "200" ]; then
s=$(printf '%s' "$body" | parse_summary || true)
[ -n "$s" ] && { tot=${s%% *}; good=${s#* }; }
[ "$tot" -eq "$good" ] && ok="up"
fi
msg_prefix=$( [ "$ok" = "up" ] && printf 'OK' || printf 'FAIL' )
fail_list=""
if [ "$ok" = "down" ] && [ -n "$body" ]; then
fails=$(printf '%s' "$body" | list_failures | sed 's/[ ]$//')
[ -n "$fails" ] && fail_list="${fails}"
fi
msg=$(printf '%s: %s [%s/%s]%s' "$msg_prefix" "$srv" "$good" "$tot" "$fail_list")
sfile="${STATE_DIR}/$(sid "$srv").state"
last=""; [ -f "$sfile" ] && last=$(cat "$sfile")
cur="${ok}:${good}/${tot}:${code}"
if [ "$cur" != "$last" ]; then
notify "$msg"
printf '%s' "$cur" >"$sfile"
fi
i=$((i+1))
done
sleep "$POLLING_INTERVAL_SEC"
done done