From 6035397e9bd11d87c17086b5c2a1e87e5e483623 Mon Sep 17 00:00:00 2001 From: yukkop Date: Sun, 26 Apr 2026 21:54:07 +0000 Subject: [PATCH] =?UTF-8?q?feat(`sentin=C3=A8lla`):=20p2p=20topology=20wit?= =?UTF-8?q?h=20DNS=20peer=20discovery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace central sentinel with watcher: each node polls peers discovered via a single DNS name with multiple A records (e.g. peers.sentinella.com) - Auto-detect own IPs via hostname -I; SELF env var available as optional override for NAT/floating-IP setups - Fix Basic Auth bug in router.sh: compare tok against AUTH_TOKENS instead of unset $USER/$PASS - Rename sentinel binary to watcher; drop unused shellplot dep - Add inetutils to watcher runtime deps for hostname -I - Update NixOS module: replace sentinel options with watcher p2p options (peersDns, self, peersPort, peersScheme, pollingIntervalSec) - Add sentinèlla test suite: probe-status-empty, probe-disk, watcher-state-file --- .gitignore | 1 + docs/specs/sentinella-p2p-design.md | 101 +++++++ flake.lock | 20 +- nixos/module/hectic/service/sentinèlla.nix | 257 ++++++++++-------- nixos/system/hectic-lab/hectic-lab.nix | 14 +- package/sentinèlla/default.nix | 13 +- package/sentinèlla/router.sh | 5 +- package/sentinèlla/watcher.sh | 173 ++++++++++++ sus/hectic-lab.yaml | 6 +- test/package/default.nix | 5 +- test/package/sentinèlla/default.nix | 44 +++ test/package/sentinèlla/launch.sh | 45 +++ test/package/sentinèlla/test/probe-disk.sh | 35 +++ .../sentinèlla/test/probe-status-empty.sh | 27 ++ .../sentinèlla/test/watcher-state-file.sh | 75 +++++ 15 files changed, 687 insertions(+), 134 deletions(-) create mode 100644 docs/specs/sentinella-p2p-design.md create mode 100644 package/sentinèlla/watcher.sh create mode 100644 test/package/sentinèlla/default.nix create mode 100644 test/package/sentinèlla/launch.sh create mode 100644 test/package/sentinèlla/test/probe-disk.sh create mode 100644 
test/package/sentinèlla/test/probe-status-empty.sh create mode 100644 test/package/sentinèlla/test/watcher-state-file.sh diff --git a/.gitignore b/.gitignore index 80ee597..ae3210a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ result result-* rust-toolchain.toml target/ +docs/plans diff --git a/docs/specs/sentinella-p2p-design.md b/docs/specs/sentinella-p2p-design.md new file mode 100644 index 0000000..028c00c --- /dev/null +++ b/docs/specs/sentinella-p2p-design.md @@ -0,0 +1,101 @@ +# Spec: sentinella-p2p-design + +Scope: feature + +# sentinèlla P2P Design Spec + +## Goal +Replace the hub-and-spoke sentinel topology with a fully peer-to-peer model where every node is equal. + +## Topology +- Every node runs both `probe` and `watcher` +- No privileged coordinator; any node can go down without breaking monitoring of the others +- Duplicate Telegram alerts from multiple nodes detecting the same failure are **accepted** (reliability over deduplication) + +## Peer Discovery — DNS multi-A record +- One DNS name (e.g. `peers.sentinella.com`) has multiple A records, one per node IP +- Configured externally via any DNS registrar (Cloudflare, Namecheap, etc.) 
+- Recommended TTL: **60 seconds** so new nodes propagate quickly +- Each watcher resolves the name via `getent hosts $PEERS_DNS` on every poll cycle +- The node's own IPs (auto-detected via `hostname -I`, or `$SELF` when set) are stripped from the result so a node never polls itself +- No per-node DNS names needed; IP addresses are used directly in peer URLs + +``` +peers.sentinella.com A 1.2.3.4 TTL 60 +peers.sentinella.com A 5.6.7.8 TTL 60 +peers.sentinella.com A 9.10.11.12 TTL 60 +``` + +## Environment Variables + +### watcher (new, replaces sentinel) +| Variable | Default | Required | Description | +|---|---|---|---| +| `PEERS_DNS` | — | yes | DNS name resolving to all peer IPs | +| `SELF` | auto-detected via `hostname -I` | no | Override for this node's own IP (NAT / floating-IP setups); excluded from peer list | +| `PEERS_PORT` | `5988` | no | Port all peers listen on | +| `PEERS_SCHEME` | `http` | no | URL scheme for peer connections | +| `PEERS_TOKEN` | — | no | Single Basic Auth token sent to all peers (replaces per-server TOKENS) | +| `TG_TOKEN` | — | yes | Telegram bot token | +| `TG_CHAT_ID` | — | yes | Telegram chat ID | +| `TIMEOUT` | `5` | no | curl timeout seconds | +| `POLLING_INTERVAL_SEC` | `3` | no | Seconds between poll rounds | +| `STATE_DIR` | `/var/lib/sentinella` | no | Directory for state files | +| `SPAM` | `0` | no | If 1, notify on every poll | + +### probe / router (unchanged) +| Variable | Default | Description | +|---|---|---| +| `PORT` | `5988` | TCP port to listen on | +| `URLS` | — | Space-separated URLs to health-check | +| `VOLUMES` | all from df -P | Mount points to report | +| `TIMEOUT` | `5` | curl timeout | +| `AUTH_FILE` | — | Path to user:pass auth file | + +## Key Implementation Details + +### resolve_peers() in watcher.sh +```sh +resolve_peers() { + getent hosts "$PEERS_DNS" \ + | awk '{print $1}' \ + | grep -v "^${SELF}$" \ + | awk -v s="$PEERS_SCHEME" -v p="$PEERS_PORT" '{print s"://"$1":"p}' +} +``` +Called at the top of every outer poll loop iteration — no restart needed when DNS changes. The snippet shows the explicit-`$SELF` case; the shipped `watcher.sh` filters through `is_local_ip`, which also excludes the auto-detected addresses. 
+ +### Auth simplification +- Old: per-server CSV `TOKENS` aligned with `SERVERS` +- New: single optional `PEERS_TOKEN`; either all peers require auth or none do + +### State files +- Unchanged: `$STATE_DIR/$(cksum url).state` contains last known state string +- Format: `up:N/M:200` or `down:0/0:000` + +## Binaries +| Old name | New name | Role | +|---|---|---| +| `sentinel` | `watcher` | Polls peers, sends alerts | +| `probe` | `probe` | socat TCP listener (unchanged) | +| `router` | `router` | HTTP handler (unchanged + auth bug fixed) | +| `base64` | `base64` | awk base64 util (unchanged) | + +## NixOS Module Options +``` +hectic.services."sentinèlla".{probe,watcher}.enable bool +hectic.services."sentinèlla".watcher.peersDns string # e.g. "peers.sentinella.com" +hectic.services."sentinèlla".watcher.self null | string # optional override; auto-detected via hostname -I when null +hectic.services."sentinèlla".probe.port port # default 5988 +hectic.services."sentinèlla".probe.urls [string] # URLs for probe to health-check +hectic.services."sentinèlla".probe.volumes [string] # mount points for probe +hectic.services."sentinèlla".watcher.tgToken null | string +hectic.services."sentinèlla".watcher.tgChatId null | string +hectic.services."sentinèlla".watcher.pollingIntervalSec int # default 3 +``` +Generates two systemd services: `sentinella-probe` and `sentinella-watcher`. + +## Known Bug to Fix (router.sh) +The Basic Auth check references `$USER` and `$PASS` which are never populated. +Fix: move `auth_ok=false` before the header loop and compare `$tok` against +each entry in `$AUTH_TOKENS` (which is correctly populated from `AUTH_FILE`). 
\ No newline at end of file diff --git a/flake.lock b/flake.lock index cd6682d..bf4f4eb 100644 --- a/flake.lock +++ b/flake.lock @@ -177,6 +177,22 @@ "type": "github" } }, + "flake-compat_5": { + "flake": false, + "locked": { + "lastModified": 1767039857, + "narHash": "sha256-vNpUSpF5Nuw8xvDLj2KCwwksIbjua2LZCqhV1LNRDns=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "5edf11c44bc78a0d334f6334cdaf7d60d732daab", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, "flake-parts": { "inputs": { "nixpkgs-lib": [ @@ -799,7 +815,7 @@ }, "nixos-wsl": { "inputs": { - "flake-compat": "flake-compat_4", + "flake-compat": "flake-compat_5", "nixpkgs": [ "nixpkgs" ] @@ -946,8 +962,8 @@ "nix-minecraft": "nix-minecraft", "nixos-anywhere": "nixos-anywhere", "nixos-hardware": "nixos-hardware", - "nixos-wsl": "nixos-wsl", "nixos-mailserver": "nixos-mailserver", + "nixos-wsl": "nixos-wsl", "nixpkgs": "nixpkgs_2", "nixpkgs-fixed": "nixpkgs-fixed", "nixvim": "nixvim", diff --git a/nixos/module/hectic/service/sentinèlla.nix b/nixos/module/hectic/service/sentinèlla.nix index f7b22da..6ffa835 100644 --- a/nixos/module/hectic/service/sentinèlla.nix +++ b/nixos/module/hectic/service/sentinèlla.nix @@ -11,144 +11,175 @@ }: let system = pkgs.stdenv.hostPlatform.system; cfg = config.hectic.services."sentinèlla"; - # URLS="http://..." 
# default: none - # VOLUMES="/ /home" # default: all from df -P in { options = { hectic.services."sentinèlla" = { probe = { - enable = lib.mkEnableOption "enable sentinèlla probe services, that provides endpoints for server status check"; - urls = lib.mkOption { - type = with lib.types; listOf str; - default = []; - description = '' - urls to check - ''; + enable = lib.mkEnableOption "sentinèlla probe — HTTP server exposing this node's health"; + port = lib.mkOption { + type = lib.types.port; + default = 5988; + description = "TCP port the probe listens on."; }; - authFile = lib.mkOption { - type = with lib.types; nullOr path; - default = null; - example = '' - config.sops.secrets."name-of-service/sentinèlla-probe".path - ''; - description = '' - file with lines: user:pass - ''; + urls = lib.mkOption { + type = with lib.types; listOf str; + default = []; + description = "URLs the probe health-checks on GET /status."; }; volumes = lib.mkOption { - type = with lib.types; listOf str; - default = []; - description = '' - volumes to check - ''; + type = with lib.types; listOf str; + default = []; + description = "Mount points reported on GET /disk. 
Empty means all volumes."; }; - port = lib.mkOption { - type = lib.types.port; - default = 5988; - description = '' - service's port - ''; + authFile = lib.mkOption { + type = with lib.types; nullOr path; + default = null; + example = "config.sops.secrets.\"sentinella-probe-auth\".path"; + description = "Path to a file with lines of the form user:pass for Basic Auth."; }; - environmentPath = lib.mkOption { - type = with lib.types; nullOr path; - default = null; - example = '' - config.sops.secrets."name-of-service/environment".path - ''; - description = '' - in case when you do not want show configurations in repository - ``` - VOLUMES= # default: none - URLS= # default: all from df -P + environmentFile = lib.mkOption { + type = with lib.types; nullOr path; + default = null; + description = '' + Optional environment file for secrets. Supported variables: PORT= - AUTH_FILE= # lines: user:pass - ``` - ''; + URLS= + VOLUMES= + AUTH_FILE= + ''; }; }; - sentinel = { - enable = lib.mkEnableOption "enable sentinèlla sentinel services, that reported servers statuses based on probe polls"; - respondents = lib.mkOption { - type = lib.types.listOf lib.types.attrsOf ( - lib.types.submodule { - options = { - }; - } - ); - }; - environmentPath = lib.mkOption { - type = lib.types.path; - example = '' - config.sops.secrets."name-of-service/environment".path - ''; - description = '' - in case when you do not want show configurations in repository - ''; + watcher = { + enable = lib.mkEnableOption "sentinèlla watcher — polls peers discovered via DNS and sends Telegram alerts"; + peersDns = lib.mkOption { + type = lib.types.str; + example = "peers.sentinella.com"; + description = '' + DNS name with multiple A records, one per peer node. + Configure externally (e.g. 
Cloudflare) with TTL 60: + peers.sentinella.com A 1.2.3.4 + peers.sentinella.com A 5.6.7.8 + ''; + }; + self = lib.mkOption { + type = with lib.types; nullOr str; + default = null; + example = "1.2.3.4"; + description = '' + Override the auto-detected local IP. When null (default) the watcher + uses hostname -I to find all local IPs and excludes them from the + peer list automatically. Set this only if the node is behind NAT or + has a floating IP that hostname -I does not report correctly. + ''; + }; + peersPort = lib.mkOption { + type = lib.types.port; + default = 5988; + description = "Port all peer probes listen on."; + }; + peersScheme = lib.mkOption { + type = lib.types.str; + default = "http"; + description = "URL scheme used when connecting to peers (http or https)."; + }; + pollingIntervalSec = lib.mkOption { + type = lib.types.int; + default = 3; + description = "Seconds between polling rounds."; + }; + tgToken = lib.mkOption { + type = with lib.types; nullOr str; + default = null; + description = "Telegram bot token. Prefer environmentFile for secrets."; + }; + tgChatId = lib.mkOption { + type = with lib.types; nullOr str; + default = null; + description = "Telegram chat ID. Prefer environmentFile for secrets."; + }; + environmentFile = lib.mkOption { + type = with lib.types; nullOr path; + default = null; + example = "config.sops.secrets.\"sentinella-watcher-env\".path"; + description = '' + Optional environment file for secrets. 
Supported variables: + TG_TOKEN= + TG_CHAT_ID= + PEERS_TOKEN= # Basic Auth token sent to all peers + SELF= + PEERS_DNS= + ''; }; }; }; }; + config = lib.mkMerge [ (lib.mkIf cfg.probe.enable { systemd.services."sentinella-probe" = { - description = "Hectic server health check"; - after = [ "network.target" ]; - wantedBy = [ "multi-user.target" ]; + description = "sentinèlla probe — node health HTTP server"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; serviceConfig = lib.mkMerge [ - { - Type = "simple"; - ExecStart = "${self.packages.${system}."sentinèlla"}/bin/probe"; - Environment = [ - "URLS=${lib.concatStringsSep "," cfg.probe.urls}" - "VOLUMES=${lib.concatStringsSep "," cfg.probe.volumes}" - "PORT=${builtins.toString cfg.probe.port}" - ]; - Restart = "always"; - RestartSec = "5s"; - - # Shutdown configuration - TimeoutStopSec = "30s"; - KillSignal = "SIGTERM"; - KillMode = "mixed"; - - # Security and process management + { + Type = "simple"; + ExecStart = "${self.packages.${system}."sentinèlla"}/bin/probe"; + Restart = "always"; + RestartSec = "5s"; + TimeoutStopSec = "30s"; + KillSignal = "SIGTERM"; + KillMode = "mixed"; RemainAfterExit = false; - StandardOutput = "journal"; - StandardError = "journal"; + StandardOutput = "journal"; + StandardError = "journal"; + Environment = lib.filter (s: s != "") [ + "PORT=${builtins.toString cfg.probe.port}" + (lib.optionalString (cfg.probe.urls != []) "URLS=${lib.concatStringsSep " " cfg.probe.urls}") + (lib.optionalString (cfg.probe.volumes != []) "VOLUMES=${lib.concatStringsSep " " cfg.probe.volumes}") + (lib.optionalString (cfg.probe.authFile != null) "AUTH_FILE=${cfg.probe.authFile}") + ]; } - (if cfg.probe.environmentPath != null then { - EnvironmentFile = cfg.probe.environmentPath; - } else {}) - ]; + (lib.mkIf (cfg.probe.environmentFile != null) { + EnvironmentFile = cfg.probe.environmentFile; + }) + ]; }; }) - (lib.mkIf cfg.sentinel.enable { - systemd.services."sentinella-sentinel" = { - 
description = "Hectic server health check"; - after = [ "network.target" ]; - wantedBy = [ "multi-user.target" ]; - serviceConfig = { - Type = "simple"; - ExecStart = "${self.packages.${system}."sentinèlla"}/bin/probe"; - Environment = [ - "URLS=${lib.concatStringsSep " " cfg.probe.urls}" - "VOLUMES=${lib.concatStringsSep " " cfg.probe.volumes}" - "PORT=${builtins.toString cfg.probe.port}" - ]; - Restart = "always"; - RestartSec = "5s"; - - # Shutdown configuration - TimeoutStopSec = "30s"; - KillSignal = "SIGTERM"; - KillMode = "mixed"; - - # Security and process management - RemainAfterExit = false; - StandardOutput = "journal"; - StandardError = "journal"; - }; + + (lib.mkIf cfg.watcher.enable { + systemd.services."sentinella-watcher" = { + description = "sentinèlla watcher — p2p peer monitor"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = lib.mkMerge [ + { + Type = "simple"; + ExecStart = "${self.packages.${system}."sentinèlla"}/bin/watcher"; + Restart = "always"; + RestartSec = "5s"; + TimeoutStopSec = "30s"; + KillSignal = "SIGTERM"; + KillMode = "mixed"; + RemainAfterExit = false; + StandardOutput = "journal"; + StandardError = "journal"; + StateDirectory = "sentinella"; + Environment = lib.filter (s: s != "") [ + "PEERS_DNS=${cfg.watcher.peersDns}" + (lib.optionalString (cfg.watcher.self != null) "SELF=${cfg.watcher.self}") + "PEERS_PORT=${builtins.toString cfg.watcher.peersPort}" + "PEERS_SCHEME=${cfg.watcher.peersScheme}" + "POLLING_INTERVAL_SEC=${builtins.toString cfg.watcher.pollingIntervalSec}" + "STATE_DIR=/var/lib/sentinella" + (lib.optionalString (cfg.watcher.tgToken != null) "TG_TOKEN=${cfg.watcher.tgToken}") + (lib.optionalString (cfg.watcher.tgChatId != null) "TG_CHAT_ID=${cfg.watcher.tgChatId}") + ]; + } + (lib.mkIf (cfg.watcher.environmentFile != null) { + EnvironmentFile = cfg.watcher.environmentFile; + }) + ]; }; }) ]; diff --git a/nixos/system/hectic-lab/hectic-lab.nix 
b/nixos/system/hectic-lab/hectic-lab.nix index a551f5d..4aae6e8 100644 --- a/nixos/system/hectic-lab/hectic-lab.nix +++ b/nixos/system/hectic-lab/hectic-lab.nix @@ -100,11 +100,12 @@ in { ]; }; - sops.secrets."mailserver/security/hashedPassword" = {}; - sops.secrets."mailserver/yukkop/hashedPassword" = {}; - sops.secrets."mailserver/snuff/hashedPassword" = {}; - sops.secrets."mailserver/antoshka/hashedPassword" = {}; - sops.secrets."mailserver/founders/hashedPassword" = {}; + sops.secrets."mailserver/security/hashedPassword" = {}; + sops.secrets."mailserver/yukkop/hashedPassword" = {}; + sops.secrets."mailserver/daniil-pelyk/hashedPassword" = {}; + sops.secrets."mailserver/snuff/hashedPassword" = {}; + sops.secrets."mailserver/antoshka/hashedPassword" = {}; + sops.secrets."mailserver/founders/hashedPassword" = {}; services.mailserver = { enable = true; @@ -119,6 +120,9 @@ in { "yukkop" = { hashedPasswordFile = config.sops.secrets."mailserver/yukkop/hashedPassword".path; }; + "daniil-pelyk" = { + hashedPasswordFile = config.sops.secrets."mailserver/daniil-pelyk/hashedPassword".path; + }; "snuff" = { hashedPasswordFile = config.sops.secrets."mailserver/snuff/hashedPassword".path; }; diff --git a/package/sentinèlla/default.nix b/package/sentinèlla/default.nix index 24321c7..0f6ee9a 100644 --- a/package/sentinèlla/default.nix +++ b/package/sentinèlla/default.nix @@ -1,4 +1,4 @@ -{ symlinkJoin, writeTextFile, socat, dash, hectic, curl, gawk, jq }: +{ symlinkJoin, writeTextFile, socat, dash, hectic, curl, gawk, jq, inetutils }: let shell = "${dash}/bin/dash"; bashOptions = [ @@ -31,19 +31,18 @@ let ''; }; - sentinel = hectic.writeShellApplication { + watcher = hectic.writeShellApplication { inherit shell bashOptions; - name = "sentinel"; - runtimeInputs = [ hectic.shellplot curl jq ]; - + name = "watcher"; + runtimeInputs = [ curl jq inetutils ]; text = '' ${builtins.readFile ./log.sh} ${builtins.readFile ./colors.sh} - ${builtins.readFile ./sentinel.sh} + 
${builtins.readFile ./watcher.sh} ''; }; in symlinkJoin { name = "sentinèlla"; - paths = [ probe sentinel ]; + paths = [ probe watcher ]; } diff --git a/package/sentinèlla/router.sh b/package/sentinèlla/router.sh index 4fa09cc..f01dc53 100644 --- a/package/sentinèlla/router.sh +++ b/package/sentinèlla/router.sh @@ -92,6 +92,8 @@ require_auth=false # --- read request & headers --- IFS= read -r req || exit 0 cr=$(printf '\r') +tok="" +auth_ok=false while IFS= read -r line; do [ -z "$line" ] && break [ "$line" = "$cr" ] && break @@ -99,8 +101,6 @@ while IFS= read -r line; do "Authorization: Basic "*) tok=${line#Authorization: Basic } tok=$(printf '%s' "$tok" | tr -d '\r\n') - expect=$(base64 encode "$USER:$PASS") - [ "$tok" = "$expect" ] && auth_ok=true ;; esac done @@ -117,7 +117,6 @@ unauth() { printf '%s' "$body" } -auth_ok=false if $require_auth; then for t in $AUTH_TOKENS; do [ "$tok" = "$t" ] && auth_ok=true && break diff --git a/package/sentinèlla/watcher.sh b/package/sentinèlla/watcher.sh new file mode 100644 index 0000000..903281a --- /dev/null +++ b/package/sentinèlla/watcher.sh @@ -0,0 +1,173 @@ +#!/bin/dash +# watcher.sh — p2p peer monitor; polls all peers discovered via DNS and notifies on status change +# +# Every node runs both probe (HTTP server) and watcher (this script). +# Peer discovery: a single DNS name with multiple A records is resolved via +# getent(1) on every poll cycle. Local IPs are detected automatically via +# hostname(1) and excluded so the node never polls itself. +# No central coordinator; all nodes are equal. 
+# +# DNS setup (external, any registrar, TTL 60): +# peers.example.com A 1.2.3.4 +# peers.example.com A 5.6.7.8 +# peers.example.com A 9.10.11.12 +# +# Required env: +# PEERS_DNS DNS name that resolves to all peer IPs +# TG_TOKEN Telegram bot token +# TG_CHAT_ID Telegram chat ID +# +# Optional env: +# SELF Override auto-detected local IP (useful behind NAT +# or with floating IPs where hostname -I is unreliable) +# PEERS_PORT default 5988 +# PEERS_SCHEME default http +# PEERS_TOKEN Basic Auth token sent to all peers; omit for no auth +# TIMEOUT curl timeout seconds (default 5) +# POLLING_INTERVAL_SEC default 3 +# STATE_DIR default /var/lib/sentinella +# SPAM if 1, notify on every poll regardless of state change + +set -eu + +PREFIX_OK="OK " +PREFIX_FAIL="FAIL" + +TIMEOUT=${TIMEOUT:-5} +POLLING_INTERVAL_SEC=${POLLING_INTERVAL_SEC:-3} +PEERS_DNS=${PEERS_DNS:-} +SELF=${SELF:-} +PEERS_PORT=${PEERS_PORT:-5988} +PEERS_SCHEME=${PEERS_SCHEME:-http} +PEERS_TOKEN=${PEERS_TOKEN:-} +TG_TOKEN=${TG_TOKEN:-} +TG_CHAT_ID=${TG_CHAT_ID:-} +SPAM=${SPAM:-0} + +STATE_DIR=${STATE_DIR:-/var/lib/sentinella} +mkdir -p "$STATE_DIR" 2>/dev/null || { + STATE_DIR="$HOME/.local/$(basename "$STATE_DIR")" + mkdir -p "$STATE_DIR" +} + +[ -n "$PEERS_DNS" ] || { printf >&2 'PEERS_DNS not set\n'; exit 3; } +[ -n "$TG_TOKEN" ] || { printf >&2 'TG_TOKEN not set\n'; exit 3; } +[ -n "$TG_CHAT_ID" ] || { printf >&2 'TG_CHAT_ID not set\n'; exit 3; } + +# --- helpers --- + +# local_ips — returns space-separated list of IPs assigned to local interfaces. +# If SELF is set it is used directly (useful behind NAT / floating IPs). 
+local_ips() { + if [ -n "$SELF" ]; then + printf '%s' "$SELF" + return + fi + hostname -I 2>/dev/null || true +} + +# is_local_ip(ip) — returns 0 if ip belongs to this node +is_local_ip() { + _target=${1:?} + _locals=$(local_ips) + case " $_locals " in + *" $_target "*) return 0 ;; + esac + return 1 +} + +# resolve_peers — resolves PEERS_DNS to a newline-separated list of peer URLs, +# excluding all local IPs. Re-called every poll cycle so DNS changes are +# picked up without restarting the watcher. +resolve_peers() { + getent hosts "$PEERS_DNS" \ + | awk '{print $1}' \ + | while IFS= read -r ip; do + is_local_ip "$ip" || printf '%s://%s:%s\n' "$PEERS_SCHEME" "$ip" "$PEERS_PORT" + done +} + +notify() { + msg=${1:?} + curl -sS -m "$TIMEOUT" -X POST \ + "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \ + -d "chat_id=${TG_CHAT_ID}" \ + --data-urlencode "text=${msg}" >/dev/null \ + || log error "notify failed: $msg" + log notice "notify message: ${WHITE}${msg}${NC}" +} + +# sid(url) — stable filename token for state files +sid() { printf '%s' "$1" | cksum | awk '{print $1}'; } + +# | parse_summary +parse_summary() { + jq -r '.status.summary | "\(.total) \(.ok)"' +} + +# | list_failures — extract failing URL(code) pairs from JSON body +list_failures() { + awk ' + BEGIN { FS="\""; u=""; c="" } + /"url":/ { u=$4 } + /"code":/ { c=$0; sub(/.*"code":/, "", c); sub(/,.*/, "", c) } + /"ok":false/ { if (u != "") { printf "%s(%s) ", u, c; u=""; c="" } } + ' +} + +# server_status_message(prefix, peer_url, ok, total, fail_list) +server_status_message() { + printf '%s: %s [%s/%s]%s' "${1:?}" "${2:?}" "${3:?}" "${4:?}" "$5" +} + +# --- main loop --- + +trap 'rm -f "$tmpb" 2>/dev/null' EXIT INT HUP + +while :; do + log info "polling peers via ${WHITE}${PEERS_DNS}${NC} every ${WHITE}${POLLING_INTERVAL_SEC}${NC}s" + + peers=$(resolve_peers) || peers="" + + if [ -z "$peers" ]; then + log warn "no peers resolved from ${WHITE}${PEERS_DNS}${NC} (all IPs are local or DNS returned 
nothing)" + fi + + printf '%s\n' "$peers" | while IFS= read -r url; do + [ -n "$url" ] || continue + + auth_h="" + [ -n "$PEERS_TOKEN" ] && auth_h="-H 'Authorization: Basic $PEERS_TOKEN'" + + tmpb=$(mktemp) || exit 1 + # shellcheck disable=SC2086 + code=$(sh -c "curl -sS -m \"$TIMEOUT\" -w '%{http_code}' -o \"$tmpb\" $auth_h \"$url\"") \ + || code="000" + body=$(cat "$tmpb"); rm -f "$tmpb" + + ok="down"; total=0; good=0 + if [ "$code" = "200" ]; then + summary=$(printf '%s' "$body" | parse_summary || true) + [ -n "$summary" ] && { total=${summary%% *}; good=${summary#* }; } + [ "$total" -eq "$good" ] && ok="up" + fi + + msg_prefix=$([ "$ok" = "up" ] && printf '%s' "$PREFIX_OK" || printf '%s' "$PREFIX_FAIL") + fail_list="" + if [ "$ok" = "down" ] && [ -n "$body" ]; then + fails=$(printf '%s' "$body" | list_failures | sed 's/[ ]$//') + [ -n "$fails" ] && fail_list=" — ${fails}" + fi + msg=$(server_status_message "$msg_prefix" "$url" "$good" "$total" "$fail_list") + + sfile="${STATE_DIR}/$(sid "$url").state" + last=""; [ -f "$sfile" ] && last=$(cat "$sfile") + cur="${ok}:${good}/${total}:${code}" + if [ "$cur" != "$last" ] || [ "$SPAM" = "1" ]; then + notify "$msg" + printf '%s' "$cur" >"$sfile" + fi + done + + sleep "$POLLING_INTERVAL_SEC" +done diff --git a/sus/hectic-lab.yaml b/sus/hectic-lab.yaml index 848e42e..129fa25 100644 --- a/sus/hectic-lab.yaml +++ b/sus/hectic-lab.yaml @@ -3,6 +3,8 @@ mailserver: hashedPassword: ENC[AES256_GCM,data:Z03x7tWHIhlRPaRZSrukyYOKhs6LdasZhZdizHdhlaJp2bywQZXKBaDABj2ab4rhwAPCHWhSiBjz35zV,iv:Z3hLC/A4YLVQkflr4cg9/wkKzo/RUdnLTwYC7ZhS0Hk=,tag:mSF/mbzH7iG6PwzyEsmyGg==,type:str] yukkop: hashedPassword: ENC[AES256_GCM,data:zjhCFkmmMzQHn09uRz1S4NTNU8hVRY5ZSYRHn6Gd0u09Fc7inNVSPrO+Br41UagPmv526w9MMQoIbV3RiJq6E/mfhAouqybYbQ==,iv:aVjn+/X2ESgZU7p7jETONaqtsD2/NAFOd7IIbunTRaI=,tag:kueml9QdQYVBceFMCgWoGA==,type:str] + daniil-perlyk: + hashedPassword: 
ENC[AES256_GCM,data:vlmz39OxJ0iavBYW1ij2pn/1wFOx+yBWR16ZCDlnGvc9/BNwd8vhAPJw7D+bujA89UMAvIC7ERiSNzR2uamp0+aH8gKyzchEXg==,iv:H1NVynfp5X18PcyR2/R4mNZ69KpcbzAc+D2akl3aAmE=,tag:YjYh/y5o3bkajWDsyctJUw==,type:str] founders: hashedPassword: ENC[AES256_GCM,data:E+Xu/Ul3rFLlYDD0LGGRnc6RADlWmXpOM2OfkZFSzAf5thm8leRi9x5sroP25cO8CcSyBcOiUCBv3RC5ooXklm4cmpOx8LA8ug==,iv:RLbU6SBHKOBRCNZ6naxQMaNjWZOlNui6OaaVM2QkdZs=,tag:sO7CD+PVkdtvPvlUrpzW+g==,type:str] snuff: @@ -70,7 +72,7 @@ sops: Yk43ZmlTc09aNFV1VjdjN2RWQlFWTDQKcYSvA2lHP8GS0lkYY19Tm8RXmFHQX5Ck qV2Fn22Fic4M5FVKDEMfaO6WmeXgki9a8dGeO9LlC+Phf16SOq7eLw== -----END AGE ENCRYPTED FILE----- - lastmodified: "2026-04-26T10:30:18Z" - mac: ENC[AES256_GCM,data:D8O/NTSgI//jdDA8UX56t7EfqH+YKvGsDKCTopPPfg/o9uey+onhxDfiiHniUBWJf5lArgZVLR5KOoVwQQWY1fz3lp/2ZBzaDJnt+IiqVeXgImNuOmdWgmvJF6o2UmpjEISRGtC1ih8UHplaQw2e7YEiH/QUMHoz/TVRWDHaMas=,iv:UWJkyc6YYMush8ASgb0ntHXEBeo9u2eGJ93wBfQVm4g=,tag:nhaAeTvoObP9GT2iNNrAzA==,type:str] + lastmodified: "2026-04-26T15:09:14Z" + mac: ENC[AES256_GCM,data:xDzKY+rn12ORC3HZHmMs3orTcg3kZiLwZYip/0ZZ7UJLJxoO98TIWvB1rxl8aAOjJCJ54LWRYkhMACn+4tdUhiy+RlsqVcL0jG9Vb1jpfVtPKy6tschoyVmWYOrc9dMHrnP3OEyYzqlAOdZ5tY9GubWx5hkdFOp17CtlCJV9faI=,iv:OXMlQ/ssDqiL9Lwv0EQefIIlv/VFBMwTSZ0WE3746k0=,tag:M3Oic7tiR/n71xEZrTa9tw==,type:str] unencrypted_suffix: _unencrypted version: 3.10.2 diff --git a/test/package/default.nix b/test/package/default.nix index e786876..d0a2e4e 100644 --- a/test/package/default.nix +++ b/test/package/default.nix @@ -1,3 +1,4 @@ -{ system, inputs, self, pkgs }: +{ system, inputs, self, pkgs }: (import ./migrator { inherit system inputs self pkgs; }) // - (import ./hemar { inherit system inputs self pkgs; }) + (import ./hemar { inherit system inputs self pkgs; }) // + (import (./. 
+ "/sentinèlla") { inherit system inputs self pkgs; }) diff --git a/test/package/sentinèlla/default.nix b/test/package/sentinèlla/default.nix new file mode 100644 index 0000000..f0a8d8c --- /dev/null +++ b/test/package/sentinèlla/default.nix @@ -0,0 +1,44 @@ +{ inputs, self, pkgs, system, ... }: let + lib = inputs.nixpkgs.lib; + + mkTestDrv = name: type: + if type == "directory" then + pkgs.runCommand "test-${name}" {} '' + if ! [ -f ${./test + "/${name}" + /run.sh} ]; then + echo "no run.sh in test/${name}" + exit 1 + fi + mkdir -p "$out" + cp -r ${./test + "/${name}"}/* "$out/" + chmod +x "$out/run.sh" + '' + else if lib.hasSuffix ".sh" name then + pkgs.runCommand "test-${lib.removeSuffix ".sh" name}" {} '' + mkdir -p "$out" + install -Dm755 ${./test + "/${name}"} "$out/run.sh" + '' + else + null; + + testDir = builtins.readDir ./test; + testDrvs = + lib.mapAttrs' (n: v: + lib.nameValuePair (lib.removeSuffix ".sh" n) v + ) (lib.filterAttrs (_: v: v != null) + (lib.mapAttrs (n: t: mkTestDrv n t) testDir)); + + sentinella = self.packages.${system}."sentinèlla"; + + mkTest = testName: testDrv: pkgs.runCommand "sentinella-test-${testName}" + { + nativeBuildInputs = [ pkgs.coreutils pkgs.gnugrep pkgs.gnused ]; + buildInputs = [ sentinella pkgs.curl pkgs.jq pkgs.socat ]; + } '' + ${builtins.readFile self.legacyPackages.${system}.helpers.posix-shell.log} + export HECTIC_LOG=trace + test=${testDrv} + ${builtins.readFile ./launch.sh} + + mkdir -p "$out" + ''; +in lib.mapAttrs (name: drv: mkTest name drv) testDrvs diff --git a/test/package/sentinèlla/launch.sh b/test/package/sentinèlla/launch.sh new file mode 100644 index 0000000..7e56bab --- /dev/null +++ b/test/package/sentinèlla/launch.sh @@ -0,0 +1,45 @@ +#!/bin/dash +# launch.sh — sets up helpers and runs the test pointed to by $test + +# assert_eq(label, got, expected) +assert_eq() { + label=${1:?} + got=${2:?} + expected=${3:?} + if [ "$got" != "$expected" ]; then + log error "FAIL: $label" + log error " expected: 
$WHITE$expected" + log error " got: $WHITE$got" + exit 1 + fi + log info "PASS: $label" +} + +# assert_file_contains(label, file, pattern) +assert_file_contains() { + label=${1:?} + file=${2:?} + pattern=${3:?} + if ! grep -q "$pattern" "$file" 2>/dev/null; then + log error "FAIL: $label — pattern '$pattern' not found in $file" + exit 1 + fi + log info "PASS: $label" +} + +# wait_for_file(file, timeout_sec) +wait_for_file() { + file=${1:?} + timeout=${2:-10} + i=0 + while [ $i -lt "$timeout" ]; do + [ -f "$file" ] && return 0 + sleep 1 + i=$((i+1)) + done + log error "timeout waiting for file: $file" + exit 1 +} + +# run the actual test +. "$test/run.sh" diff --git a/test/package/sentinèlla/test/probe-disk.sh b/test/package/sentinèlla/test/probe-disk.sh new file mode 100644 index 0000000..bef552e --- /dev/null +++ b/test/package/sentinèlla/test/probe-disk.sh @@ -0,0 +1,35 @@ +#!/bin/dash +# Test: probe GET /disk returns JSON with at least one volume entry + +log notice "test case: ${WHITE}probe GET /disk returns volume data" + +PORT=15989 +export PORT URLS="" VOLUMES="/" + +probe & +probe_pid=$! 
+trap 'kill $probe_pid 2>/dev/null; exit' EXIT INT HUP + +sleep 2 + +response=$(curl -sS --max-time 5 "http://127.0.0.1:${PORT}/disk") +log info "response: $WHITE$response" + +count=$(printf '%s' "$response" | jq -r '.volumes | length') +log info "volume count: $WHITE$count" + +if [ "$count" -lt 1 ]; then + log error "expected at least 1 volume, got $count" + exit 1 +fi +log info "PASS: at least one volume returned" + +# each entry must have a mount field +mount=$(printf '%s' "$response" | jq -r '.volumes[0].mount') +if [ -z "$mount" ] || [ "$mount" = "null" ]; then + log error "volumes[0].mount is missing or null" + exit 1 +fi +log info "PASS: volumes[0].mount = $mount" + +log notice "test passed" diff --git a/test/package/sentinèlla/test/probe-status-empty.sh b/test/package/sentinèlla/test/probe-status-empty.sh new file mode 100644 index 0000000..03c24aa --- /dev/null +++ b/test/package/sentinèlla/test/probe-status-empty.sh @@ -0,0 +1,27 @@ +#!/bin/dash +# Test: probe responds on GET /status with valid JSON when URLS is empty + +log notice "test case: ${WHITE}probe GET /status returns JSON with empty checks" + +# start probe on a free port +PORT=15988 +export PORT URLS="" VOLUMES="/" + +probe & +probe_pid=$! 
+trap 'kill $probe_pid 2>/dev/null; exit' EXIT INT HUP + +# wait for probe to be ready +sleep 2 + +response=$(curl -sS --max-time 5 "http://127.0.0.1:${PORT}/status") +log info "response: $WHITE$response" + +# must be valid JSON with summary.total == 0 +total=$(printf '%s' "$response" | jq -r '.summary.total') +assert_eq "summary.total is 0 when URLS empty" "$total" "0" + +ok=$(printf '%s' "$response" | jq -r '.summary.ok') +assert_eq "summary.ok is 0 when URLS empty" "$ok" "0" + +log notice "test passed" diff --git a/test/package/sentinèlla/test/watcher-state-file.sh b/test/package/sentinèlla/test/watcher-state-file.sh new file mode 100644 index 0000000..4bdba5c --- /dev/null +++ b/test/package/sentinèlla/test/watcher-state-file.sh @@ -0,0 +1,75 @@ +#!/bin/dash +# Test: watcher writes a state file after polling a peer +# +# Setup: +# - Start a probe on 127.0.0.1:15990 +# - Stub getent to resolve peers.test -> 127.0.0.1 (the probe) and 10.0.0.1 (fake peer) +# - Stub hostname to return 10.0.0.1 as the local IP so 10.0.0.1 is excluded +# and 127.0.0.1 (the real probe) is kept as a peer +# - Assert a state file appears in STATE_DIR within 15s + +log notice "test case: ${WHITE}watcher writes state file after first successful poll" + +PORT=15990 +export PORT URLS="" VOLUMES="/" + +probe & +probe_pid=$! 
+trap 'kill "$probe_pid" 2>/dev/null; kill "$watcher_pid" 2>/dev/null; rm -rf "$stub_dir" "$state_dir"' EXIT INT HUP + +sleep 2 + +# Create stubs directory +stub_dir=$(mktemp -d) + +# Stub getent: returns two IPs for peers.test +cat >"${stub_dir}/getent" <<'EOF' +#!/bin/sh +if [ "$1" = "hosts" ] && [ "$2" = "peers.test" ]; then + printf '127.0.0.1 peers.test\n' + printf '10.0.0.1 peers.test\n' +else + /usr/bin/getent "$@" +fi +EOF +chmod +x "${stub_dir}/getent" + +# Stub hostname: -I returns 10.0.0.1 so watcher excludes it and keeps 127.0.0.1 +cat >"${stub_dir}/hostname" <<'EOF' +#!/bin/sh +case "$1" in + -I) printf '10.0.0.1\n' ;; + *) /bin/hostname "$@" ;; +esac +EOF +chmod +x "${stub_dir}/hostname" + +state_dir=$(mktemp -d) + +export PEERS_DNS="peers.test" +export PEERS_PORT="$PORT" +export PEERS_SCHEME="http" +export TG_TOKEN="test-token" +export TG_CHAT_ID="test-chat" +export STATE_DIR="$state_dir" +export POLLING_INTERVAL_SEC="1" +export SPAM="0" +unset SELF # ensure auto-detection is used + +PATH="${stub_dir}:${PATH}" watcher & +watcher_pid=$! + +log info "waiting for state file in $state_dir ..." +peer_url="http://127.0.0.1:${PORT}" +state_file="${state_dir}/$(printf '%s' "$peer_url" | cksum | awk '{print $1}').state" +wait_for_file "$state_file" 15 + +state=$(cat "$state_file") +log info "state file content: $WHITE$state" + +case "$state" in + up:*|down:*) log info "PASS: state file has expected format" ;; + *) log error "unexpected state file content: $state"; exit 1 ;; +esac + +log notice "test passed"