feat(package): sentinèlla: switch peer discovery from A records to SRV records
This commit is contained in:
@@ -13,7 +13,7 @@
|
|||||||
cfg = config.hectic.services."sentinèlla";
|
cfg = config.hectic.services."sentinèlla";
|
||||||
|
|
||||||
probePort = 5988;
|
probePort = 5988;
|
||||||
peersDns = "peers.sentinella.hectic-lab.com";
|
peersSrv = "_sentinella._tcp.hectic-lab.com";
|
||||||
in {
|
in {
|
||||||
options = {
|
options = {
|
||||||
hectic.services."sentinèlla" = {
|
hectic.services."sentinèlla" = {
|
||||||
@@ -99,7 +99,7 @@ in {
|
|||||||
TG_CHAT_ID=
|
TG_CHAT_ID=
|
||||||
PEERS_TOKEN= # Basic Auth token sent to all peers
|
PEERS_TOKEN= # Basic Auth token sent to all peers
|
||||||
SELF=
|
SELF=
|
||||||
PEERS_DNS=
|
PEERS_SRV=
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@@ -168,9 +168,8 @@ in {
|
|||||||
StandardError = "journal";
|
StandardError = "journal";
|
||||||
StateDirectory = "sentinella";
|
StateDirectory = "sentinella";
|
||||||
Environment = lib.filter (s: s != "") [
|
Environment = lib.filter (s: s != "") [
|
||||||
"PEERS_DNS=${peersDns}"
|
"PEERS_SRV=${peersSrv}"
|
||||||
(lib.optionalString (cfg.watcher.self != null) "SELF=${cfg.watcher.self}")
|
(lib.optionalString (cfg.watcher.self != null) "SELF=${cfg.watcher.self}")
|
||||||
"PEERS_PORT=${builtins.toString probePort}"
|
|
||||||
"PEERS_SCHEME=${cfg.watcher.peersScheme}"
|
"PEERS_SCHEME=${cfg.watcher.peersScheme}"
|
||||||
"POLLING_INTERVAL_SEC=${builtins.toString cfg.watcher.pollingIntervalSec}"
|
"POLLING_INTERVAL_SEC=${builtins.toString cfg.watcher.pollingIntervalSec}"
|
||||||
"STATE_DIR=/var/lib/sentinella"
|
"STATE_DIR=/var/lib/sentinella"
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
{ symlinkJoin, writeTextFile, socat, dash, hectic, curl, gawk, jq, inetutils, getent }:
|
{ symlinkJoin, writeTextFile, socat, dash, hectic, curl, gawk, jq, inetutils, getent, bind }:
|
||||||
let
|
let
|
||||||
shell = "${dash}/bin/dash";
|
shell = "${dash}/bin/dash";
|
||||||
bashOptions = [
|
bashOptions = [
|
||||||
@@ -36,7 +36,7 @@ let
|
|||||||
watcher = hectic.writeShellApplication {
|
watcher = hectic.writeShellApplication {
|
||||||
inherit shell bashOptions;
|
inherit shell bashOptions;
|
||||||
name = "watcher";
|
name = "watcher";
|
||||||
runtimeInputs = [ curl jq gawk inetutils getent ];
|
runtimeInputs = [ curl jq gawk inetutils getent bind.dnsutils ];
|
||||||
text = ''
|
text = ''
|
||||||
${builtins.readFile ./log.sh}
|
${builtins.readFile ./log.sh}
|
||||||
${builtins.readFile ./colors.sh}
|
${builtins.readFile ./colors.sh}
|
||||||
|
|||||||
@@ -1,26 +1,26 @@
|
|||||||
#!/bin/dash
|
#!/bin/dash
|
||||||
# watcher.sh — p2p peer monitor; polls all peers discovered via DNS and notifies on status change
|
# watcher.sh — p2p peer monitor; polls all peers discovered via DNS SRV records
|
||||||
|
# and notifies on status change via Telegram.
|
||||||
#
|
#
|
||||||
# Every node runs both probe (HTTP server) and watcher (this script).
|
# Every node runs both probe (HTTP server) and watcher (this script).
|
||||||
# Peer discovery: a single DNS name with multiple A records is resolved via
|
# Peer discovery: a single SRV record name resolved on every poll cycle.
|
||||||
# getent(1) on every poll cycle. Local IPs are detected automatically via
|
# Each SRV entry yields (priority, weight, port, target-hostname); the target
|
||||||
# hostname(1) and excluded so the node never polls itself.
|
# is resolved to an IP via getent and excluded if it belongs to this node.
|
||||||
# No central coordinator; all nodes are equal.
|
# No central coordinator; all nodes are equal.
|
||||||
#
|
#
|
||||||
# DNS setup (external, any registrar, TTL 60):
|
# DNS setup (any registrar, TTL 60):
|
||||||
# peers.example.com A 1.2.3.4
|
# _sentinella._tcp.example.com. SRV 0 10 5988 node-a.peers.example.com.
|
||||||
# peers.example.com A 5.6.7.8
|
# _sentinella._tcp.example.com. SRV 0 10 5988 node-b.peers.example.com.
|
||||||
# peers.example.com A 9.10.11.12
|
# node-a.peers.example.com. A 1.2.3.4
|
||||||
|
# node-b.peers.example.com. A 5.6.7.8
|
||||||
#
|
#
|
||||||
# Required env:
|
# Required env:
|
||||||
# PEERS_DNS DNS name that resolves to all peer IPs
|
# PEERS_SRV SRV record name (e.g. _sentinella._tcp.example.com)
|
||||||
# TG_TOKEN Telegram bot token
|
# TG_TOKEN Telegram bot token
|
||||||
# TG_CHAT_ID Telegram chat ID
|
# TG_CHAT_ID Telegram chat ID
|
||||||
#
|
#
|
||||||
# Optional env:
|
# Optional env:
|
||||||
# SELF Override auto-detected local IP (useful behind NAT
|
# SELF Override auto-detected local IPs (space-separated)
|
||||||
# or with floating IPs where hostname -I is unreliable)
|
|
||||||
# PEERS_PORT default 5988
|
|
||||||
# PEERS_SCHEME default http
|
# PEERS_SCHEME default http
|
||||||
# PEERS_TOKEN Basic Auth token sent to all peers; omit for no auth
|
# PEERS_TOKEN Basic Auth token sent to all peers; omit for no auth
|
||||||
# TIMEOUT curl timeout seconds (default 5)
|
# TIMEOUT curl timeout seconds (default 5)
|
||||||
@@ -35,9 +35,8 @@ PREFIX_FAIL="FAIL"
|
|||||||
|
|
||||||
TIMEOUT=${TIMEOUT:-5}
|
TIMEOUT=${TIMEOUT:-5}
|
||||||
POLLING_INTERVAL_SEC=${POLLING_INTERVAL_SEC:-3}
|
POLLING_INTERVAL_SEC=${POLLING_INTERVAL_SEC:-3}
|
||||||
PEERS_DNS=${PEERS_DNS:-}
|
PEERS_SRV=${PEERS_SRV:-}
|
||||||
SELF=${SELF:-}
|
SELF=${SELF:-}
|
||||||
PEERS_PORT=${PEERS_PORT:-5988}
|
|
||||||
PEERS_SCHEME=${PEERS_SCHEME:-http}
|
PEERS_SCHEME=${PEERS_SCHEME:-http}
|
||||||
PEERS_TOKEN=${PEERS_TOKEN:-}
|
PEERS_TOKEN=${PEERS_TOKEN:-}
|
||||||
TG_TOKEN=${TG_TOKEN:-}
|
TG_TOKEN=${TG_TOKEN:-}
|
||||||
@@ -50,14 +49,13 @@ mkdir -p "$STATE_DIR" 2>/dev/null || {
|
|||||||
mkdir -p "$STATE_DIR"
|
mkdir -p "$STATE_DIR"
|
||||||
}
|
}
|
||||||
|
|
||||||
[ -n "$PEERS_DNS" ] || { printf >&2 'PEERS_DNS not set\n'; exit 3; }
|
[ -n "$PEERS_SRV" ] || { printf >&2 'PEERS_SRV not set\n'; exit 3; }
|
||||||
[ -n "$TG_TOKEN" ] || { printf >&2 'TG_TOKEN not set\n'; exit 3; }
|
[ -n "$TG_TOKEN" ] || { printf >&2 'TG_TOKEN not set\n'; exit 3; }
|
||||||
[ -n "$TG_CHAT_ID" ] || { printf >&2 'TG_CHAT_ID not set\n'; exit 3; }
|
[ -n "$TG_CHAT_ID" ] || { printf >&2 'TG_CHAT_ID not set\n'; exit 3; }
|
||||||
|
|
||||||
# --- helpers ---
|
# --- helpers ---
|
||||||
|
|
||||||
# local_ips — returns space-separated list of IPs assigned to local interfaces.
|
# local_ips — space-separated list of IPs assigned to this node.
|
||||||
# If SELF is set it is used directly (useful behind NAT / floating IPs).
|
|
||||||
local_ips() {
|
local_ips() {
|
||||||
if [ -n "$SELF" ]; then
|
if [ -n "$SELF" ]; then
|
||||||
printf '%s' "$SELF"
|
printf '%s' "$SELF"
|
||||||
@@ -76,14 +74,19 @@ is_local_ip() {
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# resolve_peers — resolves PEERS_DNS to a newline-separated list of peer URLs,
|
# resolve_peers — SRV-resolves PEERS_SRV, then A-resolves each target.
|
||||||
# excluding all local IPs. Re-called every poll cycle so DNS changes are
|
# Emits "host port ip" per non-local peer, one per line.
|
||||||
# picked up without restarting the watcher.
|
|
||||||
resolve_peers() {
|
resolve_peers() {
|
||||||
getent hosts "$PEERS_DNS" \
|
# host -t SRV output:
|
||||||
| awk '{print $1}' \
|
# <name> has SRV record <prio> <weight> <port> <target>.
|
||||||
| while IFS= read -r ip; do
|
host -t SRV "$PEERS_SRV" 2>/dev/null \
|
||||||
is_local_ip "$ip" || printf '%s://%s:%s\n' "$PEERS_SCHEME" "$ip" "$PEERS_PORT"
|
| awk '/has SRV record/ { sub(/\.$/, "", $NF); print $(NF-1), $NF }' \
|
||||||
|
| while IFS=' ' read -r port target; do
|
||||||
|
[ -n "$target" ] || continue
|
||||||
|
ip=$(getent hosts "$target" | awk '{print $1; exit}')
|
||||||
|
[ -n "$ip" ] || { log warn "could not resolve ${WHITE}${target}${NC}"; continue; }
|
||||||
|
is_local_ip "$ip" && continue
|
||||||
|
printf '%s %s %s\n' "$target" "$port" "$ip"
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,7 +118,7 @@ list_failures() {
|
|||||||
'
|
'
|
||||||
}
|
}
|
||||||
|
|
||||||
# server_status_message(prefix, peer_url, ok, total, fail_list)
|
# server_status_message(prefix, peer_label, ok, total, fail_list)
|
||||||
server_status_message() {
|
server_status_message() {
|
||||||
printf '%s: %s [%s/%s]%s' "${1:?}" "${2:?}" "${3:?}" "${4:?}" "$5"
|
printf '%s: %s [%s/%s]%s' "${1:?}" "${2:?}" "${3:?}" "${4:?}" "$5"
|
||||||
}
|
}
|
||||||
@@ -125,21 +128,24 @@ server_status_message() {
|
|||||||
trap 'rm -f "$tmpb" 2>/dev/null' EXIT INT HUP
|
trap 'rm -f "$tmpb" 2>/dev/null' EXIT INT HUP
|
||||||
|
|
||||||
while :; do
|
while :; do
|
||||||
log info "polling peers via ${WHITE}${PEERS_DNS}${NC} every ${WHITE}${POLLING_INTERVAL_SEC}${NC}s"
|
log info "polling peers via SRV ${WHITE}${PEERS_SRV}${NC} every ${WHITE}${POLLING_INTERVAL_SEC}${NC}s"
|
||||||
|
|
||||||
peers=$(resolve_peers) || peers=""
|
peers=$(resolve_peers) || peers=""
|
||||||
|
|
||||||
if [ -z "$peers" ]; then
|
if [ -z "$peers" ]; then
|
||||||
log warn "no peers resolved from ${WHITE}${PEERS_DNS}${NC} (all IPs are local or DNS returned nothing)"
|
log warn "no peers resolved from ${WHITE}${PEERS_SRV}${NC} (all targets local or DNS empty)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
printf '%s\n' "$peers" | while IFS= read -r url; do
|
printf '%s\n' "$peers" | while IFS=' ' read -r host port ip; do
|
||||||
[ -n "$url" ] || continue
|
[ -n "$host" ] || continue
|
||||||
|
|
||||||
|
url="${PEERS_SCHEME}://${ip}:${port}"
|
||||||
|
label="${host} (${ip})"
|
||||||
|
|
||||||
tmpb=$(mktemp) || exit 1
|
tmpb=$(mktemp) || exit 1
|
||||||
set -- curl -sS -m "$TIMEOUT" -w '%{http_code}' -o "$tmpb"
|
set -- curl -sS -m "$TIMEOUT" -w '%{http_code}' -o "$tmpb"
|
||||||
[ -n "$PEERS_TOKEN" ] && set -- "$@" -H "Authorization: Basic $PEERS_TOKEN"
|
[ -n "$PEERS_TOKEN" ] && set -- "$@" -H "Authorization: Basic $PEERS_TOKEN"
|
||||||
set -- "$@" "$url"
|
set -- "$@" "${url}/status"
|
||||||
code=$("$@" 2>/dev/null) || code="000"
|
code=$("$@" 2>/dev/null) || code="000"
|
||||||
body=$(cat "$tmpb"); rm -f "$tmpb"
|
body=$(cat "$tmpb"); rm -f "$tmpb"
|
||||||
|
|
||||||
@@ -156,9 +162,9 @@ while :; do
|
|||||||
fails=$(printf '%s' "$body" | list_failures | sed 's/[ ]$//')
|
fails=$(printf '%s' "$body" | list_failures | sed 's/[ ]$//')
|
||||||
[ -n "$fails" ] && fail_list=" — ${fails}"
|
[ -n "$fails" ] && fail_list=" — ${fails}"
|
||||||
fi
|
fi
|
||||||
msg=$(server_status_message "$msg_prefix" "$url" "$good" "$total" "$fail_list")
|
msg=$(server_status_message "$msg_prefix" "$label" "$good" "$total" "$fail_list")
|
||||||
|
|
||||||
sfile="${STATE_DIR}/$(sid "$url").state"
|
sfile="${STATE_DIR}/$(sid "$host").state"
|
||||||
last=""; [ -f "$sfile" ] && last=$(cat "$sfile")
|
last=""; [ -f "$sfile" ] && last=$(cat "$sfile")
|
||||||
cur="${ok}:${good}/${total}:${code}"
|
cur="${ok}:${good}/${total}:${code}"
|
||||||
if [ "$cur" != "$last" ] || [ "$SPAM" = "1" ]; then
|
if [ "$cur" != "$last" ] || [ "$SPAM" = "1" ]; then
|
||||||
|
|||||||
Reference in New Issue
Block a user