This commit is contained in:
2026-05-23 22:57:09 +00:00
parent 9c6de9d067
commit 4c0a178646
6 changed files with 946 additions and 21 deletions

View File

@@ -47,6 +47,17 @@ creation_rules:
- *hectic-lab-server
- *umbriel-bfs
- path_regex: sus/matrix-cluster.yaml$
key_groups:
- age:
- *nrv
- *yukkop
- *snuff
- *yukkop-alt
- *hectic-lab-server
- *bfs-pol-server
- *umbriel-bfs
- path_regex: sus/sentinella-default.yaml$
key_groups:
- age:

View File

@@ -0,0 +1,511 @@
{
inputs,
flake,
self,
}: {
pkgs,
lib,
config,
...
}: let
# Shorthands for this module's option tree.
cfg = config.hectic.generic.matrix-cluster;
s3Cfg = cfg.objectStorage.s3;

# Synapse plugin package that is added to services.matrix-synapse.plugins.
s3Plugin = pkgs.matrix-synapse-plugins.matrix-synapse-s3-storage-provider;

# Where the generated S3 media-storage config lives. The file is written at
# service start (it embeds credentials) and handed to Synapse through
# services.matrix-synapse.extraConfigFiles.
s3ConfigDir = "/run/matrix-synapse";
s3ConfigFile = "${s3ConfigDir}/s3-media-storage.yaml";

# Follow the data directory PostgreSQL is actually configured with instead of
# hard-coding the version suffix. With the stock dataDir default and
# postgresql_17 this is still /var/lib/postgresql/17, but it stays correct if
# a host overrides services.postgresql.dataDir or the package is bumped.
pgDataDir = config.services.postgresql.dataDir;

# Names of the declaratively provisioned Matrix accounts.
matrixUsers = builtins.attrNames cfg.users;
# Render the shell snippet that registers one declarative user `name`.
# The snippet expects REGISTRATION_SHARED_SECRET to be set by the surrounding
# script, aborts when the password file is unreadable, and ignores a failing
# register_new_matrix_user call (trailing `|| true`).
mkUserRegistration = name: let
  inherit (cfg.users.${name}) admin passwordFile;
  roleFlag = "--${lib.optionalString (!admin) "no-"}admin";
  registerTool = "${pkgs.matrix-synapse}/bin/register_new_matrix_user";
in ''
  if [ ! -r "${passwordFile}" ]; then
  printf 'Missing Matrix password file for %s: %s\n' '${name}' '${passwordFile}' >&2
  exit 1
  fi
  ${registerTool} \
  -u '${name}' \
  -p "$(tr -d '\n' < "${passwordFile}")" \
  -k "$REGISTRATION_SHARED_SECRET" \
  ${roleFlag} \
  http://127.0.0.1:8008 || true
'';
# Whether Synapse runs on this node: an explicit overrideEnableSynapse wins,
# otherwise only the primary role runs it.
synapseEnabled =
  if cfg.overrideEnableSynapse == null
  then cfg.role == "primary"
  else cfg.overrideEnableSynapse;
# Shell script that renders the S3 media-storage YAML for Synapse.
#
# It sources the env-style credentials file, requires both ACCESS_KEY_ID and
# SECRET_ACCESS_KEY to be non-empty, and writes s3ConfigFile owned by
# matrix-synapse with mode 0400.
#
# The file contains the secret key, so the umask is tightened *before* the
# file is created; otherwise it would exist with default permissions until
# the final chmod. The umask is changed only after mkdir so the directory
# itself keeps its normal mode and stays traversable for matrix-synapse.
#
# The heredoc terminator must stay at the same indentation as the script
# lines: Nix strips that common prefix, which puts EOF at column 0.
mkS3Config = ''
  if [ ! -r "${s3Cfg.credentialsFile}" ]; then
    printf 'Missing Matrix object storage credentials file: %s\n' '${s3Cfg.credentialsFile}' >&2
    exit 1
  fi
  . "${s3Cfg.credentialsFile}"
  if [ -z "$ACCESS_KEY_ID" ] || [ -z "$SECRET_ACCESS_KEY" ]; then
    printf 'ACCESS_KEY_ID or SECRET_ACCESS_KEY missing in %s\n' '${s3Cfg.credentialsFile}' >&2
    exit 1
  fi
  mkdir -p "${s3ConfigDir}"
  umask 077
  cat > "${s3ConfigFile}" <<EOF
  media_storage_providers:
    - module: s3_storage_provider.S3StorageProviderBackend
      store_local: ${lib.boolToString s3Cfg.storeLocal}
      store_remote: ${lib.boolToString s3Cfg.storeRemote}
      store_synchronous: ${lib.boolToString s3Cfg.storeSynchronous}
      config:
        bucket: ${s3Cfg.bucket}
        endpoint_url: ${s3Cfg.endpointUrl}
        region_name: ${s3Cfg.regionName}
        prefix: "${s3Cfg.prefix}"
        storage_class: "${s3Cfg.storageClass}"
        threadpool_size: ${toString s3Cfg.threadpoolSize}
        access_key_id: $ACCESS_KEY_ID
        secret_access_key: $SECRET_ACCESS_KEY
  EOF
  chown matrix-synapse:matrix-synapse "${s3ConfigFile}"
  chmod 0400 "${s3ConfigFile}"
'';
in {
# Option declarations for the active/passive Matrix cluster node.
options.hectic.generic.matrix-cluster = {
  enable = lib.mkEnableOption "Matrix Synapse active/passive cluster node";

  role = lib.mkOption {
    type = lib.types.enum [ "primary" "standby" ];
    description = ''
      Cluster role of this node. The primary runs Synapse and accepts WAL
      streaming connections; the standby runs a hot-standby Postgres replica
      only and keeps Synapse disabled until failover.
    '';
  };

  matrixDomain = lib.mkOption {
    type = lib.types.str;
    description = "Matrix server_name (also nginx vhost / ACME cert name).";
  };

  signingKeyFile = lib.mkOption {
    type = lib.types.path;
    description = ''
      Path to the Synapse homeserver signing key. The
      matrix-cluster-signing-key service copies it to
      /var/lib/matrix-synapse/homeserver.signing.key (owner matrix-synapse,
      mode 0400) on every node, whether or not Synapse is enabled there.
    '';
  };

  secretsFile = lib.mkOption {
    type = lib.types.nullOr lib.types.path;
    default = null;
    description = ''
      Extra Synapse YAML config (registration_shared_secret, macaroon_secret_key,
      form_secret). Loaded via matrix-synapse extraConfigFiles. Required when
      Synapse is enabled on this node (primary, or standby after failover).
    '';
  };

  maxUploadSize = lib.mkOption {
    type = lib.types.str;
    default = "2G";
    description = ''
      Upload limit, used both as Synapse max_upload_size and as nginx
      client_max_body_size for the proxied vhost.
    '';
  };

  enableRegistration = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Sets both Synapse enable_registration and
      enable_registration_without_verification.
    '';
  };

  users = lib.mkOption {
    type = lib.types.attrsOf (lib.types.submodule {
      options = {
        passwordFile = lib.mkOption {
          type = lib.types.str;
          description = "File holding the user's password; newlines are stripped before use.";
        };
        admin = lib.mkOption {
          type = lib.types.bool;
          default = false;
          description = "Register the user with --admin instead of --no-admin.";
        };
      };
    });
    default = {};
    description = "Declarative Matrix users provisioned via register_new_matrix_user.";
  };

  overrideEnableSynapse = lib.mkOption {
    type = lib.types.nullOr lib.types.bool;
    default = null;
    description = ''
      When non-null, forces Synapse on/off regardless of role. Used during
      failover: set to true on the standby once it has been promoted, or
      false on the primary to drain it.
    '';
  };

  objectStorage.s3 = {
    bucket = lib.mkOption {
      type = lib.types.str;
      description = "S3 bucket name written to the storage provider config.";
    };
    regionName = lib.mkOption {
      type = lib.types.str;
      description = "S3 region_name written to the storage provider config.";
    };
    endpointUrl = lib.mkOption {
      type = lib.types.str;
      description = "S3 endpoint_url written to the storage provider config.";
    };
    credentialsFile = lib.mkOption {
      type = lib.types.path;
      description = ''
        env-style file with ACCESS_KEY_ID= and SECRET_ACCESS_KEY=. MUST be
        the SAME credentials/bucket on both primary and standby.
      '';
    };
    mediaStorePath = lib.mkOption {
      type = lib.types.str;
      default = "/var/lib/matrix-synapse/media_store";
      description = "Value of Synapse media_store_path.";
    };
    prefix = lib.mkOption {
      type = lib.types.str;
      default = "";
      description = "Storage provider `prefix` setting.";
    };
    storageClass = lib.mkOption {
      type = lib.types.str;
      default = "STANDARD";
      description = "Storage provider `storage_class` setting.";
    };
    threadpoolSize = lib.mkOption {
      type = lib.types.int;
      default = 40;
      description = "Storage provider `threadpool_size` setting.";
    };
    storeLocal = lib.mkOption {
      type = lib.types.bool;
      default = true;
      description = "Storage provider `store_local` setting.";
    };
    storeRemote = lib.mkOption {
      type = lib.types.bool;
      default = true;
      description = "Storage provider `store_remote` setting.";
    };
    storeSynchronous = lib.mkOption {
      type = lib.types.bool;
      default = true;
      description = "Storage provider `store_synchronous` setting.";
    };
  };

  replication = {
    peerHost = lib.mkOption {
      type = lib.types.str;
      description = "Public IP/hostname of the other cluster node.";
    };
    peerPort = lib.mkOption {
      type = lib.types.port;
      default = 5432;
      description = "Postgres port of the other cluster node.";
    };
    passwordFile = lib.mkOption {
      type = lib.types.path;
      description = ''
        File containing the raw replication password (newlines are stripped).
        On the primary it becomes the password of the `replication` Postgres
        role; on the standby the module builds a libpq passfile from it and
        references that file as `passfile=` in primary_conninfo. Do not put
        a ready-made passfile line here.
      '';
    };
    allowedSourceIPs = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [];
      description = ''
        CIDRs allowed to connect to Postgres for replication. Used on the
        primary to generate pg_hba.conf hostssl entries. This module does not
        open the firewall; allow the same sources to reach the Postgres port
        in the host configuration.
      '';
    };
    sslMode = lib.mkOption {
      # Restricted to the values libpq accepts; the value is interpolated
      # into primary_conninfo on the standby.
      type = lib.types.enum [ "disable" "allow" "prefer" "require" "verify-ca" "verify-full" ];
      default = "require";
      description = "libpq sslmode used by the standby in primary_conninfo.";
    };
  };

  acme = {
    enable = lib.mkEnableOption "Porkbun DNS-01 ACME for matrixDomain";
    email = lib.mkOption {
      type = lib.types.str;
      default = "hectic.yukkop.it@gmail.com";
      description = "ACME registration email (passed to security.acme.defaults.email).";
    };
    porkbunApiKeyFile = lib.mkOption {
      type = lib.types.path;
      description = "File containing PORKBUN_API_KEY value.";
    };
    porkbunSecretApiKeyFile = lib.mkOption {
      type = lib.types.path;
      description = "File containing PORKBUN_SECRET_API_KEY value.";
    };
  };
};
config = lib.mkIf cfg.enable (lib.mkMerge [
{
  # Signing key install: copy the key into the matrix-synapse data dir with
  # correct ownership regardless of whether Synapse is currently enabled on
  # this node, so a failover flip does not need a separate provisioning step.
  systemd.tmpfiles.rules = [
    "d /var/lib/matrix-synapse 0750 matrix-synapse matrix-synapse -"
  ];

  systemd.services.matrix-cluster-signing-key = {
    description = "Install Matrix Synapse signing key from secrets";
    wantedBy = [ "multi-user.target" ];
    after = [ "sops-install-secrets.service" ];
    requires = [ "sops-install-secrets.service" ];
    before = lib.optional synapseEnabled "matrix-synapse.service";
    # `before` alone is only an ordering hint. Also make Synapse require this
    # unit (same pattern as matrix-synapse-s3-config), so Synapse is not
    # started when the shared signing key could not be installed.
    requiredBy = lib.optional synapseEnabled "matrix-synapse.service";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
    };
    script = ''
      set -eu
      install -d -o matrix-synapse -g matrix-synapse -m 0750 /var/lib/matrix-synapse
      install -o matrix-synapse -g matrix-synapse -m 0400 \
        "${cfg.signingKeyFile}" \
        /var/lib/matrix-synapse/homeserver.signing.key
    '';
  };

  # The account must exist on standby nodes too (Synapse disabled there),
  # because the key file and data dir are owned by it.
  users.users.matrix-synapse = {
    isSystemUser = true;
    group = "matrix-synapse";
  };
  users.groups.matrix-synapse = {};
}
(lib.mkIf synapseEnabled {
  # Everything below only applies on the node that currently runs Synapse.
  assertions = [
    {
      assertion = cfg.secretsFile != null;
      message = "hectic.generic.matrix-cluster.secretsFile must be set when Synapse runs on this node.";
    }
  ];

  environment.systemPackages = [ pkgs.matrix-synapse ];

  services.matrix-synapse = {
    enable = true;
    plugins = [ s3Plugin ];
    # secretsFile carries the shared secrets; s3ConfigFile is generated at
    # start by matrix-synapse-s3-config.
    extraConfigFiles = [ cfg.secretsFile s3ConfigFile ];
    settings = {
      server_name = cfg.matrixDomain;
      public_baseurl = "https://${cfg.matrixDomain}";
      signing_key_path = "/var/lib/matrix-synapse/homeserver.signing.key";
      media_store_path = s3Cfg.mediaStorePath;
      max_upload_size = cfg.maxUploadSize;
      enable_registration = cfg.enableRegistration;
      enable_registration_without_verification = cfg.enableRegistration;
      experimental_features = lib.genAttrs [
        "msc3266_enabled"
        "msc4140_enabled"
        "msc4143_enabled"
        "msc4222_enabled"
      ] (_: true);
      # NOTE(review): the listener binds every interface although nginx and
      # the user-provisioning script both reach it via 127.0.0.1, and
      # x_forwarded is not set even though traffic arrives through the nginx
      # proxy. Confirm both are intentional.
      listeners = [
        {
          type = "http";
          port = 8008;
          bind_addresses = [ "0.0.0.0" ];
          tls = false;
          resources = [
            {
              names = [ "client" "federation" "openid" ];
              compress = false;
            }
          ];
        }
      ];
    };
  };

  services.nginx.enable = true;
  services.nginx.virtualHosts.${cfg.matrixDomain} = {
    enableACME = true;
    forceSSL = true;
    locations = {
      # Everything is proxied to the local Synapse listener.
      "/" = {
        proxyPass = "http://127.0.0.1:8008";
        extraConfig = ''
          client_max_body_size ${cfg.maxUploadSize};
        '';
      };
      # Static federation delegation document.
      "=/.well-known/matrix/server" = {
        return = "200 '{\"m.server\": \"${cfg.matrixDomain}:443\"}'";
        extraConfig = ''
          default_type application/json;
          add_header Access-Control-Allow-Origin *;
          add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
          add_header Access-Control-Allow-Headers "X-Requested-With, Content-Type, Authorization";
        '';
      };
    };
  };

  systemd.services = {
    # Renders the credential-bearing S3 config before Synapse starts.
    matrix-synapse-s3-config = {
      description = "Generate Synapse S3 media storage config";
      requires = [ "sops-install-secrets.service" ];
      after = [ "sops-install-secrets.service" ];
      requiredBy = [ "matrix-synapse.service" ];
      before = [ "matrix-synapse.service" ];
      serviceConfig = {
        Type = "oneshot";
      };
      script = mkS3Config;
    };

    # Waits for the client API to answer, reads registration_shared_secret
    # out of secretsFile, then runs one registration snippet per user.
    matrix-synapse-users = lib.mkIf (matrixUsers != []) {
      description = "Provision Matrix Synapse users";
      wantedBy = [ "multi-user.target" ];
      requires = [ "matrix-synapse.service" ];
      after = [ "matrix-synapse.service" ];
      path = [ pkgs.curl pkgs.coreutils pkgs.gawk ];
      serviceConfig = {
        User = "matrix-synapse";
        Type = "oneshot";
      };
      script = ''
        until curl -sf http://127.0.0.1:8008/_matrix/client/versions >/dev/null; do
        sleep 2
        done
        REGISTRATION_SHARED_SECRET="$(awk -F': *' '$1 == "registration_shared_secret" { print $2; exit }' "${cfg.secretsFile}")"
        if [ -z "$REGISTRATION_SHARED_SECRET" ]; then
        printf 'registration_shared_secret not found in %s\n' '${cfg.secretsFile}' >&2
        exit 1
        fi
        ${lib.concatStringsSep "\n" (map mkUserRegistration matrixUsers)}
      '';
    };
  };
})
{
  # PostgreSQL 17 is configured identically on both roles so either node can
  # stream WAL or act as a hot standby after a role switch.
  services.postgresql.enable = true;
  services.postgresql.package = pkgs.postgresql_17;
  services.postgresql.enableTCPIP = true;
  services.postgresql.initdbArgs = [ "--locale=C" "--encoding=UTF8" ];
  services.postgresql.settings.wal_level = "replica";
  services.postgresql.settings.max_wal_senders = 4;
  services.postgresql.settings.hot_standby = "on";
}
(lib.mkIf (cfg.role == "primary") {
  services.postgresql = {
    # pg_hba.conf: fixed local/loopback rules plus one hostssl replication
    # rule per allowed source CIDR.
    # NOTE(review): hostssl rules only match TLS connections, and this module
    # does not enable `ssl` in services.postgresql.settings. Confirm the
    # server-side SSL setup is provided elsewhere, otherwise the standby
    # cannot authenticate for replication.
    authentication = lib.concatStringsSep "\n" ([
      "local all all trust"
      "host sameuser all 127.0.0.1/32 scram-sha-256"
      "host sameuser all ::1/128 scram-sha-256"
      "host all all ::1/128 scram-sha-256"
      "host all all 0.0.0.0/0 scram-sha-256"
      "host replication postgres 127.0.0.1/32 scram-sha-256"
      "host replication postgres ::1/128 scram-sha-256"
    ] ++ map (cidr:
      "hostssl replication replication ${cidr} scram-sha-256"
    ) cfg.replication.allowedSourceIPs);
    ensureUsers = [
      {
        name = "replication";
        ensureClauses = {
          login = true;
          replication = true;
        };
      }
    ];
  };

  # Apply replication password from SOPS-mounted file after postgres start.
  systemd.services.matrix-cluster-replication-password = {
    description = "Set Postgres replication role password from SOPS";
    wantedBy = [ "multi-user.target" ];
    after = [ "postgresql.service" "sops-install-secrets.service" ];
    requires = [ "postgresql.service" "sops-install-secrets.service" ];
    serviceConfig = {
      Type = "oneshot";
      User = "postgres";
      RemainAfterExit = true;
    };
    # The statement is fed on stdin instead of -c so that psql variable
    # interpolation is available. The password is read by psql itself through
    # a backquoted command and inserted as a quoted literal, so it is never
    # spliced into the SQL text by the shell, cannot break out of the quotes,
    # and does not show up in the psql argument list. The heredoc delimiter
    # is quoted so the shell leaves the body untouched.
    script = ''
      set -eu
      if [ ! -r "${cfg.replication.passwordFile}" ]; then
        printf 'Missing replication password file: %s\n' '${cfg.replication.passwordFile}' >&2
        exit 1
      fi
      ${config.services.postgresql.package}/bin/psql -v ON_ERROR_STOP=1 -f - <<'SQL'
      \set pw `tr -d '\n' < "${cfg.replication.passwordFile}"`
      ALTER ROLE replication WITH LOGIN REPLICATION PASSWORD :'pw';
      SQL
    '';
  };
})
(lib.mkIf (cfg.role == "standby") {
  # Hot-standby bootstrap: standby.signal + primary_conninfo with passfile.
  # pg_basebackup must be run manually (see runbook) before this activates
  # for the first time; without a data dir the unit logs a hint and exits 0.
  systemd.services.matrix-cluster-standby-bootstrap = {
    description = "Configure Matrix Postgres hot standby";
    wantedBy = [ "postgresql.service" ];
    after = [ "sops-install-secrets.service" ];
    requires = [ "sops-install-secrets.service" ];
    before = [ "postgresql.service" ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
    };
    script = ''
      set -eu
      if [ ! -d "${pgDataDir}" ]; then
        echo "Postgres data dir ${pgDataDir} missing; run pg_basebackup first (see MATRIX-FAILOVER-RUNBOOK.md)" >&2
        exit 0
      fi

      # In a libpq passfile, colon and backslash inside a field must be
      # backslash-escaped. Without this an IPv6 peer address or a password
      # containing either character yields an entry that never matches.
      pgpass_escape() {
        printf '%s' "$1" | ${pkgs.gnused}/bin/sed -e 's/\\/\\\\/g' -e 's/:/\\:/g'
      }

      # Materialize a libpq passfile from the raw password secret.
      PASSFILE=/var/lib/postgresql/.matrix-cluster-replication.passfile
      PW="$(tr -d '\n' < "${cfg.replication.passwordFile}")"
      umask 077
      printf '%s:%d:replication:replication:%s\n' \
        "$(pgpass_escape '${cfg.replication.peerHost}')" \
        ${toString cfg.replication.peerPort} \
        "$(pgpass_escape "$PW")" > "$PASSFILE"
      chown postgres:postgres "$PASSFILE"
      chmod 0600 "$PASSFILE"

      touch "${pgDataDir}/standby.signal"
      chown postgres:postgres "${pgDataDir}/standby.signal"

      CONF="${pgDataDir}/postgresql.auto.conf"
      touch "$CONF"
      chown postgres:postgres "$CONF"
      # Strip any prior primary_conninfo line, then append fresh one.
      ${pkgs.gnused}/bin/sed -i '/^primary_conninfo/d' "$CONF"
      printf "primary_conninfo = 'host=%s port=%d user=replication passfile=%s sslmode=%s'\n" \
        '${cfg.replication.peerHost}' \
        ${toString cfg.replication.peerPort} \
        "$PASSFILE" \
        '${cfg.replication.sslMode}' >> "$CONF"
    '';
  };
})
(lib.mkIf cfg.acme.enable {
  # DNS-01 issuance through Porkbun; the credentials env file is assembled at
  # runtime by matrix-cluster-acme-env below.
  security.acme.acceptTerms = true;
  security.acme.defaults.email = lib.mkDefault cfg.acme.email;
  security.acme.certs.${cfg.matrixDomain} = {
    dnsProvider = "porkbun";
    environmentFile = "/run/matrix-cluster/porkbun.env";
    # Forced to null so the certificate is not also configured for webroot
    # validation.
    webroot = lib.mkForce null;
  };

  systemd.services.matrix-cluster-acme-env = let
    acmeUnit = "acme-${cfg.matrixDomain}.service";
    secretsUnit = "sops-install-secrets.service";
  in {
    description = "Assemble Porkbun ACME environment file";
    wantedBy = [ "multi-user.target" ];
    requires = [ secretsUnit ];
    after = [ secretsUnit ];
    requiredBy = [ acmeUnit ];
    before = [ acmeUnit ];
    serviceConfig = {
      RemainAfterExit = true;
      Type = "oneshot";
    };
    # Joins the two single-value secret files into one KEY=value env file,
    # created under umask 077 and then set to mode 0400.
    script = ''
      set -eu
      install -d -m 0755 /run/matrix-cluster
      API="$(tr -d '\n' < "${cfg.acme.porkbunApiKeyFile}")"
      SEC="$(tr -d '\n' < "${cfg.acme.porkbunSecretApiKeyFile}")"
      OUT=/run/matrix-cluster/porkbun.env
      umask 077
      {
      printf 'PORKBUN_API_KEY=%s\n' "$API"
      printf 'PORKBUN_SECRET_API_KEY=%s\n' "$SEC"
      } > "$OUT"
      chmod 0400 "$OUT"
    '';
  };
})
]);
}

View File

@@ -0,0 +1,230 @@
# Matrix Cluster Failover Runbook (`accord.tube`)
Primary: `hectic-lab` (NL, `128.140.75.58`)
Standby: `bfs.poland.xray` (PL, `91.198.166.181`)
Module: `hectic.generic.matrix-cluster` (`nixos/module/generic/matrix-cluster.nix`).
Shared secrets: `sus/matrix-cluster.yaml`.
All `psql` and `pg_ctl` invocations use PostgreSQL **17** at data dir
`/var/lib/postgresql/17`.
## Initial setup
### 1. Provision shared SOPS file (`sus/matrix-cluster.yaml`)
On a workstation with both yukkop and yukkop-alt age keys available:
```sh
sudo cat /var/lib/matrix-synapse/homeserver.signing.key # on NL (hectic-lab)
# Copy the single line value into the buffer for the next step.
sops sus/matrix-cluster.yaml
```
Populate the editor with:
```yaml
matrix:
signing-key: <paste verbatim signing-key line from NL>
postgres-replication-password: <openssl rand -base64 32>
object-storage:
credentials: |
ACCESS_KEY_ID=<verbatim copy from sus/hectic-lab.yaml>
SECRET_ACCESS_KEY=<verbatim copy from sus/hectic-lab.yaml>
porkbun-api-key: <PORKBUN_API_KEY>
porkbun-secret-api-key: <PORKBUN_SECRET_API_KEY>
```
Verify recipients:
```sh
sops updatekeys sus/matrix-cluster.yaml
sops -d sus/matrix-cluster.yaml | grep -E 'signing-key|porkbun-api-key|object-storage'
```
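Expected: exit 0 and the `signing-key`, `object-storage`, and `porkbun-api-key` lines are printed. The pattern above does not match `postgres-replication-password` or `porkbun-secret-api-key`; add them to the `grep -E` expression if you want to confirm all five keys.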
### 2. Deploy NL primary first
```sh
nixos-rebuild switch --flake .#'hectic-lab|x86_64-linux' --target-host root@128.140.75.58
```
Verify on NL:
```sh
sudo systemctl status matrix-synapse postgresql matrix-cluster-replication-password
sudo -u postgres psql -c "select rolname, rolreplication from pg_roles where rolname='replication';"
# Expected: replication | t
```
### 3. Seed PL replica with `pg_basebackup`
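On PL (the secret under `/run/secrets/matrix/` and the PostgreSQL 17 binaries only exist once the PL configuration from step 4 has been activated; on a fresh host run that `nixos-rebuild` first, then come back here):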
```sh
sudo systemctl stop postgresql
sudo rm -rf /var/lib/postgresql/17
sudo -u postgres install -d -m 0700 /var/lib/postgresql/17
sudo -u postgres PGPASSWORD="$(sudo cat /run/secrets/matrix/postgres-replication-password)" \
pg_basebackup \
-h 128.140.75.58 \
-p 5432 \
-U replication \
-D /var/lib/postgresql/17 \
-Fp -Xs -P -R \
--no-password
```
`-R` writes `standby.signal` and an initial `primary_conninfo`. The
matrix-cluster module's `matrix-cluster-standby-bootstrap` service runs before
every start of `postgresql.service` and replaces that `primary_conninfo` line
with one that uses a libpq passfile.
### 4. Deploy PL standby
```sh
nixos-rebuild switch --flake .#'bfs.poland.xray|x86_64-linux' --target-host root@91.198.166.181
sudo systemctl start postgresql # run on PL
```
Verify streaming on NL:
```sh
sudo -u postgres psql -c 'select client_addr, state, sync_state from pg_stat_replication;'
# Expected: 91.198.166.181 | streaming | async
```
Verify standby on PL:
```sh
sudo -u postgres psql -c 'select pg_is_in_recovery();'
# Expected: t
sudo systemctl is-active matrix-synapse
# Expected: inactive (standby keeps Synapse off)
```
### 5. Remove duplicate S3 credentials from `sus/hectic-lab.yaml`
Only AFTER NL is confirmed healthy reading from the new shared file:
```sh
sops sus/hectic-lab.yaml
# Delete the matrix/object-storage/credentials block.
sudo nixos-rebuild switch --flake .#'hectic-lab|x86_64-linux'
```
## Normal operations
```sh
# NL: replication health
sudo -u postgres psql -c 'select * from pg_stat_replication;'
# Expected: 1 row, state=streaming, sync_state=async
# PL: replay status
sudo -u postgres psql -c 'select now() - pg_last_xact_replay_timestamp() as lag;'
# Both: cert renewal
sudo systemctl status acme-accord.tube.timer
sudo journalctl -u acme-accord.tube.service --since '24 hours ago'
# Synapse health (NL primary)
curl -sf https://accord.tube/_matrix/client/versions | head
```
## Planned failover (NL -> PL)
```sh
# 1. Drain NL: stop accepting writes.
sudo systemctl stop matrix-synapse
sudo systemctl stop postgresql # ensure no new WAL after this point
# 2. Promote PL replica.
sudo -u postgres pg_ctl -D /var/lib/postgresql/17 promote
# Wait until pg_is_in_recovery() returns f:
sudo -u postgres psql -c 'select pg_is_in_recovery();'
# 3. Make the role switch declarative before rebuilding.
# Edit the flake so rebuilds match the promoted database state:
# - nixos/system/bfs.poland.xray/bfs.poland.xray.nix:
# hectic.generic.matrix-cluster.role = "primary";
# hectic.generic.matrix-cluster.overrideEnableSynapse = true;
# hectic.generic.matrix-cluster.secretsFile = config.sops.secrets."matrix/secrets".path;
# - nixos/system/hectic-lab/hectic-lab.nix:
# hectic.generic.matrix-cluster.role = "standby";
# hectic.generic.matrix-cluster.overrideEnableSynapse = false;
# hectic.generic.matrix-cluster.replication.peerHost = "91.198.166.181";
# hectic.generic.matrix-cluster.replication.allowedSourceIPs = [ "128.140.75.58/32" ];
# (You will also need a matrix/secrets entry on PL - copy from NL via SOPS.)
sudo nixos-rebuild switch --flake .#'bfs.poland.xray|x86_64-linux'
sudo nixos-rebuild switch --flake .#'hectic-lab|x86_64-linux'
sudo systemctl status matrix-synapse
# 4. Swap DNS A record at Porkbun:
# accord.tube A 91.198.166.181 (was 128.140.75.58)
# TTL: set to 300 in advance of any planned failover.
# Porkbun UI: https://porkbun.com/account/domainsSpeedy -> accord.tube -> DNS -> edit A record.
# Or via API:
sudo curl -sX POST https://api.porkbun.com/api/json/v3/dns/editByNameType/accord.tube/A \
-H 'content-type: application/json' \
-d "$(jq -n --arg k "$PORKBUN_API_KEY" --arg s "$PORKBUN_SECRET_API_KEY" \
'{secretapikey:$s,apikey:$k,content:"91.198.166.181",ttl:"300"}')"
# 5. Federation smoke test.
curl -s 'https://federationtester.matrix.org/api/report?server_name=accord.tube' | jq .FederationOK
# Expected: true
```
Expected after the rebuilds:
- `bfs.poland.xray` evaluates and runs as `role = "primary"`.
- `hectic-lab` evaluates as `role = "standby"` with Synapse forced off.
- Future `nixos-rebuild` runs preserve the promoted topology instead of reapplying standby settings to PL.
## Failback (PL -> NL)
```sh
# 1. Stop NL postgres if still up; clear its data dir.
sudo systemctl stop postgresql matrix-synapse
sudo rm -rf /var/lib/postgresql/17
# 2. Re-seed NL from PL (now the live primary).
sudo -u postgres install -d -m 0700 /var/lib/postgresql/17
sudo -u postgres PGPASSWORD="$(sudo cat /run/secrets/matrix/postgres-replication-password)" \
pg_basebackup -h 91.198.166.181 -p 5432 -U replication \
-D /var/lib/postgresql/17 -Fp -Xs -P -R --no-password
# 3. Temporarily flip roles in the flake:
# - hectic-lab.nix: role = "standby"; peerHost = "91.198.166.181";
# - bfs.poland.xray.nix: role = "primary"; peerHost = "128.140.75.58";
# Rebuild both.
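# 4. Once NL is streaming green, do the reverse failover dance:
sudo systemctl stop matrix-synapse # on PL
sudo systemctl stop postgresql # on PL: ensure no new WAL after this point (mirrors planned failover step 1)
sudo -u postgres pg_ctl -D /var/lib/postgresql/17 promote # on NL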
# Then revert the flake role assignments back to NL=primary / PL=standby and
# rebuild both hosts.
# 5. Swap DNS back at Porkbun (A -> 128.140.75.58).
```
## Disaster recovery (NL permanently lost)
```sh
# 1. Promote PL as the new permanent primary.
sudo -u postgres pg_ctl -D /var/lib/postgresql/17 promote
# 2. Edit nixos/system/bfs.poland.xray/bfs.poland.xray.nix:
# hectic.generic.matrix-cluster.role = "primary";
# hectic.generic.matrix-cluster.overrideEnableSynapse = lib.mkForce null;
# hectic.generic.matrix-cluster.replication.peerHost = "<new-standby-ip>";
# hectic.generic.matrix-cluster.replication.allowedSourceIPs = [ "<new-standby-ip>/32" ];
# 3. Provision a new host (any region with Porkbun-managed DNS) and import
# self.nixosModules.matrix-cluster with role = "standby" pointed at PL's IP.
# 4. Bootstrap the new standby via pg_basebackup from PL exactly as in
# "Initial setup" step 3, replacing 128.140.75.58 with PL's IP.
# 5. Update Porkbun A record to PL's IP permanently.
```

View File

@@ -9,18 +9,64 @@
config,
...
}: {
# TODO:
# white list
# torent
# rate limit
# ping - game and speak
imports = [
self.nixosModules.xray-system
self.nixosModules.matrix-cluster
];
hectic.generic.xray-system = {
enable = true;
defaultSopsFile = ../../../sus/bfs.xray.yaml;
};
hectic.generic.matrix-cluster = {
enable = true;
role = "standby";
matrixDomain = "accord.tube";
signingKeyFile = config.sops.secrets."matrix/signing-key".path;
objectStorage.s3 = {
bucket = "matrix-hectic-lab";
regionName = "hel1";
endpointUrl = "https://hel1.your-objectstorage.com";
credentialsFile = config.sops.secrets."matrix/object-storage/credentials".path;
};
replication = {
peerHost = "128.140.75.58";
passwordFile = config.sops.secrets."matrix/postgres-replication-password".path;
};
acme = {
enable = true;
porkbunApiKeyFile = config.sops.secrets."matrix/porkbun-api-key".path;
porkbunSecretApiKeyFile = config.sops.secrets."matrix/porkbun-secret-api-key".path;
};
};
sops.secrets."matrix/signing-key" = {
key = "matrix/signing-key";
owner = "matrix-synapse";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/postgres-replication-password" = {
key = "matrix/postgres-replication-password";
owner = "postgres";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/object-storage/credentials" = {
key = "matrix/object-storage/credentials";
owner = "matrix-synapse";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/porkbun-api-key" = {
key = "matrix/porkbun-api-key";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/porkbun-secret-api-key" = {
key = "matrix/porkbun-secret-api-key";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
}

View File

@@ -23,6 +23,7 @@ let
in {
imports = [
self.nixosModules.hectic
self.nixosModules.matrix-cluster
inputs.sops-nix.nixosModules.sops
self.nixosModules."shadowsocks-rust" # NOTE(nrv): impl
@@ -56,14 +57,15 @@ in {
ipv6 = "2a01:4f8:c2c:d54a";
};
services.matrix = {
enable = true;
enable = false;
};
generic.matrix-cluster = {
enable = true;
role = "primary";
inherit matrixDomain;
signingKeyFile = config.sops.secrets."matrix/signing-key".path;
secretsFile = config.sops.secrets."matrix/secrets".path;
turnSecretFile = config.sops.secrets."matrix/turn-secret".path;
publicIp = "128.140.75.58";
postgresql = {
port = 5432;
initialEnvFile = config.sops.secrets."init-postgresql".path;
};
users = {
yukkop = {
passwordFile = config.sops.secrets."matrix/users/yukkop/password".path;
@@ -80,13 +82,21 @@ in {
};
};
objectStorage.s3 = {
enable = true;
bucket = "matrix-hectic-lab";
regionName = "hel1";
endpointUrl = "https://hel1.your-objectstorage.com";
credentialsFile = config.sops.secrets."matrix/object-storage/credentials".path;
};
inherit matrixDomain;
replication = {
peerHost = "91.198.166.181";
passwordFile = config.sops.secrets."matrix/postgres-replication-password".path;
allowedSourceIPs = [ "91.198.166.181/32" ];
};
acme = {
enable = true;
porkbunApiKeyFile = config.sops.secrets."matrix/porkbun-api-key".path;
porkbunSecretApiKeyFile = config.sops.secrets."matrix/porkbun-secret-api-key".path;
};
};
services.media-browser = {
@@ -172,12 +182,6 @@ in {
key = "matrix/secrets";
owner = "matrix-synapse";
};
sops.secrets."matrix/turn-secret" = {
key = "matrix/turn-secret";
owner = "turnserver";
group = "turnserver";
mode = "0400";
};
sops.secrets."matrix/users/yukkop/password" = {
key = "matrix/users/yukkop/password";
owner = "matrix-synapse";
@@ -198,6 +202,31 @@ in {
key = "matrix/object-storage/credentials";
owner = "matrix-synapse";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
# Shared cluster secrets (PL standby also reads from this file).
sops.secrets."matrix/signing-key" = {
key = "matrix/signing-key";
owner = "matrix-synapse";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/postgres-replication-password" = {
key = "matrix/postgres-replication-password";
owner = "postgres";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/porkbun-api-key" = {
key = "matrix/porkbun-api-key";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
sops.secrets."matrix/porkbun-secret-api-key" = {
key = "matrix/porkbun-secret-api-key";
mode = "0400";
sopsFile = "${flake}/sus/matrix-cluster.yaml";
};
services.mailserver = {
@@ -252,6 +281,10 @@ in {
51820 # wg-bfs
55228 # ss-bfs
];
# Postgres replication: only the PL standby peer may reach 5432.
extraInputRules = ''
ip saddr 91.198.166.181/32 tcp dport 5432 accept
'';
};
virtualisation.docker.enable = true;

94
sus/matrix-cluster.yaml Normal file
View File

@@ -0,0 +1,94 @@
#ENC[AES256_GCM,data:BzhWtre9w6tLLlnoJvqMnXQDfYMVr6bmxG4sTqtm,iv:S83CLFKLBu5jEIrbsBxOcuOeZCL+/YqARcr/zk3kBAg=,tag:IrnzpRDPxhfzZA4R3fqQWQ==,type:comment]
#ENC[AES256_GCM,data:eOS4sIkeCR4f+d0jz8quCzxVGgodyklvWxiNegk5g7e2EMDTDHRXrHc0Pr4dFpI/S1W0fPkayKgZCXPlw8zMWg==,iv:fVC6MC+ljEFVS7AI9r9W8EpEs3eE5Gy7lem+dGiPyes=,tag:nn2fP7wVADy4xtHgmazG9g==,type:comment]
#ENC[AES256_GCM,data:HvSCPo8vhsJLY6He5MkcIx8QqxsRRXrS0nmp4gio3aJ8L9DmK6UNFTac7ty3/EkRwYI4XJun/DOYxgf3WhsOedTSqTbI,iv:Ver7K1aegAWS87XYfdXCzij3H3OBU8QIh0rL+IRb6ik=,tag:b3jc8hB4GPMJtxg33D0DDA==,type:comment]
#
#ENC[AES256_GCM,data:w8SjUxjNkRdqgBr+uNAKhN3eQX/P7lBHq/53ven8H4wtjg9OTw7RQMHwNoYgXy3qZlN2rUoJQKMQlIHlYXLvfzDtCeQllg==,iv:Pb6uxR0NyGtXe2N9lMoEtGa9CecxDM4h0dtJmxRSG7Q=,tag:z6J6V8Yb0aYjhqJz/ZMHyw==,type:comment]
#ENC[AES256_GCM,data:xHMZQFcuNJ21rBM3VBe6JNK47v0FJBceUH6aGHeFqJMbVOfJXMxMKTOmQ3EbN1p3ZnpjbYNT03csKUdpEVQn2cnAyqKZSrkftR1bQbRPuT4=,iv:/jcnpofv8aWHxvdmJ7AB0e3tkDlRXmf4FounT8Uuw0o=,tag:Wnmy3iR+tHiX8583HEiw+Q==,type:comment]
#ENC[AES256_GCM,data:Amp7pUg0yOrbDQ22Av/Q7ZlzYC6Rk0CYdzlvWxTS8czQi3NBhmQ6j01oJlYolRCaGMQzFrsbDXNuHiay29YPV4FU/T2NELBVz9yCyMzQpQ==,iv:tVi/jBNDbwx8NFSmm6EanKAUYu1srLRn71RA34TbuG4=,tag:5gtN46WCTOi4ZCExDqPV6g==,type:comment]
#ENC[AES256_GCM,data:K/MJsdAUd+G0gJxag7rebBCZfdrIt1It3I/kwqUdecGx+PUR+y5RXb8rxoIArMWneuMEo6f7cW5ICo/52JF1vNJThsKtdlNeohDEXM3JOFTvQG+BVlFs,iv:7X1WKjg1WyLWoobrG6iXnSh1nqUzWlMGFziKqtZREPs=,tag:e/K310IxJA/ME69HbIQGaw==,type:comment]
#ENC[AES256_GCM,data:q9Jf0i01TSSB26axKK/9q7yQ91N8L890O0b9qZRpb5ExaHWyRDlFkNAb1HVGKsefpVh2sGY5gTsmmFB/31/FwN/48ffw8nsGdtj/V6EsZDM3,iv:QJb7I8/vBhgJpoG4G771/x/9c2F33UQd9OWB2+Z1q/w=,tag:zunRPr0RYukCD9/rE3vE6g==,type:comment]
#ENC[AES256_GCM,data:JswZITMu+NZ9h0tIPx7GPKQJYGqaw2fw8Vm7apTI9fjUBNiMTXJhlnNSiRc1MTBYgXhx6m90Y17JZD5TcS6ilUgyqAPDJY1U,iv:98vcoMoRa+5hMD/7iiIz342lgtK6Ont9YRVt9W4aRaY=,tag:FKeDLgUaPm1fKoveN8axEg==,type:comment]
#ENC[AES256_GCM,data:IHEW4GDIRv+0jFiO9qD+Xf2dS0OWGn2J6MELbj9fCPFLXlhBD8ZradmXcKYgwIM6Da3mhuj8KIdk9RKADxsp9lWkZ0h3wMEJJwhN7xLKgA==,iv:mBUlVa6zx0aijvbn2lbfhw/iHzqrXAC4H9sZ9Eeyf5Y=,tag:+hfWOfN2feKy/J5UiLvONQ==,type:comment]
#
#ENC[AES256_GCM,data:sJVDYdTVIAweq5dXOV8DvigYDH70Ge5C74+oxPs0Ah7T5fGfqoEnvpVKU/NRlOzz7II=,iv:vS2tXmcFTSkopbxnCLCoSPnz0Pd/V/GTR78ulXW0j1o=,tag:UdVIWWWAtgStvV4VlOKzMw==,type:comment]
#ENC[AES256_GCM,data:1IUl4FGIcptVFMST8A9gc4p3IwxZpZnFDlX56mL4hnJ35Mi8JSw0IwfnUB+DGagz,iv:1x3M/oVlAwdm5tdN5TgMXstlUnd2hzGGYhOefrV8tfA=,tag:yPgY+8O37C76rowMBXZe8g==,type:comment]
#
#ENC[AES256_GCM,data:dNp9fNe6kFn5r21fgfuY66BQmHwVRVCFAUQz+hpAJw==,iv:rfqK9B3EiGc0PTjGHE/yCPHVkt+noUNYkw6+nasRGJo=,tag:Eq2GyNr1wMgP/TFlIXhcrw==,type:comment]
#
#ENC[AES256_GCM,data:EMxRfCZ/gq8vS2pOUlxnDO2pxrW/Pjrms0Mk6xQSS9oEWj0g80z0BgOAG2pMvNreLfDHepdLCmaSn/tTefuH3QVaPz0C,iv:Q0sJ/97fm0YZjduAija53Dm94SPvT4jr7CxCySkH4xM=,tag:p2fNrTC7hiGFWI3AsjYoQg==,type:comment]
matrix:
signing-key: ENC[AES256_GCM,data:HAhg/QBQyXiv/1dMru95b+4v5IybMn4TaeAuYKk=,iv:R/hYs/HrlIXLWzJv67O0DKrix8tJ50LoNNbwnaMXCQc=,tag:3dMOWxyFqWLeUkWR51ccKw==,type:str]
postgres-replication-password: ENC[AES256_GCM,data:0KT0CRSaNzrUbTd5S4D7pxfjf+zazdtZcXsbvdoOHhk9dBz5f441ouO0Gv7O14W8eayxMUz19YD9Ww==,iv:BozluZt/Ll3kqeWSbA1H2+BKp5a/AR5u/P38Gk8VSR8=,tag:iy0qGPmYAK74BgIkglpfgw==,type:str]
object-storage:
credentials: ENC[AES256_GCM,data:qgrBzVZGS7HeaLHQpi8xiqSkZcw8zwirb5/p6ArcXkuT0FdFH2ucl+o+BQkC4z/bVqpOw0ZiQt19sXzAvC4U4Y7it4i05+9g2Bs+flYutjswBE7YO/DvEspNywsfKQ==,iv:8n5PVz1yKnKe5oJnCK7ywJkFj+33eDVxiFmt9T3Q3TU=,tag:CoDkxdQqg0S4ebOCPX8y+w==,type:str]
porkbun-api-key: ENC[AES256_GCM,data:kQlmH3v6mdkXNaqXGt4V0MpOxrgPtSF/8YQ7jzJ0GtECbJzo+eF6VreguWDu3VMX2TpP1tah0m/ZaiHyAogKZSZS32c=,iv:pPTiQ0GMlndQvuBzoNj2SHtwxCiQuZdG2DOkMAiT+RA=,tag:pUiaq9OzoSft+vHO5/MkgQ==,type:str]
porkbun-secret-api-key: ENC[AES256_GCM,data:41lQr4KJ/Isa6bJSYS0E5HDMIafqWMbV9W7MSsqZjFTrryGUnUb3Rhx/X/nxGgFTwFyF6Y4BLBaCkdKYFGmcBP+QS/E=,iv:zSCoTOGWoIBUmhXjFNStSzYg9rTG4HWddrmGWl2nMMc=,tag:FgjLBGwnmmjx/8wt4XVEdA==,type:str]
sops:
age:
- recipient: age1x04u7ftjgx8de2gq596e7frauze764cmn7jjwqnx8szthvfft5qq0tezx6
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBxanpXZjNmTlQyY0JmYXMz
MFZtZjRsdHhpQmp5d3IybnAwNTA5bDFaelRrCm9PUW1VZzhoSVQvWnJDLzZNNEVz
QW5YY3RNTXF1bFNxbmlDSVBGNU12QkUKLS0tIEJRS1orVVd6OVg5VG1xMGtkUita
bHB2ZGdMdVpOanVxQzltYXdCWjRGVEEKHNNuMz4BgAm4JaXlFPu6WM8uo+1apd/V
t3YjsihySdFDTClXU65K7qZcn1bM3Try44OP7RPE/+f9e1Q+k5w8Sg==
-----END AGE ENCRYPTED FILE-----
- recipient: age1r25zdeqq8nac6dgca9en28r57ffyz9u9d8z5yc25gc8xqz747vaqmdtk0h
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSA4N0dLalQyR2JCUkxxT0lV
UDMyTkQyWUc0eG5sVTNrVkpaL1FMNjVJNWx3CmJPZHM1N25JeW9JUmRWTG1GOUdO
eXNwODhaYUx3TVNkeVozSlBGYzJzenMKLS0tIEFDbnpjcVlsdGhta2czWWIrQ1Yv
ZFU5MVRYQVNJcnM2dU9nRXNwN1dqdEEK3tMSrxscin5f2cl6dSEaIdGasqMi+zmI
jKU6Rn4ZZz3JkcendWgysuZgV3MAdUHnZKZSfq2o9/1OjWuynUtRwQ==
-----END AGE ENCRYPTED FILE-----
- recipient: age1w4hw2ntxrtfqhht63s9lf7nhjxjmdcc927hndn5ygcqqj532qssq4m2m6p
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBNSDRWU2J2OUpPVXRocFlq
RmJ3TTJUL21ZWmhVOEFjOFVzWkhxYkxIV3k4Ck5jSlU5RjVnalZIVTJ3SzhRbzFT
cXNWRGZiMm5MTVFVeEdPcVR0ZUJMZncKLS0tIE9jZHNhTk1zYkxJTkRTWU1rWW9W
cC9VV0tDL3N5M0svNy9hdStndVNFLzAKxsls4LOJG4RgWJsjjW2ZGtv+RfYIaPB1
PEUwSHSFzpcw4BnBuhDnr2tNzu2XLPXjPF1m9lsN0G+NiEIQc72GnQ==
-----END AGE ENCRYPTED FILE-----
- recipient: age1vv46vn4hsn2lg6jy834cpu40c3mvqklldcm3hjtynrhwtpmlpc8szruz4v
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBKa3lYanlFODlucWdhaE16
Y1o4djJYMEhSSThOOXFvcktpMm0zVDVKYmljCkhKVWJ4STBVcFhmeXpaZXhWL0lF
YlVobE9MdDhGVGJ4Y2VCMVpOaVdEWUUKLS0tIGQ4YUk4ZmZ3K2dIS0xRVFR4MXRx
SU5lcVJjRUhjYkNrZzFSeHU0MWdyZ2cKmzTPZK7zuzEsMaaTity3bDYe+jMZXpGW
yrX7Tc00sxm13Z/gZ845dLMoMUQvh4DXQX3aQlSJ+c35z/7z+7LpiA==
-----END AGE ENCRYPTED FILE-----
- recipient: age13h8twnwvgxn04l5ywtru89a6psw5d0uckr2eghxsjp88a5augvsstq5ard
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSByNm1xVkxZNDZTdFh4RU1H
bmluZEdKRm80K3BwSWdXeVh6TmZ2bUtGZ0ZrCnlwSFNUT3B4V21iazFiQXNDZlp2
RWRjNUp5L2lPUmRTblR4MUhxVFdhaFUKLS0tIFRBZ1VOSTNFcFhmTmFCbVFiSHNu
Y1pobW03Sjhwc3hjZVZXVUQyWFlGNEUKX5hq7bDqpMbVaGoCg0J41xHjmGFZXrHf
upNZ7zfHlmsoRMwsiNvRlC12t15rs4xdH3PA7mxqV1CyLabKDZixpQ==
-----END AGE ENCRYPTED FILE-----
- recipient: age1fpytf05sg9n6ywpwkmn09lhpfvgtud9h75h76jhxha475zpnasqq952rpu
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBjaFdLSnZZWUljbS9VUVlW
RWt4UllkWTVkSGlrQ0NqZjdMdWdVWko1djJNCnBnVm90cFMvbnM3a0hucDAyaVU5
b1gvZDhkOGUyN3lYRnJERDhYOUdVRWMKLS0tIGZKdkoxbC9QanpvK2RQL0tRaG9N
M3FuekJtQ2xob1dGRXc2eWM5RU9FZ1EKgrLrhh5rFtq+QWjy3loaGO4BMhQPouH6
VVpyr7UZvfhCJ7+HREKHQgQqSPNgGVYJv6HJ4+4X7DKZFET6wi7SEw==
-----END AGE ENCRYPTED FILE-----
- recipient: age1jxntjca8q2vxvf2jaal4xyvm2ae6sh62fhv897694kuzawfrk5asj00zdt
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB1SG5ab2dzYlZRbHovWVd0
dzgyaEdWY2Vja3BCeHUxc1VEZGxZeENoVEVrCkUyYjJtek1oVDJzaTVkWlM5OWVK
TFhxYVd5Ny8zZkpmSVplNkwrTG4vclUKLS0tIHE3Zzl2WWppVXZueUcrbzgwZXFn
T29jSG0va3g2NE4zbklyWS9BN2hKUW8KDl3jMTCeEgNBsu+Krs/lB8iXlnZu8zxB
iNX4GegOxmlgJOA6jMCh8AlwUzz7HIex9jJ5MunZ9/6V/Aubqjb1Ug==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2026-05-23T22:14:25Z"
mac: ENC[AES256_GCM,data:omw2csf8/F8Ob2znjBL6/4Et2NCxu7yaMLsCfy3gP/qM8kzGW8Z7bo2gx0C7pX3qkKbersBJ/CMLaZNyEdjYnt4OsFfN2LyhBITsRID7ASt4BWXgnQ0p2jM8AV8Qb4Mxz4VMLcuDjdhtITcsPKu1z60BYomJmnLFzVVy2UYPw94=,iv:1s68DQfNCFxK6Bte7Km3V0FyopMV5DJ6EVdZ+1stgf8=,tag:ZNJ59SVV/4WqgIaEYBMRJQ==,type:str]
unencrypted_suffix: _unencrypted
version: 3.10.2