From a435bd6feddba5f6633e61ada7b89bf642aeb9b7 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Thu, 16 Apr 2026 02:54:16 +0000 Subject: [PATCH] feat(monitoring): add edge-triggered threshold handling with group action orchestration and HA-aware Proxmox shutdowns --- changelog.md | 7 + readme.md | 41 +++- test/test.ts | 211 ++++++++++++++++++++ ts/00_commitinfo_data.ts | 2 +- ts/actions/base-action.ts | 2 + ts/actions/proxmox-action.ts | 364 ++++++++++++++++++++++++++++------ ts/actions/shutdown-action.ts | 22 ++ ts/cli/action-handler.ts | 73 +++++-- ts/cli/ups-handler.ts | 7 +- ts/daemon.ts | 178 ++++++++++++++++- ts/group-monitoring.ts | 198 ++++++++++++++++++ ts/systemd.ts | 12 +- ts/ups-monitoring.ts | 52 ++++- 13 files changed, 1052 insertions(+), 117 deletions(-) create mode 100644 ts/group-monitoring.ts diff --git a/changelog.md b/changelog.md index 66d34fd..584c9c0 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,12 @@ # Changelog +## 2026-04-16 - 5.7.0 - feat(monitoring) +add edge-triggered threshold handling with group action orchestration and HA-aware Proxmox shutdowns + +- Track per-action threshold entry state so threshold-based actions fire only when conditions are newly violated +- Add group monitoring and threshold evaluation for redundant and non-redundant UPS groups, including suppression of destructive actions when members are unreachable +- Support optional Proxmox HA stop requests for HA-managed guests and prevent duplicate Proxmox or host shutdown scheduling + ## 2026-04-14 - 5.6.0 - feat(config) add configurable default shutdown delay for shutdown actions diff --git a/readme.md b/readme.md index 6c82ddf..c71f9a9 100644 --- a/readme.md +++ b/readme.md @@ -12,12 +12,12 @@ For reporting bugs, issues, or security vulnerabilities, please visit [community - **🔌 Multi-UPS Support** — Monitor multiple UPS devices from a single daemon - **📡 Dual Protocol Support** — SNMP (v1/v2c/v3) for network UPS + UPSD/NIS for USB-connected UPS via NUT -- 
**🖥️ Proxmox Integration** — Gracefully shut down QEMU VMs and LXC containers before host shutdown (auto-detects CLI tools — no API token needed on Proxmox hosts) +- **🖥️ Proxmox Integration** — Gracefully shut down QEMU VMs and LXC containers before host shutdown, with optional HA-aware stop requests for HA-managed guests - **👥 Group Management** — Organize UPS devices into groups with flexible operating modes - **Redundant Mode** — Only trigger actions when ALL UPS devices in a group are critical - **Non-Redundant Mode** — Trigger actions when ANY UPS device is critical - **⚙️ Action System** — Define custom responses with flexible trigger conditions - - Battery & runtime threshold triggers + - Edge-triggered battery & runtime threshold triggers - Power status change triggers - Webhook notifications (POST/GET) - Custom shell scripts @@ -255,6 +255,7 @@ their own `shutdownDelay`. "triggerMode": "onlyThresholds", "thresholds": { "battery": 30, "runtime": 15 }, "proxmoxMode": "auto", + "proxmoxHaPolicy": "haStop", "proxmoxExcludeIds": [], "proxmoxForceStop": true }, @@ -360,6 +361,10 @@ For USB-connected UPS via [NUT (Network UPS Tools)](https://networkupstools.org/ Actions define automated responses to UPS conditions. They run **sequentially in array order**, so place Proxmox actions before shutdown actions. +Threshold-based actions are **edge-triggered**: they fire when the monitored UPS or group **enters** a threshold violation, not on every polling cycle while the threshold remains violated. If the condition clears and later re-enters, the action can fire again. + +Shutdown and Proxmox actions also suppress duplicate runs where possible, so overlapping UPS and group actions do not repeatedly schedule the same host or guest shutdown workflow. + #### Action Types | Type | Description | @@ -382,8 +387,8 @@ Actions define automated responses to UPS conditions. 
They run **sequentially in | Mode | Description | | ----------------------------- | -------------------------------------------------------- | | `onlyPowerChanges` | Only when power status changes (online ↔ onBattery) | -| `onlyThresholds` | Only when battery or runtime thresholds are violated | -| `powerChangesAndThresholds` | On power changes OR threshold violations (default) | +| `onlyThresholds` | Only when battery or runtime thresholds are newly violated | +| `powerChangesAndThresholds` | On power changes OR when thresholds are newly violated (default) | | `anyChange` | On every polling cycle | #### Shutdown Action @@ -441,6 +446,8 @@ Actions define automated responses to UPS conditions. They run **sequentially in Gracefully shuts down QEMU VMs and LXC containers on a Proxmox node before the host is shut down. +If you use Proxmox HA, NUPST can optionally request `state=stopped` for HA-managed guests instead of only issuing direct `qm` / `pct` shutdown commands. + NUPST supports **two operation modes** for Proxmox: | Mode | Description | Requirements | @@ -459,6 +466,7 @@ NUPST supports **two operation modes** for Proxmox: "thresholds": { "battery": 30, "runtime": 15 }, "triggerMode": "onlyThresholds", "proxmoxMode": "auto", + "proxmoxHaPolicy": "haStop", "proxmoxExcludeIds": [100, 101], "proxmoxStopTimeout": 120, "proxmoxForceStop": true @@ -473,6 +481,7 @@ NUPST supports **two operation modes** for Proxmox: "thresholds": { "battery": 30, "runtime": 15 }, "triggerMode": "onlyThresholds", "proxmoxMode": "api", + "proxmoxHaPolicy": "haStop", "proxmoxHost": "localhost", "proxmoxPort": 8006, "proxmoxTokenId": "root@pam!nupst", @@ -487,6 +496,7 @@ NUPST supports **two operation modes** for Proxmox: | Field | Description | Default | | --------------------- | ----------------------------------------------- | ------------- | | `proxmoxMode` | Operation mode | `auto` | +| `proxmoxHaPolicy` | HA handling for HA-managed guests (`none` or `haStop`) | `none` | | 
`proxmoxHost` | Proxmox API host (API mode only) | `localhost` | | `proxmoxPort` | Proxmox API port (API mode only) | `8006` | | `proxmoxNode` | Proxmox node name | Auto-detect via hostname | @@ -504,11 +514,20 @@ NUPST supports **two operation modes** for Proxmox: pveum user token add root@pam nupst --privsep=0 ``` +**HA Policy values:** + +- **`none`** — Treat HA-managed and non-HA guests the same. NUPST sends normal guest shutdown commands. +- **`haStop`** — For HA-managed guests, NUPST requests HA resource state `stopped`. Non-HA guests still use normal shutdown commands. + > ⚠️ **Important:** Place the Proxmox action **before** the shutdown action in the actions array so VMs are stopped before the host shuts down. ### Group Configuration -Groups coordinate actions across multiple UPS devices: +Groups coordinate actions across multiple UPS devices. + +Group actions are evaluated **after all UPS devices have been refreshed for a polling cycle**. + +There is **no aggregate battery math** across the group. Instead, each group action evaluates each member UPS against that action's own thresholds. | Field | Description | Values | | ------------- | ---------------------------------- | -------------------- | @@ -520,8 +539,10 @@ Groups coordinate actions across multiple UPS devices: **Group Modes:** -- **`redundant`** — Actions trigger only when ALL UPS devices in the group are critical. Use for setups with backup power units. -- **`nonRedundant`** — Actions trigger when ANY UPS device is critical. Use when all UPS units must be operational. +- **`redundant`** — A threshold-based action triggers only when **all** UPS devices in the group are on battery and below that action's thresholds. Use for setups with backup power units. +- **`nonRedundant`** — A threshold-based action triggers when **any** UPS device in the group is on battery and below that action's thresholds. Use when all UPS units must be operational. 
+ +For threshold-based **destructive** group actions (`shutdown` and `proxmox`), NUPST suppresses execution while any group member is `unreachable`. This prevents acting on partial data during network failures. ### HTTP Server Configuration @@ -597,6 +618,7 @@ NUPST tracks communication failures per UPS device: - After **3 consecutive failures**, the UPS status transitions to `unreachable` - **Shutdown actions will NOT fire** on `unreachable` — this prevents false shutdowns from network glitches - Webhook and script actions still fire, allowing you to send alerts +- Threshold-based destructive **group** actions are also suppressed while any required group member is `unreachable` - When connectivity is restored, NUPST logs a recovery event with downtime duration - The failure counter is capped at 100 to prevent overflow @@ -613,7 +635,7 @@ UPS Devices (2): ✓ Main Server UPS (online - 100%, 3840min) Host: 192.168.1.100:161 (SNMP) Groups: Data Center - Action: proxmox (onlyThresholds: battery<30%, runtime<15min) + Action: proxmox (onlyThresholds: battery<30%, runtime<15min, ha=stop) Action: shutdown (onlyThresholds: battery<20%, runtime<10min, delay=10min) ✓ Local USB UPS (online - 95%, 2400min) @@ -784,6 +806,9 @@ curl -k -H "Authorization: PVEAPIToken=root@pam!nupst=YOUR-SECRET" \ # Check token permissions pveum user token list root@pam + +# If using proxmoxHaPolicy: haStop +ha-manager config ``` ### Actions Not Triggering diff --git a/test/test.ts b/test/test.ts index c5ba247..c5e3457 100644 --- a/test/test.ts +++ b/test/test.ts @@ -24,8 +24,16 @@ import { import { buildFailedUpsPollSnapshot, buildSuccessfulUpsPollSnapshot, + getActionThresholdStates, + getEnteredThresholdIndexes, hasThresholdViolation, + isActionThresholdExceeded, } from '../ts/ups-monitoring.ts'; +import { + buildGroupStatusSnapshot, + buildGroupThresholdContextStatus, + evaluateGroupActionThreshold, +} from '../ts/group-monitoring.ts'; import { createInitialUpsStatus } from 
'../ts/ups-status.ts'; import * as qenv from 'npm:@push.rocks/qenv@^6.0.0'; @@ -532,6 +540,209 @@ Deno.test('hasThresholdViolation: only fires on battery when any action threshol ); }); +Deno.test('isActionThresholdExceeded: evaluates a single action threshold on battery only', () => { + assertEquals( + isActionThresholdExceeded( + { type: 'shutdown', thresholds: { battery: 50, runtime: 20 } }, + 'online', + 40, + 10, + ), + false, + ); + + assertEquals( + isActionThresholdExceeded( + { type: 'shutdown', thresholds: { battery: 50, runtime: 20 } }, + 'onBattery', + 40, + 10, + ), + true, + ); +}); + +Deno.test('getActionThresholdStates: returns per-action threshold state array', () => { + assertEquals( + getActionThresholdStates('onBattery', 25, 8, [ + { type: 'shutdown', thresholds: { battery: 30, runtime: 10 } }, + { type: 'shutdown', thresholds: { battery: 10, runtime: 5 } }, + { type: 'webhook' }, + ]), + [true, false, false], + ); +}); + +Deno.test('getEnteredThresholdIndexes: reports only newly-entered thresholds', () => { + assertEquals(getEnteredThresholdIndexes(undefined, [false, true, true]), [1, 2]); + assertEquals(getEnteredThresholdIndexes([false, true, false], [true, true, false]), [0]); + assertEquals(getEnteredThresholdIndexes([true, true], [true, false]), []); +}); + +// ----------------------------------------------------------------------------- +// Group Monitoring Tests +// ----------------------------------------------------------------------------- + +Deno.test('buildGroupStatusSnapshot: redundant group stays online while one UPS remains online', () => { + const snapshot = buildGroupStatusSnapshot( + { id: 'group-1', name: 'Group Main' }, + 'redundant', + [ + { + ...createInitialUpsStatus({ id: 'ups-1', name: 'UPS 1' }, 1000), + powerStatus: 'onBattery' as const, + batteryCapacity: 40, + batteryRuntime: 12, + }, + { + ...createInitialUpsStatus({ id: 'ups-2', name: 'UPS 2' }, 1000), + powerStatus: 'online' as const, + batteryCapacity: 98, + 
batteryRuntime: 999, + }, + ], + undefined, + 5000, + ); + + assertEquals(snapshot.updatedStatus.powerStatus, 'online'); + assertEquals(snapshot.transition, 'powerStatusChange'); +}); + +Deno.test('buildGroupStatusSnapshot: nonRedundant group goes unreachable when any member is unreachable', () => { + const snapshot = buildGroupStatusSnapshot( + { id: 'group-2', name: 'Group Edge' }, + 'nonRedundant', + [ + { + ...createInitialUpsStatus({ id: 'ups-1', name: 'UPS 1' }, 1000), + powerStatus: 'online' as const, + }, + { + ...createInitialUpsStatus({ id: 'ups-2', name: 'UPS 2' }, 1000), + powerStatus: 'unreachable' as const, + unreachableSince: 2000, + }, + ], + { + ...createInitialUpsStatus({ id: 'group-2', name: 'Group Edge' }, 1000), + powerStatus: 'online' as const, + }, + 6000, + ); + + assertEquals(snapshot.updatedStatus.powerStatus, 'unreachable'); + assertEquals(snapshot.transition, 'powerStatusChange'); +}); + +Deno.test('evaluateGroupActionThreshold: redundant mode requires all members to be critical', () => { + const evaluation = evaluateGroupActionThreshold( + { type: 'shutdown', thresholds: { battery: 50, runtime: 20 } }, + 'redundant', + [ + { + ...createInitialUpsStatus({ id: 'ups-1', name: 'UPS 1' }, 1000), + powerStatus: 'onBattery' as const, + batteryCapacity: 40, + batteryRuntime: 15, + }, + { + ...createInitialUpsStatus({ id: 'ups-2', name: 'UPS 2' }, 1000), + powerStatus: 'online' as const, + batteryCapacity: 95, + batteryRuntime: 999, + }, + ], + ); + + assertEquals(evaluation.exceedsThreshold, false); +}); + +Deno.test('evaluateGroupActionThreshold: nonRedundant mode trips on any critical member', () => { + const evaluation = evaluateGroupActionThreshold( + { type: 'shutdown', thresholds: { battery: 50, runtime: 20 } }, + 'nonRedundant', + [ + { + ...createInitialUpsStatus({ id: 'ups-1', name: 'UPS 1' }, 1000), + powerStatus: 'onBattery' as const, + batteryCapacity: 40, + batteryRuntime: 15, + }, + { + ...createInitialUpsStatus({ id: 'ups-2', 
name: 'UPS 2' }, 1000), + powerStatus: 'online' as const, + batteryCapacity: 95, + batteryRuntime: 999, + }, + ], + ); + + assertEquals(evaluation.exceedsThreshold, true); + assertEquals(evaluation.blockedByUnreachable, false); +}); + +Deno.test('evaluateGroupActionThreshold: blocks destructive actions when a member is unreachable', () => { + const evaluation = evaluateGroupActionThreshold( + { type: 'proxmox', thresholds: { battery: 50, runtime: 20 } }, + 'nonRedundant', + [ + { + ...createInitialUpsStatus({ id: 'ups-1', name: 'UPS 1' }, 1000), + powerStatus: 'onBattery' as const, + batteryCapacity: 25, + batteryRuntime: 8, + }, + { + ...createInitialUpsStatus({ id: 'ups-2', name: 'UPS 2' }, 1000), + powerStatus: 'unreachable' as const, + unreachableSince: 3000, + }, + ], + ); + + assertEquals(evaluation.exceedsThreshold, true); + assertEquals(evaluation.blockedByUnreachable, true); +}); + +Deno.test('buildGroupThresholdContextStatus: uses the worst triggering member runtime', () => { + const status = buildGroupThresholdContextStatus( + { id: 'group-3', name: 'Group Worst' }, + [ + { + exceedsThreshold: true, + blockedByUnreachable: false, + representativeStatus: { + ...createInitialUpsStatus({ id: 'ups-1', name: 'UPS 1' }, 1000), + powerStatus: 'onBattery' as const, + batteryCapacity: 30, + batteryRuntime: 9, + }, + }, + { + exceedsThreshold: true, + blockedByUnreachable: false, + representativeStatus: { + ...createInitialUpsStatus({ id: 'ups-2', name: 'UPS 2' }, 1000), + powerStatus: 'onBattery' as const, + batteryCapacity: 20, + batteryRuntime: 4, + }, + }, + ], + [0, 1], + { + ...createInitialUpsStatus({ id: 'group-3', name: 'Group Worst' }, 1000), + powerStatus: 'online' as const, + }, + 7000, + ); + + assertEquals(status.powerStatus, 'onBattery'); + assertEquals(status.batteryCapacity, 20); + assertEquals(status.batteryRuntime, 4); +}); + // ----------------------------------------------------------------------------- // UpsOidSets Tests // 
----------------------------------------------------------------------------- diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index d6636af..f5096de 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@serve.zone/nupst', - version: '5.6.0', + version: '5.7.0', description: 'Network UPS Shutdown Tool - Monitor SNMP-enabled UPS devices and orchestrate graceful system shutdowns during power emergencies' } diff --git a/ts/actions/base-action.ts b/ts/actions/base-action.ts index 561d15f..334cd03 100644 --- a/ts/actions/base-action.ts +++ b/ts/actions/base-action.ts @@ -118,6 +118,8 @@ export interface IActionConfig { proxmoxInsecure?: boolean; /** Proxmox operation mode: 'auto' detects CLI tools, 'cli' forces CLI, 'api' forces REST API (default: 'auto') */ proxmoxMode?: 'auto' | 'api' | 'cli'; + /** How HA-managed Proxmox resources should be stopped (default: 'none') */ + proxmoxHaPolicy?: 'none' | 'haStop'; } /** diff --git a/ts/actions/proxmox-action.ts b/ts/actions/proxmox-action.ts index f811faa..ebe0a4e 100644 --- a/ts/actions/proxmox-action.ts +++ b/ts/actions/proxmox-action.ts @@ -8,6 +8,11 @@ import { logger } from '../logger.ts'; import { PROXMOX, UI } from '../constants.ts'; const execFileAsync = promisify(execFile); +type TNodeLikeGlobal = typeof globalThis & { + process?: { + env: Record; + }; +}; /** * ProxmoxAction - Gracefully shuts down Proxmox VMs and LXC containers @@ -23,6 +28,22 @@ const execFileAsync = promisify(execFile); */ export class ProxmoxAction extends Action { readonly type = 'proxmox'; + private static readonly activeRunKeys = new Set(); + + private static findCliTool(command: string): string | null { + for (const dir of PROXMOX.CLI_TOOL_PATHS) { + const candidate = `${dir}/${command}`; + try { + if (fs.existsSync(candidate)) { + return candidate; + } + } catch (_e) { + // continue + } + } + + return null; + } /** * Check if Proxmox CLI tools (qm, pct) 
are available on the system @@ -32,29 +53,12 @@ export class ProxmoxAction extends Action { available: boolean; qmPath: string | null; pctPath: string | null; + haManagerPath: string | null; isRoot: boolean; } { - let qmPath: string | null = null; - let pctPath: string | null = null; - - for (const dir of PROXMOX.CLI_TOOL_PATHS) { - if (!qmPath) { - const p = `${dir}/qm`; - try { - if (fs.existsSync(p)) qmPath = p; - } catch (_e) { - // continue - } - } - if (!pctPath) { - const p = `${dir}/pct`; - try { - if (fs.existsSync(p)) pctPath = p; - } catch (_e) { - // continue - } - } - } + const qmPath = this.findCliTool('qm'); + const pctPath = this.findCliTool('pct'); + const haManagerPath = this.findCliTool('ha-manager'); const isRoot = !!(process.getuid && process.getuid() === 0); @@ -62,6 +66,7 @@ export class ProxmoxAction extends Action { available: qmPath !== null && pctPath !== null && isRoot, qmPath, pctPath, + haManagerPath, isRoot, }; } @@ -69,7 +74,11 @@ export class ProxmoxAction extends Action { /** * Resolve the operation mode based on config and environment */ - private resolveMode(): { mode: 'api' | 'cli'; qmPath: string; pctPath: string } | { mode: 'api'; qmPath?: undefined; pctPath?: undefined } { + private resolveMode(): { mode: 'api' | 'cli'; qmPath: string; pctPath: string } | { + mode: 'api'; + qmPath?: undefined; + pctPath?: undefined; + } { const configuredMode = this.config.proxmoxMode || 'auto'; if (configuredMode === 'api') { @@ -111,16 +120,29 @@ export class ProxmoxAction extends Action { const resolved = this.resolveMode(); const node = this.config.proxmoxNode || os.hostname(); const excludeIds = new Set(this.config.proxmoxExcludeIds || []); - const stopTimeout = (this.config.proxmoxStopTimeout || PROXMOX.DEFAULT_STOP_TIMEOUT_SECONDS) * 1000; + const stopTimeout = (this.config.proxmoxStopTimeout || PROXMOX.DEFAULT_STOP_TIMEOUT_SECONDS) * + 1000; const forceStop = this.config.proxmoxForceStop !== false; // default true + const haPolicy = 
this.config.proxmoxHaPolicy || 'none'; + const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST; + const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT; + const runKey = `${resolved.mode}:${node}:${ + resolved.mode === 'api' ? `${host}:${port}` : 'local' + }`; + + if (ProxmoxAction.activeRunKeys.has(runKey)) { + logger.info(`Proxmox action skipped: shutdown sequence already running for node ${node}`); + return; + } + + ProxmoxAction.activeRunKeys.add(runKey); logger.log(''); logger.logBoxTitle('Proxmox VM Shutdown', UI.WIDE_BOX_WIDTH, 'warning'); logger.logBoxLine(`Mode: ${resolved.mode === 'cli' ? 'CLI (qm/pct)' : 'API (REST)'}`); logger.logBoxLine(`Node: ${node}`); + logger.logBoxLine(`HA Policy: ${haPolicy}`); if (resolved.mode === 'api') { - const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST; - const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT; logger.logBoxLine(`API: ${host}:${port}`); } logger.logBoxLine(`UPS: ${context.upsName} (${context.powerStatus})`); @@ -132,6 +154,11 @@ export class ProxmoxAction extends Action { logger.log(''); try { + let apiContext: { + baseUrl: string; + headers: Record; + insecure: boolean; + } | null = null; let runningVMs: Array<{ vmid: number; name: string }>; let runningCTs: Array<{ vmid: number; name: string }>; @@ -140,8 +167,6 @@ export class ProxmoxAction extends Action { runningCTs = await this.getRunningCTsCli(resolved.pctPath); } else { // API mode - validate token - const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST; - const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT; const tokenId = this.config.proxmoxTokenId; const tokenSecret = this.config.proxmoxTokenSecret; const insecure = this.config.proxmoxInsecure !== false; @@ -152,13 +177,26 @@ export class ProxmoxAction extends Action { return; } - const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`; - const headers: Record = { - 'Authorization': `PVEAPIToken=${tokenId}=${tokenSecret}`, + apiContext = { + 
baseUrl: `https://${host}:${port}${PROXMOX.API_BASE}`, + headers: { + 'Authorization': `PVEAPIToken=${tokenId}=${tokenSecret}`, + }, + insecure, }; - runningVMs = await this.getRunningVMsApi(baseUrl, node, headers, insecure); - runningCTs = await this.getRunningCTsApi(baseUrl, node, headers, insecure); + runningVMs = await this.getRunningVMsApi( + apiContext.baseUrl, + node, + apiContext.headers, + apiContext.insecure, + ); + runningCTs = await this.getRunningCTsApi( + apiContext.baseUrl, + node, + apiContext.headers, + apiContext.insecure, + ); } // Filter out excluded IDs @@ -171,33 +209,83 @@ export class ProxmoxAction extends Action { return; } + const haManagedResources = haPolicy === 'haStop' + ? await this.getHaManagedResources(resolved, apiContext) + : { qemu: new Set(), lxc: new Set() }; + const haVmsToStop = vmsToStop.filter((vm) => haManagedResources.qemu.has(vm.vmid)); + const haCtsToStop = ctsToStop.filter((ct) => haManagedResources.lxc.has(ct.vmid)); + let directVmsToStop = vmsToStop.filter((vm) => !haManagedResources.qemu.has(vm.vmid)); + let directCtsToStop = ctsToStop.filter((ct) => !haManagedResources.lxc.has(ct.vmid)); + logger.info(`Shutting down ${vmsToStop.length} VMs and ${ctsToStop.length} containers...`); - // Send shutdown commands if (resolved.mode === 'cli') { - for (const vm of vmsToStop) { + const { haManagerPath } = ProxmoxAction.detectCliAvailability(); + if (haPolicy === 'haStop' && (haVmsToStop.length > 0 || haCtsToStop.length > 0)) { + if (!haManagerPath) { + logger.warn( + 'ha-manager not found, falling back to direct guest shutdown for HA-managed resources', + ); + directVmsToStop = [...haVmsToStop, ...directVmsToStop]; + directCtsToStop = [...haCtsToStop, ...directCtsToStop]; + } else { + for (const vm of haVmsToStop) { + await this.requestHaStopCli(haManagerPath, `vm:${vm.vmid}`); + logger.dim(` HA stop requested for VM ${vm.vmid} (${vm.name || 'unnamed'})`); + } + for (const ct of haCtsToStop) { + await 
this.requestHaStopCli(haManagerPath, `ct:${ct.vmid}`); + logger.dim(` HA stop requested for CT ${ct.vmid} (${ct.name || 'unnamed'})`); + } + } + } + + for (const vm of directVmsToStop) { await this.shutdownVMCli(resolved.qmPath, vm.vmid); logger.dim(` Shutdown sent to VM ${vm.vmid} (${vm.name || 'unnamed'})`); } - for (const ct of ctsToStop) { + for (const ct of directCtsToStop) { await this.shutdownCTCli(resolved.pctPath, ct.vmid); logger.dim(` Shutdown sent to CT ${ct.vmid} (${ct.name || 'unnamed'})`); } - } else { - const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST; - const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT; - const insecure = this.config.proxmoxInsecure !== false; - const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`; - const headers: Record = { - 'Authorization': `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`, - }; + } else if (apiContext) { + for (const vm of haVmsToStop) { + await this.requestHaStopApi( + apiContext.baseUrl, + `vm:${vm.vmid}`, + apiContext.headers, + apiContext.insecure, + ); + logger.dim(` HA stop requested for VM ${vm.vmid} (${vm.name || 'unnamed'})`); + } + for (const ct of haCtsToStop) { + await this.requestHaStopApi( + apiContext.baseUrl, + `ct:${ct.vmid}`, + apiContext.headers, + apiContext.insecure, + ); + logger.dim(` HA stop requested for CT ${ct.vmid} (${ct.name || 'unnamed'})`); + } - for (const vm of vmsToStop) { - await this.shutdownVMApi(baseUrl, node, vm.vmid, headers, insecure); + for (const vm of directVmsToStop) { + await this.shutdownVMApi( + apiContext.baseUrl, + node, + vm.vmid, + apiContext.headers, + apiContext.insecure, + ); logger.dim(` Shutdown sent to VM ${vm.vmid} (${vm.name || 'unnamed'})`); } - for (const ct of ctsToStop) { - await this.shutdownCTApi(baseUrl, node, ct.vmid, headers, insecure); + for (const ct of directCtsToStop) { + await this.shutdownCTApi( + apiContext.baseUrl, + node, + ct.vmid, + apiContext.headers, + 
apiContext.insecure, + ); logger.dim(` Shutdown sent to CT ${ct.vmid} (${ct.name || 'unnamed'})`); } } @@ -220,18 +308,23 @@ export class ProxmoxAction extends Action { } else { await this.stopCTCli(resolved.pctPath, item.vmid); } - } else { - const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST; - const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT; - const insecure = this.config.proxmoxInsecure !== false; - const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`; - const headers: Record = { - 'Authorization': `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`, - }; + } else if (apiContext) { if (item.type === 'qemu') { - await this.stopVMApi(baseUrl, node, item.vmid, headers, insecure); + await this.stopVMApi( + apiContext.baseUrl, + node, + item.vmid, + apiContext.headers, + apiContext.insecure, + ); } else { - await this.stopCTApi(baseUrl, node, item.vmid, headers, insecure); + await this.stopCTApi( + apiContext.baseUrl, + node, + item.vmid, + apiContext.headers, + apiContext.insecure, + ); } } logger.dim(` Force-stopped ${item.type} ${item.vmid} (${item.name || 'unnamed'})`); @@ -252,6 +345,8 @@ export class ProxmoxAction extends Action { logger.error( `Proxmox action failed: ${error instanceof Error ? 
error.message : String(error)}`, ); + } finally { + ProxmoxAction.activeRunKeys.delete(runKey); } } @@ -357,6 +452,77 @@ export class ProxmoxAction extends Action { return status; } + private async getHaManagedResources( + resolved: { mode: 'api' | 'cli'; qmPath?: string; pctPath?: string }, + apiContext: { + baseUrl: string; + headers: Record; + insecure: boolean; + } | null, + ): Promise<{ qemu: Set; lxc: Set }> { + if (resolved.mode === 'cli') { + const { haManagerPath } = ProxmoxAction.detectCliAvailability(); + if (!haManagerPath) { + return { qemu: new Set(), lxc: new Set() }; + } + + return await this.getHaManagedResourcesCli(haManagerPath); + } + + if (!apiContext) { + return { qemu: new Set(), lxc: new Set() }; + } + + return await this.getHaManagedResourcesApi( + apiContext.baseUrl, + apiContext.headers, + apiContext.insecure, + ); + } + + private async getHaManagedResourcesCli( + haManagerPath: string, + ): Promise<{ qemu: Set; lxc: Set }> { + try { + const { stdout } = await execFileAsync(haManagerPath, ['config']); + return this.parseHaManagerConfig(stdout); + } catch (error) { + logger.warn( + `Failed to list HA resources via CLI: ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + return { qemu: new Set(), lxc: new Set() }; + } + } + + private parseHaManagerConfig(output: string): { qemu: Set; lxc: Set } { + const resources = { + qemu: new Set(), + lxc: new Set(), + }; + + for (const line of output.trim().split('\n')) { + const match = line.match(/^\s*(vm|ct)\s*:\s*(\d+)\s*$/i); + if (!match) { + continue; + } + + const vmid = parseInt(match[2], 10); + if (match[1].toLowerCase() === 'vm') { + resources.qemu.add(vmid); + } else { + resources.lxc.add(vmid); + } + } + + return resources; + } + + private async requestHaStopCli(haManagerPath: string, sid: string): Promise { + await execFileAsync(haManagerPath, ['set', sid, '--state', 'stopped']); + } + // ─── API-based methods ───────────────────────────────────────────── /** @@ -367,16 +533,23 @@ export class ProxmoxAction extends Action { method: string, headers: Record, insecure: boolean, + body?: URLSearchParams, ): Promise { + const requestHeaders = { ...headers }; const fetchOptions: RequestInit = { method, - headers, + headers: requestHeaders, }; + if (body) { + requestHeaders['Content-Type'] = 'application/x-www-form-urlencoded;charset=UTF-8'; + fetchOptions.body = body.toString(); + } + // Use NODE_TLS_REJECT_UNAUTHORIZED for insecure mode (self-signed certs) - if (insecure) { - // deno-lint-ignore no-explicit-any - (globalThis as any).process?.env && ((globalThis as any).process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'); + const nodeProcess = (globalThis as TNodeLikeGlobal).process; + if (insecure && nodeProcess?.env) { + nodeProcess.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'; } try { @@ -390,9 +563,8 @@ export class ProxmoxAction extends Action { return await response.json(); } finally { // Restore TLS verification - if (insecure) { - // deno-lint-ignore no-explicit-any - (globalThis as any).process?.env && ((globalThis as any).process.env.NODE_TLS_REJECT_UNAUTHORIZED = '1'); + if (insecure && nodeProcess?.env) { + 
nodeProcess.env.NODE_TLS_REJECT_UNAUTHORIZED = '1'; } } } @@ -453,6 +625,63 @@ export class ProxmoxAction extends Action { } } + private async getHaManagedResourcesApi( + baseUrl: string, + headers: Record, + insecure: boolean, + ): Promise<{ qemu: Set; lxc: Set }> { + try { + const response = await this.apiRequest( + `${baseUrl}/cluster/ha/resources`, + 'GET', + headers, + insecure, + ) as { data: Array<{ sid?: string }> }; + const resources = { + qemu: new Set(), + lxc: new Set(), + }; + + for (const item of response.data || []) { + const match = item.sid?.match(/^(vm|ct):(\d+)$/i); + if (!match) { + continue; + } + + const vmid = parseInt(match[2], 10); + if (match[1].toLowerCase() === 'vm') { + resources.qemu.add(vmid); + } else { + resources.lxc.add(vmid); + } + } + + return resources; + } catch (error) { + logger.warn( + `Failed to list HA resources via API: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + return { qemu: new Set(), lxc: new Set() }; + } + } + + private async requestHaStopApi( + baseUrl: string, + sid: string, + headers: Record, + insecure: boolean, + ): Promise { + await this.apiRequest( + `${baseUrl}/cluster/ha/resources/${encodeURIComponent(sid)}`, + 'PUT', + headers, + insecure, + new URLSearchParams({ state: 'stopped' }), + ); + } + private async shutdownVMApi( baseUrl: string, node: string, @@ -529,7 +758,9 @@ export class ProxmoxAction extends Action { while (remaining.length > 0 && (Date.now() - startTime) < timeout) { // Wait before polling - await new Promise((resolve) => setTimeout(resolve, PROXMOX.STATUS_POLL_INTERVAL_SECONDS * 1000)); + await new Promise((resolve) => + setTimeout(resolve, PROXMOX.STATUS_POLL_INTERVAL_SECONDS * 1000) + ); // Check which are still running const stillRunning: typeof remaining = []; @@ -547,7 +778,8 @@ export class ProxmoxAction extends Action { const insecure = this.config.proxmoxInsecure !== false; const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`; const headers: 
Record = { - 'Authorization': `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`, + 'Authorization': + `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`, }; const statusUrl = `${baseUrl}/nodes/${node}/${item.type}/${item.vmid}/status/current`; const response = await this.apiRequest(statusUrl, 'GET', headers, insecure) as { diff --git a/ts/actions/shutdown-action.ts b/ts/actions/shutdown-action.ts index 02ed6fd..941e531 100644 --- a/ts/actions/shutdown-action.ts +++ b/ts/actions/shutdown-action.ts @@ -15,6 +15,7 @@ const execFileAsync = promisify(execFile); */ export class ShutdownAction extends Action { readonly type = 'shutdown'; + private static scheduledDelayMinutes: number | null = null; /** * Override shouldExecute to add shutdown-specific safety checks @@ -126,6 +127,25 @@ export class ShutdownAction extends Action { const shutdownDelay = this.config.shutdownDelay ?? SHUTDOWN.DEFAULT_DELAY_MINUTES; + if ( + ShutdownAction.scheduledDelayMinutes !== null && + ShutdownAction.scheduledDelayMinutes <= shutdownDelay + ) { + logger.info( + `Shutdown action skipped: shutdown already scheduled in ${ShutdownAction.scheduledDelayMinutes} minutes`, + ); + return; + } + + if ( + ShutdownAction.scheduledDelayMinutes !== null && + ShutdownAction.scheduledDelayMinutes > shutdownDelay + ) { + logger.warn( + `Shutdown already scheduled in ${ShutdownAction.scheduledDelayMinutes} minutes, rescheduling to ${shutdownDelay} minutes`, + ); + } + logger.log(''); logger.logBoxTitle('Initiating System Shutdown', UI.WIDE_BOX_WIDTH, 'error'); logger.logBoxLine(`UPS: ${context.upsName} (${context.upsId})`); @@ -139,6 +159,7 @@ export class ShutdownAction extends Action { try { await this.executeShutdownCommand(shutdownDelay); + ShutdownAction.scheduledDelayMinutes = shutdownDelay; } catch (error) { logger.error( `Shutdown command failed: ${error instanceof Error ? 
error.message : String(error)}`, @@ -227,6 +248,7 @@ export class ShutdownAction extends Action { logger.log(`Trying alternative shutdown method: ${cmdPath} ${alt.args.join(' ')}`); await execFileAsync(cmdPath, alt.args); logger.log(`Alternative method ${alt.cmd} succeeded`); + ShutdownAction.scheduledDelayMinutes = 0; return; // Exit if successful } } catch (_altError) { diff --git a/ts/cli/action-handler.ts b/ts/cli/action-handler.ts index 09ab03b..897952d 100644 --- a/ts/cli/action-handler.ts +++ b/ts/cli/action-handler.ts @@ -72,9 +72,13 @@ export class ActionHandler { logger.log(` ${theme.dim('1)')} Shutdown (system shutdown)`); logger.log(` ${theme.dim('2)')} Webhook (HTTP notification)`); logger.log(` ${theme.dim('3)')} Custom Script (run .sh file from /etc/nupst)`); - logger.log(` ${theme.dim('4)')} Proxmox (gracefully shut down VMs/LXCs before host shutdown)`); + logger.log( + ` ${theme.dim('4)')} Proxmox (gracefully shut down VMs/LXCs before host shutdown)`, + ); - const typeInput = await prompt(` ${theme.dim('Select action type')} ${theme.dim('[1]:')} `); + const typeInput = await prompt( + ` ${theme.dim('Select action type')} ${theme.dim('[1]:')} `, + ); const typeValue = parseInt(typeInput, 10) || 1; const newAction: Partial = {}; @@ -82,11 +86,13 @@ export class ActionHandler { if (typeValue === 1) { // Shutdown action newAction.type = 'shutdown'; - const defaultShutdownDelay = - this.nupst.getDaemon().getConfig().defaultShutdownDelay ?? SHUTDOWN.DEFAULT_DELAY_MINUTES; + const defaultShutdownDelay = this.nupst.getDaemon().getConfig().defaultShutdownDelay ?? 
+ SHUTDOWN.DEFAULT_DELAY_MINUTES; const delayStr = await prompt( - ` ${theme.dim('Shutdown delay')} ${theme.dim(`(minutes, leave empty for default ${defaultShutdownDelay}):`)} `, + ` ${theme.dim('Shutdown delay')} ${ + theme.dim(`(minutes, leave empty for default ${defaultShutdownDelay}):`) + } `, ); if (delayStr.trim()) { const shutdownDelay = parseInt(delayStr, 10); @@ -114,7 +120,9 @@ export class ActionHandler { const methodInput = await prompt(` ${theme.dim('Select method')} ${theme.dim('[1]:')} `); newAction.webhookMethod = methodInput === '2' ? 'GET' : 'POST'; - const timeoutInput = await prompt(` ${theme.dim('Timeout in seconds')} ${theme.dim('[10]:')} `); + const timeoutInput = await prompt( + ` ${theme.dim('Timeout in seconds')} ${theme.dim('[10]:')} `, + ); const timeout = parseInt(timeoutInput, 10); if (timeoutInput.trim() && !isNaN(timeout)) { newAction.webhookTimeout = timeout * 1000; @@ -123,14 +131,18 @@ export class ActionHandler { // Script action newAction.type = 'script'; - const scriptPath = await prompt(` ${theme.dim('Script filename (in /etc/nupst/, must end with .sh):')} `); + const scriptPath = await prompt( + ` ${theme.dim('Script filename (in /etc/nupst/, must end with .sh):')} `, + ); if (!scriptPath.trim() || !scriptPath.trim().endsWith('.sh')) { logger.error('Script path must end with .sh.'); process.exit(1); } newAction.scriptPath = scriptPath.trim(); - const timeoutInput = await prompt(` ${theme.dim('Script timeout in seconds')} ${theme.dim('[60]:')} `); + const timeoutInput = await prompt( + ` ${theme.dim('Script timeout in seconds')} ${theme.dim('[60]:')} `, + ); const timeout = parseInt(timeoutInput, 10); if (timeoutInput.trim() && !isNaN(timeout)) { newAction.scriptTimeout = timeout * 1000; @@ -159,14 +171,20 @@ export class ActionHandler { logger.info('Proxmox API Settings:'); logger.dim('Create a token with: pveum user token add root@pam nupst --privsep=0'); - const pxHost = await prompt(` ${theme.dim('Proxmox Host')} 
${theme.dim('[localhost]:')} `); + const pxHost = await prompt( + ` ${theme.dim('Proxmox Host')} ${theme.dim('[localhost]:')} `, + ); newAction.proxmoxHost = pxHost.trim() || 'localhost'; - const pxPortInput = await prompt(` ${theme.dim('Proxmox API Port')} ${theme.dim('[8006]:')} `); + const pxPortInput = await prompt( + ` ${theme.dim('Proxmox API Port')} ${theme.dim('[8006]:')} `, + ); const pxPort = parseInt(pxPortInput, 10); newAction.proxmoxPort = pxPortInput.trim() && !isNaN(pxPort) ? pxPort : 8006; - const pxNode = await prompt(` ${theme.dim('Proxmox Node Name (empty = auto-detect):')} `); + const pxNode = await prompt( + ` ${theme.dim('Proxmox Node Name (empty = auto-detect):')} `, + ); if (pxNode.trim()) { newAction.proxmoxNode = pxNode.trim(); } @@ -185,25 +203,41 @@ export class ActionHandler { } newAction.proxmoxTokenSecret = tokenSecret.trim(); - const insecureInput = await prompt(` ${theme.dim('Skip TLS verification (self-signed cert)?')} ${theme.dim('(Y/n):')} `); + const insecureInput = await prompt( + ` ${theme.dim('Skip TLS verification (self-signed cert)?')} ${theme.dim('(Y/n):')} `, + ); newAction.proxmoxInsecure = insecureInput.toLowerCase() !== 'n'; newAction.proxmoxMode = 'api'; } // Common Proxmox settings (both modes) - const excludeInput = await prompt(` ${theme.dim('VM/CT IDs to exclude (comma-separated, or empty):')} `); + const excludeInput = await prompt( + ` ${theme.dim('VM/CT IDs to exclude (comma-separated, or empty):')} `, + ); if (excludeInput.trim()) { - newAction.proxmoxExcludeIds = excludeInput.split(',').map((s) => parseInt(s.trim(), 10)).filter((n) => !isNaN(n)); + newAction.proxmoxExcludeIds = excludeInput.split(',').map((s) => parseInt(s.trim(), 10)) + .filter((n) => !isNaN(n)); } - const timeoutInput = await prompt(` ${theme.dim('VM shutdown timeout in seconds')} ${theme.dim('[120]:')} `); + const timeoutInput = await prompt( + ` ${theme.dim('VM shutdown timeout in seconds')} ${theme.dim('[120]:')} `, + ); const 
stopTimeout = parseInt(timeoutInput, 10); if (timeoutInput.trim() && !isNaN(stopTimeout)) { newAction.proxmoxStopTimeout = stopTimeout; } - const forceInput = await prompt(` ${theme.dim('Force-stop VMs that don\'t shut down in time?')} ${theme.dim('(Y/n):')} `); + const forceInput = await prompt( + ` ${theme.dim("Force-stop VMs that don't shut down in time?")} ${ + theme.dim('(Y/n):') + } `, + ); newAction.proxmoxForceStop = forceInput.toLowerCase() !== 'n'; + + const haPolicyInput = await prompt( + ` ${theme.dim('HA-managed guest handling')} ${theme.dim('([1] none, 2 haStop):')} `, + ); + newAction.proxmoxHaPolicy = haPolicyInput.trim() === '2' ? 'haStop' : 'none'; } else { logger.error('Invalid action type.'); process.exit(1); @@ -473,8 +507,8 @@ export class ActionHandler { ]; const rows = target.actions.map((action, index) => { - const defaultShutdownDelay = - this.nupst.getDaemon().getConfig().defaultShutdownDelay ?? SHUTDOWN.DEFAULT_DELAY_MINUTES; + const defaultShutdownDelay = this.nupst.getDaemon().getConfig().defaultShutdownDelay ?? + SHUTDOWN.DEFAULT_DELAY_MINUTES; let details = `${action.shutdownDelay ?? defaultShutdownDelay}min delay`; if (action.type === 'proxmox') { const mode = action.proxmoxMode || 'auto'; @@ -488,6 +522,9 @@ export class ActionHandler { if (action.proxmoxExcludeIds?.length) { details += `, excl: ${action.proxmoxExcludeIds.join(',')}`; } + if (action.proxmoxHaPolicy === 'haStop') { + details += ', haStop'; + } } else if (action.type === 'webhook') { details = action.webhookUrl || theme.dim('N/A'); } else if (action.type === 'script') { diff --git a/ts/cli/ups-handler.ts b/ts/cli/ups-handler.ts index 3959673..1f47423 100644 --- a/ts/cli/ups-handler.ts +++ b/ts/cli/ups-handler.ts @@ -1152,8 +1152,8 @@ export class UpsHandler { if (typeValue === 1) { // Shutdown action action.type = 'shutdown'; - const defaultShutdownDelay = - this.nupst.getDaemon().getConfig().defaultShutdownDelay ?? 
SHUTDOWN.DEFAULT_DELAY_MINUTES; + const defaultShutdownDelay = this.nupst.getDaemon().getConfig().defaultShutdownDelay ?? + SHUTDOWN.DEFAULT_DELAY_MINUTES; const delayInput = await prompt( `Shutdown delay in minutes (leave empty for default ${defaultShutdownDelay}): `, @@ -1276,6 +1276,9 @@ export class UpsHandler { const forceInput = await prompt("Force-stop VMs that don't shut down in time? (Y/n): "); action.proxmoxForceStop = forceInput.toLowerCase() !== 'n'; + const haPolicyInput = await prompt('HA-managed guest handling ([1] none, 2 haStop): '); + action.proxmoxHaPolicy = haPolicyInput.trim() === '2' ? 'haStop' : 'none'; + logger.log(''); logger.info('Note: Place the Proxmox action BEFORE the shutdown action'); logger.dim('in the action chain so VMs shut down before the host.'); diff --git a/ts/daemon.ts b/ts/daemon.ts index a5b82c7..5b878f4 100644 --- a/ts/daemon.ts +++ b/ts/daemon.ts @@ -14,6 +14,7 @@ import type { IActionConfig } from './actions/base-action.ts'; import { ActionManager } from './actions/index.ts'; import { applyDefaultShutdownDelay, + buildUpsActionContext, decideUpsActionExecution, type TUpsTriggerReason, } from './action-orchestration.ts'; @@ -26,11 +27,17 @@ import { } from './config-watch.ts'; import { type IPauseState, loadPauseSnapshot } from './pause-state.ts'; import { ShutdownExecutor } from './shutdown-executor.ts'; +import { + buildGroupStatusSnapshot, + buildGroupThresholdContextStatus, + evaluateGroupActionThreshold, +} from './group-monitoring.ts'; import { buildFailedUpsPollSnapshot, buildSuccessfulUpsPollSnapshot, ensureUpsStatus, - hasThresholdViolation, + getActionThresholdStates, + getEnteredThresholdIndexes, } from './ups-monitoring.ts'; import { buildShutdownErrorRow, @@ -178,6 +185,8 @@ export class NupstDaemon { private isPaused: boolean = false; private pauseState: IPauseState | null = null; private upsStatus: Map = new Map(); + private groupStatus: Map = new Map(); + private thresholdState: Map = new Map(); private 
httpServer?: NupstHttpServer; private readonly shutdownExecutor: ShutdownExecutor; @@ -218,7 +227,8 @@ export class NupstDaemon { // Cast to INupstConfig since migrations ensure the output is valid. const validConfig = migratedConfig as unknown as INupstConfig; const normalizedShutdownDelay = this.normalizeShutdownDelay(validConfig.defaultShutdownDelay); - const shouldPersistNormalizedConfig = validConfig.defaultShutdownDelay !== normalizedShutdownDelay; + const shouldPersistNormalizedConfig = + validConfig.defaultShutdownDelay !== normalizedShutdownDelay; validConfig.defaultShutdownDelay = normalizedShutdownDelay; if (migrated || shouldPersistNormalizedConfig) { this.config = validConfig; @@ -642,19 +652,24 @@ export class NupstDaemon { ); } - if ( - hasThresholdViolation( - status.powerStatus, - status.batteryCapacity, - status.batteryRuntime, - ups.actions, - ) - ) { + const thresholdStates = getActionThresholdStates( + status.powerStatus, + status.batteryCapacity, + status.batteryRuntime, + ups.actions, + ); + const enteredThresholdIndexes = this.trackEnteredThresholdIndexes( + `ups:${ups.id}`, + thresholdStates, + ); + + if (enteredThresholdIndexes.length > 0) { await this.triggerUpsActions( ups, pollSnapshot.updatedStatus, pollSnapshot.previousStatus, 'thresholdViolation', + enteredThresholdIndexes, ); } @@ -694,6 +709,95 @@ export class NupstDaemon { this.upsStatus.set(ups.id, failureSnapshot.updatedStatus); } } + + await this.checkGroupActions(); + } + + private trackEnteredThresholdIndexes(sourceKey: string, currentStates: boolean[]): number[] { + const previousStates = this.thresholdState.get(sourceKey); + const enteredIndexes = getEnteredThresholdIndexes(previousStates, currentStates); + this.thresholdState.set(sourceKey, [...currentStates]); + return enteredIndexes; + } + + private getGroupActionIdentity(group: IGroupConfig): { id: string; name: string } { + return { + id: group.id, + name: `Group ${group.name}`, + }; + } + + private async 
checkGroupActions(): Promise { + for (const group of this.config.groups || []) { + const groupIdentity = this.getGroupActionIdentity(group); + const memberStatuses = this.config.upsDevices + .filter((ups) => ups.groups?.includes(group.id)) + .map((ups) => this.upsStatus.get(ups.id)) + .filter((status): status is IUpsStatus => !!status); + + if (memberStatuses.length === 0) { + continue; + } + + const currentTime = Date.now(); + const pollSnapshot = buildGroupStatusSnapshot( + groupIdentity, + group.mode, + memberStatuses, + this.groupStatus.get(group.id), + currentTime, + ); + + if (pollSnapshot.transition === 'powerStatusChange' && pollSnapshot.previousStatus) { + logger.log(''); + logger.logBoxTitle(`Group Power Status Change: ${group.name}`, 60, 'warning'); + logger.logBoxLine( + `Previous: ${formatPowerStatus(pollSnapshot.previousStatus.powerStatus)}`, + ); + logger.logBoxLine(`Current: ${formatPowerStatus(pollSnapshot.updatedStatus.powerStatus)}`); + logger.logBoxLine(`Members: ${memberStatuses.map((status) => status.name).join(', ')}`); + logger.logBoxLine(`Time: ${new Date().toISOString()}`); + logger.logBoxEnd(); + logger.log(''); + + await this.triggerGroupActions( + group, + pollSnapshot.updatedStatus, + pollSnapshot.previousStatus, + 'powerStatusChange', + ); + } + + const thresholdEvaluations = (group.actions || []).map((action) => + evaluateGroupActionThreshold(action, group.mode, memberStatuses) + ); + const thresholdStates = thresholdEvaluations.map((evaluation) => + evaluation.exceedsThreshold && !evaluation.blockedByUnreachable + ); + const enteredThresholdIndexes = this.trackEnteredThresholdIndexes( + `group:${group.id}`, + thresholdStates, + ); + + if (enteredThresholdIndexes.length > 0) { + const thresholdStatus = buildGroupThresholdContextStatus( + groupIdentity, + thresholdEvaluations, + enteredThresholdIndexes, + pollSnapshot.updatedStatus, + currentTime, + ); + await this.triggerGroupActions( + group, + thresholdStatus, + 
pollSnapshot.previousStatus, + 'thresholdViolation', + enteredThresholdIndexes, + ); + } + + this.groupStatus.set(group.id, pollSnapshot.updatedStatus); + } } /** @@ -761,6 +865,7 @@ export class NupstDaemon { status: IUpsStatus, previousStatus: IUpsStatus | undefined, triggerReason: TUpsTriggerReason, + actionIndexes?: number[], ): Promise { const decision = decideUpsActionExecution( this.isPaused, @@ -784,14 +889,63 @@ export class NupstDaemon { return; } + const selectedActions = actionIndexes + ? decision.actions.filter((_action, index) => actionIndexes.includes(index)) + : decision.actions; + + if (selectedActions.length === 0) { + return; + } + const actions = applyDefaultShutdownDelay( - decision.actions, + selectedActions, this.getDefaultShutdownDelayMinutes(), ); await ActionManager.executeActions(actions, decision.context); } + private async triggerGroupActions( + group: IGroupConfig, + status: IUpsStatus, + previousStatus: IUpsStatus | undefined, + triggerReason: TUpsTriggerReason, + actionIndexes?: number[], + ): Promise { + if (this.isPaused) { + logger.info( + `[PAUSED] Actions suppressed for Group ${group.name} (trigger: ${triggerReason})`, + ); + return; + } + + const configuredActions = group.actions || []; + if (configuredActions.length === 0) { + return; + } + + const selectedActions = actionIndexes + ? 
configuredActions.filter((_action, index) => actionIndexes.includes(index)) + : configuredActions; + + if (selectedActions.length === 0) { + return; + } + + const actions = applyDefaultShutdownDelay( + selectedActions, + this.getDefaultShutdownDelayMinutes(), + ); + const context = buildUpsActionContext( + this.getGroupActionIdentity(group), + status, + previousStatus, + triggerReason, + ); + + await ActionManager.executeActions(actions, context); + } + /** * Initiate system shutdown with UPS monitoring during shutdown * @param reason Reason for shutdown @@ -1054,6 +1208,8 @@ export class NupstDaemon { // Load the new configuration await this.loadConfig(); + this.thresholdState.clear(); + this.groupStatus.clear(); const newDeviceCount = this.config.upsDevices?.length || 0; const reloadSnapshot = analyzeConfigReload(oldDeviceCount, newDeviceCount); diff --git a/ts/group-monitoring.ts b/ts/group-monitoring.ts new file mode 100644 index 0000000..57f7c68 --- /dev/null +++ b/ts/group-monitoring.ts @@ -0,0 +1,198 @@ +import type { IActionConfig, TPowerStatus } from './actions/base-action.ts'; +import { createInitialUpsStatus, type IUpsIdentity, type IUpsStatus } from './ups-status.ts'; + +export interface IGroupStatusSnapshot { + updatedStatus: IUpsStatus; + transition: 'none' | 'powerStatusChange'; + previousStatus?: IUpsStatus; +} + +export interface IGroupThresholdEvaluation { + exceedsThreshold: boolean; + blockedByUnreachable: boolean; + representativeStatus?: IUpsStatus; +} + +const destructiveActionTypes = new Set(['shutdown', 'proxmox']); + +function getStatusSeverity(powerStatus: TPowerStatus): number { + switch (powerStatus) { + case 'unreachable': + return 3; + case 'onBattery': + return 2; + case 'unknown': + return 1; + case 'online': + default: + return 0; + } +} + +export function selectWorstStatus(statuses: IUpsStatus[]): IUpsStatus | undefined { + return statuses.reduce((worst, status) => { + if (!worst) { + return status; + } + + const severityDiff = 
getStatusSeverity(status.powerStatus) - + getStatusSeverity(worst.powerStatus); + if (severityDiff > 0) { + return status; + } + if (severityDiff < 0) { + return worst; + } + + if (status.batteryRuntime !== worst.batteryRuntime) { + return status.batteryRuntime < worst.batteryRuntime ? status : worst; + } + + if (status.batteryCapacity !== worst.batteryCapacity) { + return status.batteryCapacity < worst.batteryCapacity ? status : worst; + } + + return worst; + }, undefined); +} + +function deriveGroupPowerStatus( + mode: 'redundant' | 'nonRedundant', + memberStatuses: IUpsStatus[], +): TPowerStatus { + if (memberStatuses.length === 0) { + return 'unknown'; + } + + if (memberStatuses.some((status) => status.powerStatus === 'unreachable')) { + return 'unreachable'; + } + + if (mode === 'redundant') { + if (memberStatuses.every((status) => status.powerStatus === 'onBattery')) { + return 'onBattery'; + } + } else if (memberStatuses.some((status) => status.powerStatus === 'onBattery')) { + return 'onBattery'; + } + + if (memberStatuses.some((status) => status.powerStatus === 'unknown')) { + return 'unknown'; + } + + return 'online'; +} + +function pickRepresentativeStatus( + powerStatus: TPowerStatus, + memberStatuses: IUpsStatus[], +): IUpsStatus | undefined { + const matchingStatuses = memberStatuses.filter((status) => status.powerStatus === powerStatus); + return selectWorstStatus(matchingStatuses.length > 0 ? 
matchingStatuses : memberStatuses); +} + +export function buildGroupStatusSnapshot( + group: IUpsIdentity, + mode: 'redundant' | 'nonRedundant', + memberStatuses: IUpsStatus[], + currentStatus: IUpsStatus | undefined, + currentTime: number, +): IGroupStatusSnapshot { + const previousStatus = currentStatus || createInitialUpsStatus(group, currentTime); + const powerStatus = deriveGroupPowerStatus(mode, memberStatuses); + const representative = pickRepresentativeStatus(powerStatus, memberStatuses) || previousStatus; + const updatedStatus: IUpsStatus = { + ...previousStatus, + id: group.id, + name: group.name, + powerStatus, + batteryCapacity: representative.batteryCapacity, + batteryRuntime: representative.batteryRuntime, + outputLoad: representative.outputLoad, + outputPower: representative.outputPower, + outputVoltage: representative.outputVoltage, + outputCurrent: representative.outputCurrent, + lastCheckTime: currentTime, + consecutiveFailures: 0, + unreachableSince: powerStatus === 'unreachable' + ? 
previousStatus.unreachableSince || currentTime + : 0, + lastStatusChange: previousStatus.lastStatusChange || currentTime, + }; + + if (previousStatus.powerStatus !== powerStatus) { + updatedStatus.lastStatusChange = currentTime; + if (powerStatus === 'unreachable') { + updatedStatus.unreachableSince = currentTime; + } + return { + updatedStatus, + transition: 'powerStatusChange', + previousStatus, + }; + } + + return { + updatedStatus, + transition: 'none', + previousStatus: currentStatus, + }; +} + +export function evaluateGroupActionThreshold( + actionConfig: IActionConfig, + mode: 'redundant' | 'nonRedundant', + memberStatuses: IUpsStatus[], +): IGroupThresholdEvaluation { + if (!actionConfig.thresholds || memberStatuses.length === 0) { + return { + exceedsThreshold: false, + blockedByUnreachable: false, + }; + } + + const criticalMembers = memberStatuses.filter((status) => + status.powerStatus === 'onBattery' && + (status.batteryCapacity < actionConfig.thresholds!.battery || + status.batteryRuntime < actionConfig.thresholds!.runtime) + ); + const exceedsThreshold = mode === 'redundant' + ? 
criticalMembers.length === memberStatuses.length + : criticalMembers.length > 0; + + return { + exceedsThreshold, + blockedByUnreachable: exceedsThreshold && + destructiveActionTypes.has(actionConfig.type) && + memberStatuses.some((status) => status.powerStatus === 'unreachable'), + representativeStatus: selectWorstStatus(criticalMembers), + }; +} + +export function buildGroupThresholdContextStatus( + group: IUpsIdentity, + evaluations: IGroupThresholdEvaluation[], + enteredActionIndexes: number[], + fallbackStatus: IUpsStatus, + currentTime: number, +): IUpsStatus { + const representativeStatuses = enteredActionIndexes + .map((index) => evaluations[index]?.representativeStatus) + .filter((status): status is IUpsStatus => !!status); + + const representative = selectWorstStatus(representativeStatuses) || fallbackStatus; + + return { + ...fallbackStatus, + id: group.id, + name: group.name, + powerStatus: 'onBattery', + batteryCapacity: representative.batteryCapacity, + batteryRuntime: representative.batteryRuntime, + outputLoad: representative.outputLoad, + outputPower: representative.outputPower, + outputVoltage: representative.outputVoltage, + outputCurrent: representative.outputCurrent, + lastCheckTime: currentTime, + }; +} diff --git a/ts/systemd.ts b/ts/systemd.ts index f809fc8..d67d73a 100644 --- a/ts/systemd.ts +++ b/ts/systemd.ts @@ -346,8 +346,8 @@ WantedBy=multi-user.target */ private async displaySingleUpsStatus(ups: IUpsConfig, snmp: NupstSnmp): Promise { try { - const defaultShutdownDelay = - this.daemon.getConfig().defaultShutdownDelay ?? SHUTDOWN.DEFAULT_DELAY_MINUTES; + const defaultShutdownDelay = this.daemon.getConfig().defaultShutdownDelay ?? + SHUTDOWN.DEFAULT_DELAY_MINUTES; const protocol = ups.protocol || 'snmp'; let status; @@ -437,6 +437,8 @@ WantedBy=multi-user.target if (action.type === 'shutdown') { const shutdownDelay = action.shutdownDelay ?? 
defaultShutdownDelay; actionDesc += `, delay=${shutdownDelay}min`; + } else if (action.type === 'proxmox' && action.proxmoxHaPolicy === 'haStop') { + actionDesc += ', ha=stop'; } actionDesc += ')'; } else { @@ -444,6 +446,8 @@ WantedBy=multi-user.target if (action.type === 'shutdown') { const shutdownDelay = action.shutdownDelay ?? defaultShutdownDelay; actionDesc += `, delay=${shutdownDelay}min`; + } else if (action.type === 'proxmox' && action.proxmoxHaPolicy === 'haStop') { + actionDesc += ', ha=stop'; } actionDesc += ')'; } @@ -520,6 +524,8 @@ WantedBy=multi-user.target if (action.type === 'shutdown') { const shutdownDelay = action.shutdownDelay ?? defaultShutdownDelay; actionDesc += `, delay=${shutdownDelay}min`; + } else if (action.type === 'proxmox' && action.proxmoxHaPolicy === 'haStop') { + actionDesc += ', ha=stop'; } actionDesc += ')'; } else { @@ -527,6 +533,8 @@ WantedBy=multi-user.target if (action.type === 'shutdown') { const shutdownDelay = action.shutdownDelay ?? defaultShutdownDelay; actionDesc += `, delay=${shutdownDelay}min`; + } else if (action.type === 'proxmox' && action.proxmoxHaPolicy === 'haStop') { + actionDesc += ', ha=stop'; } actionDesc += ')'; } diff --git a/ts/ups-monitoring.ts b/ts/ups-monitoring.ts index e9a511a..e9583ce 100644 --- a/ts/ups-monitoring.ts +++ b/ts/ups-monitoring.ts @@ -120,19 +120,53 @@ export function hasThresholdViolation( batteryRuntime: number, actions: IActionConfig[] | undefined, ): boolean { - if (powerStatus !== 'onBattery' || !actions || actions.length === 0) { + return getActionThresholdStates(powerStatus, batteryCapacity, batteryRuntime, actions).some( + Boolean, + ); +} + +export function isActionThresholdExceeded( + actionConfig: IActionConfig, + powerStatus: IProtocolUpsStatus['powerStatus'], + batteryCapacity: number, + batteryRuntime: number, +): boolean { + if (powerStatus !== 'onBattery' || !actionConfig.thresholds) { return false; } - for (const actionConfig of actions) { - if ( - 
actionConfig.thresholds && - (batteryCapacity < actionConfig.thresholds.battery || - batteryRuntime < actionConfig.thresholds.runtime) - ) { - return true; + return ( + batteryCapacity < actionConfig.thresholds.battery || + batteryRuntime < actionConfig.thresholds.runtime + ); +} + +export function getActionThresholdStates( + powerStatus: IProtocolUpsStatus['powerStatus'], + batteryCapacity: number, + batteryRuntime: number, + actions: IActionConfig[] | undefined, +): boolean[] { + if (!actions || actions.length === 0) { + return []; + } + + return actions.map((actionConfig) => + isActionThresholdExceeded(actionConfig, powerStatus, batteryCapacity, batteryRuntime) + ); +} + +export function getEnteredThresholdIndexes( + previousStates: boolean[] | undefined, + currentStates: boolean[], +): number[] { + const enteredIndexes: number[] = []; + + for (let index = 0; index < currentStates.length; index++) { + if (currentStates[index] && !previousStates?.[index]) { + enteredIndexes.push(index); } } - return false; + return enteredIndexes; }