feat(monitoring): add edge-triggered threshold handling with group action orchestration and HA-aware Proxmox shutdowns

This commit is contained in:
2026-04-16 02:54:16 +00:00
parent bf4d519428
commit a435bd6fed
13 changed files with 1052 additions and 117 deletions
+2
View File
@@ -118,6 +118,8 @@ export interface IActionConfig {
proxmoxInsecure?: boolean;
/** Proxmox operation mode: 'auto' detects CLI tools, 'cli' forces CLI, 'api' forces REST API (default: 'auto') */
proxmoxMode?: 'auto' | 'api' | 'cli';
/** How HA-managed Proxmox resources should be stopped (default: 'none') */
proxmoxHaPolicy?: 'none' | 'haStop';
}
/**
+298 -66
View File
@@ -8,6 +8,11 @@ import { logger } from '../logger.ts';
import { PROXMOX, UI } from '../constants.ts';
const execFileAsync = promisify(execFile);
type TNodeLikeGlobal = typeof globalThis & {
process?: {
env: Record<string, string | undefined>;
};
};
/**
* ProxmoxAction - Gracefully shuts down Proxmox VMs and LXC containers
@@ -23,6 +28,22 @@ const execFileAsync = promisify(execFile);
*/
export class ProxmoxAction extends Action {
readonly type = 'proxmox';
private static readonly activeRunKeys = new Set<string>();
private static findCliTool(command: string): string | null {
for (const dir of PROXMOX.CLI_TOOL_PATHS) {
const candidate = `${dir}/${command}`;
try {
if (fs.existsSync(candidate)) {
return candidate;
}
} catch (_e) {
// continue
}
}
return null;
}
/**
* Check if Proxmox CLI tools (qm, pct) are available on the system
@@ -32,29 +53,12 @@ export class ProxmoxAction extends Action {
available: boolean;
qmPath: string | null;
pctPath: string | null;
haManagerPath: string | null;
isRoot: boolean;
} {
let qmPath: string | null = null;
let pctPath: string | null = null;
for (const dir of PROXMOX.CLI_TOOL_PATHS) {
if (!qmPath) {
const p = `${dir}/qm`;
try {
if (fs.existsSync(p)) qmPath = p;
} catch (_e) {
// continue
}
}
if (!pctPath) {
const p = `${dir}/pct`;
try {
if (fs.existsSync(p)) pctPath = p;
} catch (_e) {
// continue
}
}
}
const qmPath = this.findCliTool('qm');
const pctPath = this.findCliTool('pct');
const haManagerPath = this.findCliTool('ha-manager');
const isRoot = !!(process.getuid && process.getuid() === 0);
@@ -62,6 +66,7 @@ export class ProxmoxAction extends Action {
available: qmPath !== null && pctPath !== null && isRoot,
qmPath,
pctPath,
haManagerPath,
isRoot,
};
}
@@ -69,7 +74,11 @@ export class ProxmoxAction extends Action {
/**
* Resolve the operation mode based on config and environment
*/
private resolveMode(): { mode: 'api' | 'cli'; qmPath: string; pctPath: string } | { mode: 'api'; qmPath?: undefined; pctPath?: undefined } {
private resolveMode(): { mode: 'api' | 'cli'; qmPath: string; pctPath: string } | {
mode: 'api';
qmPath?: undefined;
pctPath?: undefined;
} {
const configuredMode = this.config.proxmoxMode || 'auto';
if (configuredMode === 'api') {
@@ -111,16 +120,29 @@ export class ProxmoxAction extends Action {
const resolved = this.resolveMode();
const node = this.config.proxmoxNode || os.hostname();
const excludeIds = new Set(this.config.proxmoxExcludeIds || []);
const stopTimeout = (this.config.proxmoxStopTimeout || PROXMOX.DEFAULT_STOP_TIMEOUT_SECONDS) * 1000;
const stopTimeout = (this.config.proxmoxStopTimeout || PROXMOX.DEFAULT_STOP_TIMEOUT_SECONDS) *
1000;
const forceStop = this.config.proxmoxForceStop !== false; // default true
const haPolicy = this.config.proxmoxHaPolicy || 'none';
const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST;
const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT;
const runKey = `${resolved.mode}:${node}:${
resolved.mode === 'api' ? `${host}:${port}` : 'local'
}`;
if (ProxmoxAction.activeRunKeys.has(runKey)) {
logger.info(`Proxmox action skipped: shutdown sequence already running for node ${node}`);
return;
}
ProxmoxAction.activeRunKeys.add(runKey);
logger.log('');
logger.logBoxTitle('Proxmox VM Shutdown', UI.WIDE_BOX_WIDTH, 'warning');
logger.logBoxLine(`Mode: ${resolved.mode === 'cli' ? 'CLI (qm/pct)' : 'API (REST)'}`);
logger.logBoxLine(`Node: ${node}`);
logger.logBoxLine(`HA Policy: ${haPolicy}`);
if (resolved.mode === 'api') {
const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST;
const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT;
logger.logBoxLine(`API: ${host}:${port}`);
}
logger.logBoxLine(`UPS: ${context.upsName} (${context.powerStatus})`);
@@ -132,6 +154,11 @@ export class ProxmoxAction extends Action {
logger.log('');
try {
let apiContext: {
baseUrl: string;
headers: Record<string, string>;
insecure: boolean;
} | null = null;
let runningVMs: Array<{ vmid: number; name: string }>;
let runningCTs: Array<{ vmid: number; name: string }>;
@@ -140,8 +167,6 @@ export class ProxmoxAction extends Action {
runningCTs = await this.getRunningCTsCli(resolved.pctPath);
} else {
// API mode - validate token
const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST;
const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT;
const tokenId = this.config.proxmoxTokenId;
const tokenSecret = this.config.proxmoxTokenSecret;
const insecure = this.config.proxmoxInsecure !== false;
@@ -152,13 +177,26 @@ export class ProxmoxAction extends Action {
return;
}
const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`;
const headers: Record<string, string> = {
'Authorization': `PVEAPIToken=${tokenId}=${tokenSecret}`,
apiContext = {
baseUrl: `https://${host}:${port}${PROXMOX.API_BASE}`,
headers: {
'Authorization': `PVEAPIToken=${tokenId}=${tokenSecret}`,
},
insecure,
};
runningVMs = await this.getRunningVMsApi(baseUrl, node, headers, insecure);
runningCTs = await this.getRunningCTsApi(baseUrl, node, headers, insecure);
runningVMs = await this.getRunningVMsApi(
apiContext.baseUrl,
node,
apiContext.headers,
apiContext.insecure,
);
runningCTs = await this.getRunningCTsApi(
apiContext.baseUrl,
node,
apiContext.headers,
apiContext.insecure,
);
}
// Filter out excluded IDs
@@ -171,33 +209,83 @@ export class ProxmoxAction extends Action {
return;
}
const haManagedResources = haPolicy === 'haStop'
? await this.getHaManagedResources(resolved, apiContext)
: { qemu: new Set<number>(), lxc: new Set<number>() };
const haVmsToStop = vmsToStop.filter((vm) => haManagedResources.qemu.has(vm.vmid));
const haCtsToStop = ctsToStop.filter((ct) => haManagedResources.lxc.has(ct.vmid));
let directVmsToStop = vmsToStop.filter((vm) => !haManagedResources.qemu.has(vm.vmid));
let directCtsToStop = ctsToStop.filter((ct) => !haManagedResources.lxc.has(ct.vmid));
logger.info(`Shutting down ${vmsToStop.length} VMs and ${ctsToStop.length} containers...`);
// Send shutdown commands
if (resolved.mode === 'cli') {
for (const vm of vmsToStop) {
const { haManagerPath } = ProxmoxAction.detectCliAvailability();
if (haPolicy === 'haStop' && (haVmsToStop.length > 0 || haCtsToStop.length > 0)) {
if (!haManagerPath) {
logger.warn(
'ha-manager not found, falling back to direct guest shutdown for HA-managed resources',
);
directVmsToStop = [...haVmsToStop, ...directVmsToStop];
directCtsToStop = [...haCtsToStop, ...directCtsToStop];
} else {
for (const vm of haVmsToStop) {
await this.requestHaStopCli(haManagerPath, `vm:${vm.vmid}`);
logger.dim(` HA stop requested for VM ${vm.vmid} (${vm.name || 'unnamed'})`);
}
for (const ct of haCtsToStop) {
await this.requestHaStopCli(haManagerPath, `ct:${ct.vmid}`);
logger.dim(` HA stop requested for CT ${ct.vmid} (${ct.name || 'unnamed'})`);
}
}
}
for (const vm of directVmsToStop) {
await this.shutdownVMCli(resolved.qmPath, vm.vmid);
logger.dim(` Shutdown sent to VM ${vm.vmid} (${vm.name || 'unnamed'})`);
}
for (const ct of ctsToStop) {
for (const ct of directCtsToStop) {
await this.shutdownCTCli(resolved.pctPath, ct.vmid);
logger.dim(` Shutdown sent to CT ${ct.vmid} (${ct.name || 'unnamed'})`);
}
} else {
const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST;
const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT;
const insecure = this.config.proxmoxInsecure !== false;
const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`;
const headers: Record<string, string> = {
'Authorization': `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`,
};
} else if (apiContext) {
for (const vm of haVmsToStop) {
await this.requestHaStopApi(
apiContext.baseUrl,
`vm:${vm.vmid}`,
apiContext.headers,
apiContext.insecure,
);
logger.dim(` HA stop requested for VM ${vm.vmid} (${vm.name || 'unnamed'})`);
}
for (const ct of haCtsToStop) {
await this.requestHaStopApi(
apiContext.baseUrl,
`ct:${ct.vmid}`,
apiContext.headers,
apiContext.insecure,
);
logger.dim(` HA stop requested for CT ${ct.vmid} (${ct.name || 'unnamed'})`);
}
for (const vm of vmsToStop) {
await this.shutdownVMApi(baseUrl, node, vm.vmid, headers, insecure);
for (const vm of directVmsToStop) {
await this.shutdownVMApi(
apiContext.baseUrl,
node,
vm.vmid,
apiContext.headers,
apiContext.insecure,
);
logger.dim(` Shutdown sent to VM ${vm.vmid} (${vm.name || 'unnamed'})`);
}
for (const ct of ctsToStop) {
await this.shutdownCTApi(baseUrl, node, ct.vmid, headers, insecure);
for (const ct of directCtsToStop) {
await this.shutdownCTApi(
apiContext.baseUrl,
node,
ct.vmid,
apiContext.headers,
apiContext.insecure,
);
logger.dim(` Shutdown sent to CT ${ct.vmid} (${ct.name || 'unnamed'})`);
}
}
@@ -220,18 +308,23 @@ export class ProxmoxAction extends Action {
} else {
await this.stopCTCli(resolved.pctPath, item.vmid);
}
} else {
const host = this.config.proxmoxHost || PROXMOX.DEFAULT_HOST;
const port = this.config.proxmoxPort || PROXMOX.DEFAULT_PORT;
const insecure = this.config.proxmoxInsecure !== false;
const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`;
const headers: Record<string, string> = {
'Authorization': `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`,
};
} else if (apiContext) {
if (item.type === 'qemu') {
await this.stopVMApi(baseUrl, node, item.vmid, headers, insecure);
await this.stopVMApi(
apiContext.baseUrl,
node,
item.vmid,
apiContext.headers,
apiContext.insecure,
);
} else {
await this.stopCTApi(baseUrl, node, item.vmid, headers, insecure);
await this.stopCTApi(
apiContext.baseUrl,
node,
item.vmid,
apiContext.headers,
apiContext.insecure,
);
}
}
logger.dim(` Force-stopped ${item.type} ${item.vmid} (${item.name || 'unnamed'})`);
@@ -252,6 +345,8 @@ export class ProxmoxAction extends Action {
logger.error(
`Proxmox action failed: ${error instanceof Error ? error.message : String(error)}`,
);
} finally {
ProxmoxAction.activeRunKeys.delete(runKey);
}
}
@@ -357,6 +452,77 @@ export class ProxmoxAction extends Action {
return status;
}
private async getHaManagedResources(
resolved: { mode: 'api' | 'cli'; qmPath?: string; pctPath?: string },
apiContext: {
baseUrl: string;
headers: Record<string, string>;
insecure: boolean;
} | null,
): Promise<{ qemu: Set<number>; lxc: Set<number> }> {
if (resolved.mode === 'cli') {
const { haManagerPath } = ProxmoxAction.detectCliAvailability();
if (!haManagerPath) {
return { qemu: new Set<number>(), lxc: new Set<number>() };
}
return await this.getHaManagedResourcesCli(haManagerPath);
}
if (!apiContext) {
return { qemu: new Set<number>(), lxc: new Set<number>() };
}
return await this.getHaManagedResourcesApi(
apiContext.baseUrl,
apiContext.headers,
apiContext.insecure,
);
}
private async getHaManagedResourcesCli(
haManagerPath: string,
): Promise<{ qemu: Set<number>; lxc: Set<number> }> {
try {
const { stdout } = await execFileAsync(haManagerPath, ['config']);
return this.parseHaManagerConfig(stdout);
} catch (error) {
logger.warn(
`Failed to list HA resources via CLI: ${
error instanceof Error ? error.message : String(error)
}`,
);
return { qemu: new Set<number>(), lxc: new Set<number>() };
}
}
private parseHaManagerConfig(output: string): { qemu: Set<number>; lxc: Set<number> } {
const resources = {
qemu: new Set<number>(),
lxc: new Set<number>(),
};
for (const line of output.trim().split('\n')) {
const match = line.match(/^\s*(vm|ct)\s*:\s*(\d+)\s*$/i);
if (!match) {
continue;
}
const vmid = parseInt(match[2], 10);
if (match[1].toLowerCase() === 'vm') {
resources.qemu.add(vmid);
} else {
resources.lxc.add(vmid);
}
}
return resources;
}
private async requestHaStopCli(haManagerPath: string, sid: string): Promise<void> {
await execFileAsync(haManagerPath, ['set', sid, '--state', 'stopped']);
}
// ─── API-based methods ─────────────────────────────────────────────
/**
@@ -367,16 +533,23 @@ export class ProxmoxAction extends Action {
method: string,
headers: Record<string, string>,
insecure: boolean,
body?: URLSearchParams,
): Promise<unknown> {
const requestHeaders = { ...headers };
const fetchOptions: RequestInit = {
method,
headers,
headers: requestHeaders,
};
if (body) {
requestHeaders['Content-Type'] = 'application/x-www-form-urlencoded;charset=UTF-8';
fetchOptions.body = body.toString();
}
// Use NODE_TLS_REJECT_UNAUTHORIZED for insecure mode (self-signed certs)
if (insecure) {
// deno-lint-ignore no-explicit-any
(globalThis as any).process?.env && ((globalThis as any).process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0');
const nodeProcess = (globalThis as TNodeLikeGlobal).process;
if (insecure && nodeProcess?.env) {
nodeProcess.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
}
try {
@@ -390,9 +563,8 @@ export class ProxmoxAction extends Action {
return await response.json();
} finally {
// Restore TLS verification
if (insecure) {
// deno-lint-ignore no-explicit-any
(globalThis as any).process?.env && ((globalThis as any).process.env.NODE_TLS_REJECT_UNAUTHORIZED = '1');
if (insecure && nodeProcess?.env) {
nodeProcess.env.NODE_TLS_REJECT_UNAUTHORIZED = '1';
}
}
}
@@ -453,6 +625,63 @@ export class ProxmoxAction extends Action {
}
}
private async getHaManagedResourcesApi(
baseUrl: string,
headers: Record<string, string>,
insecure: boolean,
): Promise<{ qemu: Set<number>; lxc: Set<number> }> {
try {
const response = await this.apiRequest(
`${baseUrl}/cluster/ha/resources`,
'GET',
headers,
insecure,
) as { data: Array<{ sid?: string }> };
const resources = {
qemu: new Set<number>(),
lxc: new Set<number>(),
};
for (const item of response.data || []) {
const match = item.sid?.match(/^(vm|ct):(\d+)$/i);
if (!match) {
continue;
}
const vmid = parseInt(match[2], 10);
if (match[1].toLowerCase() === 'vm') {
resources.qemu.add(vmid);
} else {
resources.lxc.add(vmid);
}
}
return resources;
} catch (error) {
logger.warn(
`Failed to list HA resources via API: ${
error instanceof Error ? error.message : String(error)
}`,
);
return { qemu: new Set<number>(), lxc: new Set<number>() };
}
}
private async requestHaStopApi(
baseUrl: string,
sid: string,
headers: Record<string, string>,
insecure: boolean,
): Promise<void> {
await this.apiRequest(
`${baseUrl}/cluster/ha/resources/${encodeURIComponent(sid)}`,
'PUT',
headers,
insecure,
new URLSearchParams({ state: 'stopped' }),
);
}
private async shutdownVMApi(
baseUrl: string,
node: string,
@@ -529,7 +758,9 @@ export class ProxmoxAction extends Action {
while (remaining.length > 0 && (Date.now() - startTime) < timeout) {
// Wait before polling
await new Promise((resolve) => setTimeout(resolve, PROXMOX.STATUS_POLL_INTERVAL_SECONDS * 1000));
await new Promise((resolve) =>
setTimeout(resolve, PROXMOX.STATUS_POLL_INTERVAL_SECONDS * 1000)
);
// Check which are still running
const stillRunning: typeof remaining = [];
@@ -547,7 +778,8 @@ export class ProxmoxAction extends Action {
const insecure = this.config.proxmoxInsecure !== false;
const baseUrl = `https://${host}:${port}${PROXMOX.API_BASE}`;
const headers: Record<string, string> = {
'Authorization': `PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`,
'Authorization':
`PVEAPIToken=${this.config.proxmoxTokenId}=${this.config.proxmoxTokenSecret}`,
};
const statusUrl = `${baseUrl}/nodes/${node}/${item.type}/${item.vmid}/status/current`;
const response = await this.apiRequest(statusUrl, 'GET', headers, insecure) as {
+22
View File
@@ -15,6 +15,7 @@ const execFileAsync = promisify(execFile);
*/
export class ShutdownAction extends Action {
readonly type = 'shutdown';
private static scheduledDelayMinutes: number | null = null;
/**
* Override shouldExecute to add shutdown-specific safety checks
@@ -126,6 +127,25 @@ export class ShutdownAction extends Action {
const shutdownDelay = this.config.shutdownDelay ?? SHUTDOWN.DEFAULT_DELAY_MINUTES;
if (
ShutdownAction.scheduledDelayMinutes !== null &&
ShutdownAction.scheduledDelayMinutes <= shutdownDelay
) {
logger.info(
`Shutdown action skipped: shutdown already scheduled in ${ShutdownAction.scheduledDelayMinutes} minutes`,
);
return;
}
if (
ShutdownAction.scheduledDelayMinutes !== null &&
ShutdownAction.scheduledDelayMinutes > shutdownDelay
) {
logger.warn(
`Shutdown already scheduled in ${ShutdownAction.scheduledDelayMinutes} minutes, rescheduling to ${shutdownDelay} minutes`,
);
}
logger.log('');
logger.logBoxTitle('Initiating System Shutdown', UI.WIDE_BOX_WIDTH, 'error');
logger.logBoxLine(`UPS: ${context.upsName} (${context.upsId})`);
@@ -139,6 +159,7 @@ export class ShutdownAction extends Action {
try {
await this.executeShutdownCommand(shutdownDelay);
ShutdownAction.scheduledDelayMinutes = shutdownDelay;
} catch (error) {
logger.error(
`Shutdown command failed: ${error instanceof Error ? error.message : String(error)}`,
@@ -227,6 +248,7 @@ export class ShutdownAction extends Action {
logger.log(`Trying alternative shutdown method: ${cmdPath} ${alt.args.join(' ')}`);
await execFileAsync(cmdPath, alt.args);
logger.log(`Alternative method ${alt.cmd} succeeded`);
ShutdownAction.scheduledDelayMinutes = 0;
return; // Exit if successful
}
} catch (_altError) {