Files
tspm/ts/daemon/tspm.daemon.ts

757 lines
24 KiB
TypeScript

import * as plugins from '../plugins.js';
import * as paths from '../paths.js';
import { toProcessId } from '../shared/protocol/id.js';
import type { ProcessId } from '../shared/protocol/id.js';
import { ProcessManager } from './processmanager.js';
import type {
IpcMethodMap,
RequestForMethod,
ResponseForMethod,
DaemonStatusResponse,
HeartbeatResponse,
} from '../shared/protocol/ipc.types.js';
import { LogPersistence } from './logpersistence.js';
/**
* Central daemon server that manages all TSPM processes
*/
export class TspmDaemon {
private tspmInstance: ProcessManager;
private ipcServer: plugins.smartipc.IpcServer;
private startTime: number;
private isShuttingDown: boolean = false;
private socketPath: string;
private heartbeatInterval: NodeJS.Timeout | null = null;
private daemonPidFile: string;
private version: string;
constructor() {
this.tspmInstance = new ProcessManager();
this.socketPath = plugins.path.join(paths.tspmDir, 'tspm.sock');
this.daemonPidFile = plugins.path.join(paths.tspmDir, 'daemon.pid');
this.startTime = Date.now();
// Determine daemon version from package metadata
try {
const proj = new plugins.projectinfo.ProjectInfo(paths.packageDir);
this.version = proj.npm.version || 'unknown';
} catch {
this.version = 'unknown';
}
}
/**
* Start the daemon server
*/
public async start(): Promise<void> {
console.log('Starting TSPM daemon...');
// Ensure the TSPM directory exists
const fs = await import('fs/promises');
await fs.mkdir(paths.tspmDir, { recursive: true });
// Check if another daemon is already running
if (await this.isDaemonRunning()) {
throw new Error('Another TSPM daemon instance is already running');
}
// Initialize IPC server
this.ipcServer = plugins.smartipc.SmartIpc.createServer({
id: 'tspm-daemon',
socketPath: this.socketPath,
autoCleanupSocketFile: true, // Clean up stale sockets
socketMode: 0o600, // Set proper permissions
heartbeat: true,
heartbeatInterval: 5000,
heartbeatTimeout: 20000,
heartbeatInitialGracePeriodMs: 10000, // Grace period for startup
heartbeatThrowOnTimeout: false, // Don't throw, emit events instead
});
// Debug hooks for connection troubleshooting
this.ipcServer.on('clientConnect', (clientId: string) => {
console.log(`[IPC] client connected: ${clientId}`);
});
this.ipcServer.on('clientDisconnect', (clientId: string) => {
console.log(`[IPC] client disconnected: ${clientId}`);
});
this.ipcServer.on('error', (err: any) => {
console.error('[IPC] server error:', err?.message || err);
});
// Register message handlers
this.registerHandlers();
// Start the IPC server and wait until ready to accept connections
await this.ipcServer.start({ readyWhen: 'accepting' });
// Write PID file
await this.writePidFile();
// Start heartbeat monitoring
this.startHeartbeatMonitoring();
// Load existing process configurations
await this.tspmInstance.loadProcessConfigs();
await this.tspmInstance.loadDesiredStates();
// Set up log publishing
this.tspmInstance.on('process:log', ({ processId, log }) => {
// Publish to topic for this process
const topic = `logs.${processId}`;
// Deliver only to subscribed clients
if (this.ipcServer) {
try {
const topicIndex = (this.ipcServer as any).topicIndex as Map<string, Set<string>> | undefined;
const subscribers = topicIndex?.get(topic);
if (subscribers && subscribers.size > 0) {
// Send directly to subscribers for this topic
for (const clientId of subscribers) {
this.ipcServer
.sendToClient(clientId, `topic:${topic}`, log)
.catch((err: any) => {
// Surface but don't fail the loop
console.error('[IPC] sendToClient error:', err?.message || err);
});
}
}
} catch (err: any) {
console.error('[IPC] Topic delivery error:', err?.message || err);
}
}
});
// Set up graceful shutdown handlers
this.setupShutdownHandlers();
// Start processes that should be online per desired state
await this.tspmInstance.startDesired();
console.log(`TSPM daemon started successfully on ${this.socketPath}`);
console.log(`PID: ${process.pid}`);
}
/**
* Register all IPC message handlers
*/
private registerHandlers(): void {
// Process management handlers
this.ipcServer.onMessage(
'start',
async (request: RequestForMethod<'start'>) => {
try {
await this.tspmInstance.setDesiredState(request.config.id, 'online');
await this.tspmInstance.start(request.config);
const processInfo = this.tspmInstance.processInfo.get(
request.config.id,
);
return {
processId: request.config.id,
pid: processInfo?.pid,
status: processInfo?.status || 'stopped',
};
} catch (error) {
throw new Error(`Failed to start process: ${error.message}`);
}
},
);
// Start by id (resolve config on server)
this.ipcServer.onMessage(
'startById',
async (request: RequestForMethod<'startById'>) => {
try {
const id = toProcessId(request.id);
let config = this.tspmInstance.processConfigs.get(id);
if (!config) {
// Try to reload configs if not found (handles races or stale state)
await this.tspmInstance.loadProcessConfigs();
config = this.tspmInstance.processConfigs.get(id) || null as any;
}
if (!config) {
throw new Error(`Process ${id} not found`);
}
await this.tspmInstance.setDesiredState(id, 'online');
const existing = this.tspmInstance.processes.get(id);
if (existing) {
if (existing.isRunning()) {
// Already running; return current status/pid
const runningInfo = this.tspmInstance.processInfo.get(id);
return {
processId: id,
pid: runningInfo?.pid,
status: runningInfo?.status || 'online',
};
} else {
await this.tspmInstance.restart(id);
}
} else {
await this.tspmInstance.start(config);
}
const processInfo = this.tspmInstance.processInfo.get(id);
return {
processId: id,
pid: processInfo?.pid,
status: processInfo?.status || 'stopped',
};
} catch (error) {
throw new Error(`Failed to start process: ${error.message}`);
}
},
);
this.ipcServer.onMessage(
'stop',
async (request: RequestForMethod<'stop'>) => {
try {
const id = toProcessId(request.id);
await this.tspmInstance.setDesiredState(id, 'stopped');
await this.tspmInstance.stop(id);
return {
success: true,
message: `Process ${id} stopped successfully`,
};
} catch (error) {
throw new Error(`Failed to stop process: ${error.message}`);
}
},
);
this.ipcServer.onMessage(
'restart',
async (request: RequestForMethod<'restart'>) => {
try {
const id = toProcessId(request.id);
await this.tspmInstance.setDesiredState(id, 'online');
await this.tspmInstance.restart(id);
const processInfo = this.tspmInstance.processInfo.get(id);
return {
processId: id,
pid: processInfo?.pid,
status: processInfo?.status || 'stopped',
};
} catch (error) {
throw new Error(`Failed to restart process: ${error.message}`);
}
},
);
this.ipcServer.onMessage(
'delete',
async (request: RequestForMethod<'delete'>) => {
try {
const id = toProcessId(request.id);
// Ensure desired state reflects stopped before deletion
await this.tspmInstance.setDesiredState(id, 'stopped');
await this.tspmInstance.delete(id);
return {
success: true,
message: `Process ${id} deleted successfully`,
};
} catch (error) {
throw new Error(`Failed to delete process: ${error.message}`);
}
},
);
// Query handlers
this.ipcServer.onMessage(
'add',
async (request: RequestForMethod<'add'>) => {
try {
const id = await this.tspmInstance.add(request.config as any);
const config = this.tspmInstance.processConfigs.get(id)!;
return { id, config };
} catch (error) {
throw new Error(`Failed to add process: ${error.message}`);
}
},
);
this.ipcServer.onMessage(
'update',
async (request: RequestForMethod<'update'>) => {
try {
const id = toProcessId(request.id);
const updated = await this.tspmInstance.update(id, request.updates as any);
return { id, config: updated };
} catch (error) {
throw new Error(`Failed to update process: ${error.message}`);
}
},
);
// Note: 'remove' is only a CLI alias. Daemon exposes 'delete' only.
this.ipcServer.onMessage(
'list',
async (request: RequestForMethod<'list'>) => {
const processes = await this.tspmInstance.list();
return { processes };
},
);
this.ipcServer.onMessage(
'describe',
async (request: RequestForMethod<'describe'>) => {
const id = toProcessId(request.id);
const result = await this.tspmInstance.describe(id);
if (!result) {
throw new Error(`Process ${id} not found`);
}
// Return correctly shaped response
return {
processInfo: result.info,
config: result.config,
};
},
);
this.ipcServer.onMessage(
'getLogs',
async (request: RequestForMethod<'getLogs'>) => {
const id = toProcessId(request.id);
const logs = await this.tspmInstance.getLogs(id, request.lines);
return { logs };
},
);
// Stream backlog logs and let client subscribe to live topic separately
this.ipcServer.onMessage(
'logs:subscribe',
async (
request: RequestForMethod<'logs:subscribe'>,
clientId: string,
) => {
const id = toProcessId(request.id);
// Determine backlog set
const allLogs = await this.tspmInstance.getLogs(id);
let filtered = allLogs;
if (request.types && request.types.length) {
filtered = filtered.filter((l) => request.types!.includes(l.type));
}
if (request.sinceTime && request.sinceTime > 0) {
filtered = filtered.filter(
(l) => new Date(l.timestamp).getTime() >= request.sinceTime!,
);
}
const lines = request.lines && request.lines > 0 ? request.lines : 0;
if (lines > 0 && filtered.length > lines) {
filtered = filtered.slice(-lines);
}
// Send backlog entries directly to the requesting client as topic messages
// in small batches to avoid overwhelming the transport or client.
const chunkSize = 200;
for (let i = 0; i < filtered.length; i += chunkSize) {
const chunk = filtered.slice(i, i + chunkSize);
await Promise.allSettled(
chunk.map((entry) =>
this.ipcServer.sendToClient(
clientId,
`topic:logs.backlog.${id}`,
{
...entry,
timestamp: new Date(entry.timestamp).getTime(),
},
),
),
);
// Yield a bit between chunks
await new Promise((r) => setTimeout(r, 5));
}
return { ok: true } as any;
},
);
// Inspect subscribers for a process log topic
this.ipcServer.onMessage(
'logs:subscribers',
async (
request: RequestForMethod<'logs:subscribers'>,
clientId: string,
) => {
const id = toProcessId(request.id);
const topic = `logs.${id}`;
try {
const topicIndex = (this.ipcServer as any).topicIndex as Map<string, Set<string>> | undefined;
const subs = Array.from(topicIndex?.get(topic) || []);
// Also include the requesting clientId if it has a local handler without subscription
return { topic, subscribers: subs, count: subs.length } as any;
} catch (err: any) {
return { topic, subscribers: [], count: 0 } as any;
}
},
);
// Resolve target (id:n | name:foo | numeric string) to ProcessId
this.ipcServer.onMessage(
'resolveTarget',
async (request: RequestForMethod<'resolveTarget'>) => {
const raw = String(request.target || '').trim();
if (!raw) {
throw new Error('Empty target');
}
// id:<n>
if (/^id:\s*\d+$/i.test(raw)) {
const idNum = raw.split(':')[1].trim();
const id = toProcessId(idNum);
const config = this.tspmInstance.processConfigs.get(id);
if (!config) throw new Error(`Process ${id} not found`);
return { id, name: config.name } as ResponseForMethod<'resolveTarget'>;
}
// name:<label>
if (/^name:/i.test(raw)) {
const name = raw.slice(raw.indexOf(':') + 1).trim();
if (!name) throw new Error('Missing name after name:');
const matches = Array.from(this.tspmInstance.processConfigs.values()).filter(
(c) => (c.name || '').trim() === name,
);
if (matches.length === 0) {
throw new Error(`No process found with name "${name}"`);
}
if (matches.length > 1) {
const ids = matches.map((c) => String(c.id)).join(', ');
throw new Error(
`Multiple processes found with name "${name}": ids [${ids}]. Please use id:<n>.`,
);
}
return { id: matches[0].id, name } as ResponseForMethod<'resolveTarget'>;
}
// bare numeric id
if (/^\d+$/.test(raw)) {
const id = toProcessId(raw);
const config = this.tspmInstance.processConfigs.get(id);
if (!config) throw new Error(`Process ${id} not found`);
return { id, name: config.name } as ResponseForMethod<'resolveTarget'>;
}
// Unknown format
throw new Error(
'Unsupported target format. Use numeric id (e.g. 1), id:<n> (e.g. id:1), or name:<label> (e.g. name:api).',
);
},
);
// Batch operations handlers
this.ipcServer.onMessage(
'startAll',
async (request: RequestForMethod<'startAll'>) => {
const started: ProcessId[] = [];
const failed: Array<{ id: ProcessId; error: string }> = [];
await this.tspmInstance.setDesiredStateForAll('online');
await this.tspmInstance.startAll();
// Get status of all processes
for (const [id, processInfo] of this.tspmInstance.processInfo) {
if (processInfo.status === 'online') {
started.push(id);
} else {
failed.push({ id, error: 'Failed to start' });
}
}
return { started, failed };
},
);
this.ipcServer.onMessage(
'stopAll',
async (request: RequestForMethod<'stopAll'>) => {
const stopped: ProcessId[] = [];
const failed: Array<{ id: ProcessId; error: string }> = [];
await this.tspmInstance.setDesiredStateForAll('stopped');
await this.tspmInstance.stopAll();
// Yield briefly to allow any pending exit events to settle
await new Promise((r) => setTimeout(r, 50));
// Determine which monitors are no longer running
for (const [id, monitor] of this.tspmInstance.processes) {
if (!monitor.isRunning()) {
stopped.push(id);
} else {
failed.push({ id, error: 'Failed to stop' });
}
}
return { stopped, failed };
},
);
this.ipcServer.onMessage(
'restartAll',
async (request: RequestForMethod<'restartAll'>) => {
const restarted: ProcessId[] = [];
const failed: Array<{ id: ProcessId; error: string }> = [];
await this.tspmInstance.restartAll();
// Get status of all processes
for (const [id, processInfo] of this.tspmInstance.processInfo) {
if (processInfo.status === 'online') {
restarted.push(id);
} else {
failed.push({ id, error: 'Failed to restart' });
}
}
return { restarted, failed };
},
);
// Reset handler: stops all and clears configs
this.ipcServer.onMessage(
'reset',
async (request: RequestForMethod<'reset'>) => {
const result = await this.tspmInstance.reset();
return result;
},
);
// Daemon management handlers
this.ipcServer.onMessage(
'daemon:status',
async (request: RequestForMethod<'daemon:status'>) => {
const memUsage = process.memoryUsage();
// Aggregate log stats from monitors
let totalLogCount = 0;
let totalLogBytes = 0;
const perProcess: Array<{ id: ProcessId; count: number; bytes: number }> = [];
for (const [id, monitor] of this.tspmInstance.processes.entries()) {
try {
const logs = monitor.getLogs();
const count = logs.length;
const bytes = LogPersistence.calculateLogMemorySize(logs);
totalLogCount += count;
totalLogBytes += bytes;
perProcess.push({ id, count, bytes });
} catch {}
}
const pathsInfo = {
tspmDir: paths.tspmDir,
socketPath: this.socketPath,
pidFile: this.daemonPidFile,
};
const configsInfo = {
processConfigs: this.tspmInstance.processConfigs.size,
};
return {
status: 'running',
pid: process.pid,
uptime: Date.now() - this.startTime,
processCount: this.tspmInstance.processes.size,
memoryUsage: memUsage.heapUsed,
cpuUsage: process.cpuUsage().user / 1000000, // Convert to seconds
version: this.version,
logsInMemory: {
totalCount: totalLogCount,
totalBytes: totalLogBytes,
perProcess,
},
paths: pathsInfo,
configs: configsInfo,
};
},
);
this.ipcServer.onMessage(
'daemon:shutdown',
async (request: RequestForMethod<'daemon:shutdown'>) => {
if (this.isShuttingDown) {
return {
success: false,
message: 'Daemon is already shutting down',
};
}
// Schedule shutdown
const graceful = request.graceful !== false;
const timeout = request.timeout || 10000;
if (graceful) {
setTimeout(() => this.shutdown(true), 100);
} else {
setTimeout(() => this.shutdown(false), 100);
}
return {
success: true,
message: `Daemon will shutdown ${graceful ? 'gracefully' : 'immediately'} in ${timeout}ms`,
};
},
);
// Heartbeat handler
this.ipcServer.onMessage(
'heartbeat',
async (request: RequestForMethod<'heartbeat'>) => {
return {
timestamp: Date.now(),
status: this.isShuttingDown ? 'degraded' : 'healthy',
};
},
);
}
/**
* Start heartbeat monitoring
*/
private startHeartbeatMonitoring(): void {
// Send heartbeat every 30 seconds
this.heartbeatInterval = setInterval(() => {
// This is where we could implement health checks
// For now, just log that the daemon is alive
const uptime = Math.floor((Date.now() - this.startTime) / 1000);
console.log(
`[Heartbeat] Daemon alive - Uptime: ${uptime}s, Processes: ${this.tspmInstance.processes.size}`,
);
}, 30000);
}
/**
* Set up graceful shutdown handlers
*/
private setupShutdownHandlers(): void {
const shutdownHandler = async (signal: string) => {
console.log(`\nReceived ${signal}, initiating graceful shutdown...`);
await this.shutdown(true);
};
process.on('SIGTERM', () => shutdownHandler('SIGTERM'));
process.on('SIGINT', () => shutdownHandler('SIGINT'));
process.on('SIGHUP', () => shutdownHandler('SIGHUP'));
// Handle uncaught errors
process.on('uncaughtException', (error) => {
console.error('Uncaught exception:', error);
this.shutdown(false);
});
process.on('unhandledRejection', (reason, promise) => {
console.error('Unhandled rejection at:', promise, 'reason:', reason);
// Don't exit on unhandled rejection, just log it
});
}
/**
* Shutdown the daemon
*/
public async shutdown(graceful: boolean = true): Promise<void> {
if (this.isShuttingDown) {
return;
}
this.isShuttingDown = true;
console.log('Shutting down TSPM daemon...');
// Clear heartbeat interval
if (this.heartbeatInterval) {
clearInterval(this.heartbeatInterval);
this.heartbeatInterval = null;
}
if (graceful) {
// Stop all processes gracefully
try {
console.log('Stopping all managed processes...');
await this.tspmInstance.stopAll();
} catch (error) {
console.error('Error stopping processes:', error);
}
}
// Stop IPC server
if (this.ipcServer) {
try {
await this.ipcServer.stop();
} catch (error) {
console.error('Error stopping IPC server:', error);
}
}
// Remove PID file
await this.removePidFile();
// Remove socket file if it exists
try {
const fs = await import('fs');
await fs.promises.unlink(this.socketPath).catch(() => {});
} catch (error) {
// Ignore errors
}
console.log('TSPM daemon shutdown complete');
process.exit(0);
}
/**
* Check if another daemon instance is running
*/
private async isDaemonRunning(): Promise<boolean> {
try {
const fs = await import('fs');
const pidContent = await fs.promises.readFile(
this.daemonPidFile,
'utf-8',
);
const pid = parseInt(pidContent.trim(), 10);
// Check if process is running
try {
process.kill(pid, 0);
return true; // Process exists
} catch {
// Process doesn't exist, clean up stale PID file
await this.removePidFile();
return false;
}
} catch {
// PID file doesn't exist
return false;
}
}
/**
* Write the daemon PID to a file
*/
private async writePidFile(): Promise<void> {
const fs = await import('fs');
await fs.promises.writeFile(this.daemonPidFile, process.pid.toString());
}
/**
* Remove the daemon PID file
*/
private async removePidFile(): Promise<void> {
try {
const fs = await import('fs');
await fs.promises.unlink(this.daemonPidFile);
} catch {
// Ignore if file doesn't exist
}
}
}
/**
* Main entry point for the daemon
*/
export const startDaemon = async (): Promise<void> => {
const daemon = new TspmDaemon();
await daemon.start();
// Keep the process alive
await new Promise(() => {});
};
// If this file is run directly (not imported), start the daemon
if (process.env.TSPM_DAEMON_MODE === 'true') {
startDaemon().catch((error) => {
console.error('Failed to start TSPM daemon:', error);
process.exit(1);
});
}