feat(cli/daemon/processmonitor): Add flexible target resolution and search command; improve restart/backoff and error handling

This commit is contained in:
2025-08-30 16:55:10 +00:00
parent 22a43204d4
commit ebc20a9232
17 changed files with 327 additions and 109 deletions

View File

@@ -156,6 +156,11 @@ export class ProcessManager extends EventEmitter {
this.updateProcessInfo(config.id, { pid: undefined });
});
// Set up failure handler to mark process as errored
monitor.on('failed', () => {
this.updateProcessInfo(config.id, { status: 'errored', pid: undefined });
});
await monitor.start();
// Wait a moment for the process to spawn and get its PID
@@ -327,6 +332,11 @@ export class ProcessManager extends EventEmitter {
});
}
// Mark errored on failure events
newMonitor.on('failed', () => {
this.updateProcessInfo(id, { status: 'errored', pid: undefined });
});
this.logger.info(`Successfully restarted process with id '${id}'`);
} catch (error: Error | unknown) {
const processError = new ProcessError(

View File

@@ -18,6 +18,10 @@ export class ProcessMonitor extends EventEmitter {
private processId?: ProcessId;
private currentLogMemorySize: number = 0;
private readonly MAX_LOG_MEMORY_SIZE = 10 * 1024 * 1024; // 10MB
private restartTimer: NodeJS.Timeout | null = null;
private lastRetryAt: number | null = null;
private readonly MAX_RETRIES = 10;
private readonly RESET_WINDOW_MS = 60 * 60 * 1000; // 1 hour
constructor(config: IMonitorConfig & { id?: ProcessId }) {
super();
@@ -132,10 +136,7 @@ export class ProcessMonitor extends EventEmitter {
this.emit('exit', code, signal);
if (!this.stopped) {
this.logger.info('Restarting process...');
this.log('Restarting process...');
this.restartCount++;
this.spawnProcess();
this.scheduleRestart('exit');
} else {
this.logger.debug(
'Not restarting process because monitor is stopped',
@@ -164,10 +165,7 @@ export class ProcessMonitor extends EventEmitter {
}
if (!this.stopped) {
this.logger.info('Restarting process due to error...');
this.log('Restarting process due to error...');
this.restartCount++;
this.spawnProcess();
this.scheduleRestart('error');
} else {
this.logger.debug('Not restarting process because monitor is stopped');
}
@@ -185,6 +183,49 @@ export class ProcessMonitor extends EventEmitter {
}
}
/**
* Schedule a restart with incremental debounce and failure cutoff.
*/
private scheduleRestart(reason: 'exit' | 'error'): void {
const now = Date.now();
// Reset window: if last retry was more than 1 hour ago, reset counter
if (this.lastRetryAt && now - this.lastRetryAt >= this.RESET_WINDOW_MS) {
this.logger.info('Resetting retry counter after 1 hour window');
this.restartCount = 0;
}
// Already at or above max retries?
if (this.restartCount >= this.MAX_RETRIES) {
const msg = 'Maximum restart attempts reached. Marking process as failed.';
this.logger.warn(msg);
this.log(msg);
this.stopped = true;
// Emit a specific event so manager can set status to errored
this.emit('failed');
return;
}
// Increment and compute delay (1..10 seconds)
this.restartCount++;
const delaySec = Math.min(this.restartCount, 10);
const msg = `Restarting process in ${delaySec}s (attempt ${this.restartCount}/${this.MAX_RETRIES}) due to ${reason}...`;
this.logger.info(msg);
this.log(msg);
// Clear existing timer if any, then schedule
if (this.restartTimer) {
clearTimeout(this.restartTimer);
}
this.lastRetryAt = now;
this.restartTimer = setTimeout(() => {
// If stopped in the meantime, do not spawn
if (this.stopped) {
return;
}
this.spawnProcess();
}, delaySec * 1000);
}
/**
* Monitor the process group's memory usage. If the total memory exceeds the limit,
* kill the process group so that the 'exit' handler can restart it.

View File

@@ -208,6 +208,8 @@ export class TspmDaemon {
async (request: RequestForMethod<'delete'>) => {
try {
const id = toProcessId(request.id);
// Ensure desired state reflects stopped before deletion
await this.tspmInstance.setDesiredState(id, 'stopped');
await this.tspmInstance.delete(id);
return {
success: true,
@@ -246,18 +248,7 @@ export class TspmDaemon {
},
);
this.ipcServer.onMessage(
'remove',
async (request: RequestForMethod<'remove'>) => {
try {
const id = toProcessId(request.id);
await this.tspmInstance.delete(id);
return { success: true, message: `Process ${id} deleted successfully` };
} catch (error) {
throw new Error(`Failed to remove process: ${error.message}`);
}
},
);
// Note: 'remove' is only a CLI alias. Daemon exposes 'delete' only.
this.ipcServer.onMessage(
'list',
@@ -291,6 +282,58 @@ export class TspmDaemon {
},
);
// Resolve target (id:n | name:foo | numeric string) to ProcessId
this.ipcServer.onMessage(
'resolveTarget',
async (request: RequestForMethod<'resolveTarget'>) => {
const raw = String(request.target || '').trim();
if (!raw) {
throw new Error('Empty target');
}
// id:<n>
if (/^id:\s*\d+$/i.test(raw)) {
const idNum = raw.split(':')[1].trim();
const id = toProcessId(idNum);
const config = this.tspmInstance.processConfigs.get(id);
if (!config) throw new Error(`Process ${id} not found`);
return { id, name: config.name } as ResponseForMethod<'resolveTarget'>;
}
// name:<label>
if (/^name:/i.test(raw)) {
const name = raw.slice(raw.indexOf(':') + 1).trim();
if (!name) throw new Error('Missing name after name:');
const matches = Array.from(this.tspmInstance.processConfigs.values()).filter(
(c) => (c.name || '').trim() === name,
);
if (matches.length === 0) {
throw new Error(`No process found with name "${name}"`);
}
if (matches.length > 1) {
const ids = matches.map((c) => String(c.id)).join(', ');
throw new Error(
`Multiple processes found with name "${name}": ids [${ids}]. Please use id:<n>.`,
);
}
return { id: matches[0].id, name } as ResponseForMethod<'resolveTarget'>;
}
// bare numeric id
if (/^\d+$/.test(raw)) {
const id = toProcessId(raw);
const config = this.tspmInstance.processConfigs.get(id);
if (!config) throw new Error(`Process ${id} not found`);
return { id, name: config.name } as ResponseForMethod<'resolveTarget'>;
}
// Unknown format
throw new Error(
'Unsupported target format. Use numeric id (e.g. 1), id:<n> (e.g. id:1), or name:<label> (e.g. name:api).',
);
},
);
// Batch operations handlers
this.ipcServer.onMessage(
'startAll',