feat(cli/daemon/processmonitor): Add flexible target resolution and search command; improve restart/backoff and error handling
This commit is contained in:
@@ -156,6 +156,11 @@ export class ProcessManager extends EventEmitter {
|
||||
this.updateProcessInfo(config.id, { pid: undefined });
|
||||
});
|
||||
|
||||
// Set up failure handler to mark process as errored
|
||||
monitor.on('failed', () => {
|
||||
this.updateProcessInfo(config.id, { status: 'errored', pid: undefined });
|
||||
});
|
||||
|
||||
await monitor.start();
|
||||
|
||||
// Wait a moment for the process to spawn and get its PID
|
||||
@@ -327,6 +332,11 @@ export class ProcessManager extends EventEmitter {
|
||||
});
|
||||
}
|
||||
|
||||
// Mark errored on failure events
|
||||
newMonitor.on('failed', () => {
|
||||
this.updateProcessInfo(id, { status: 'errored', pid: undefined });
|
||||
});
|
||||
|
||||
this.logger.info(`Successfully restarted process with id '${id}'`);
|
||||
} catch (error: Error | unknown) {
|
||||
const processError = new ProcessError(
|
||||
|
@@ -18,6 +18,10 @@ export class ProcessMonitor extends EventEmitter {
|
||||
private processId?: ProcessId;
|
||||
private currentLogMemorySize: number = 0;
|
||||
private readonly MAX_LOG_MEMORY_SIZE = 10 * 1024 * 1024; // 10MB
|
||||
private restartTimer: NodeJS.Timeout | null = null;
|
||||
private lastRetryAt: number | null = null;
|
||||
private readonly MAX_RETRIES = 10;
|
||||
private readonly RESET_WINDOW_MS = 60 * 60 * 1000; // 1 hour
|
||||
|
||||
constructor(config: IMonitorConfig & { id?: ProcessId }) {
|
||||
super();
|
||||
@@ -132,10 +136,7 @@ export class ProcessMonitor extends EventEmitter {
|
||||
this.emit('exit', code, signal);
|
||||
|
||||
if (!this.stopped) {
|
||||
this.logger.info('Restarting process...');
|
||||
this.log('Restarting process...');
|
||||
this.restartCount++;
|
||||
this.spawnProcess();
|
||||
this.scheduleRestart('exit');
|
||||
} else {
|
||||
this.logger.debug(
|
||||
'Not restarting process because monitor is stopped',
|
||||
@@ -164,10 +165,7 @@ export class ProcessMonitor extends EventEmitter {
|
||||
}
|
||||
|
||||
if (!this.stopped) {
|
||||
this.logger.info('Restarting process due to error...');
|
||||
this.log('Restarting process due to error...');
|
||||
this.restartCount++;
|
||||
this.spawnProcess();
|
||||
this.scheduleRestart('error');
|
||||
} else {
|
||||
this.logger.debug('Not restarting process because monitor is stopped');
|
||||
}
|
||||
@@ -185,6 +183,49 @@ export class ProcessMonitor extends EventEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Schedule a delayed restart of the monitored process, with a linearly
 * growing delay and a hard cutoff on the number of attempts.
 *
 * Policy, as implemented below:
 * - If the previous retry was scheduled at least `RESET_WINDOW_MS` ago, the
 *   retry counter starts over from zero.
 * - Once `restartCount` has reached `MAX_RETRIES`, no restart is scheduled:
 *   the monitor is flagged as stopped and a `'failed'` event is emitted.
 * - Otherwise the counter is incremented and the process is respawned after
 *   `min(restartCount, 10)` seconds, unless the monitor was stopped while
 *   the timer was pending.
 *
 * @param reason Why a restart is needed; only used in the log message.
 */
private scheduleRestart(reason: 'exit' | 'error'): void {
  // Upper bound for the restart delay, in seconds.
  const maxDelaySec = 10;
  const now = Date.now();

  // Reset window: compare against null explicitly instead of relying on the
  // truthiness of a numeric timestamp.
  if (
    this.lastRetryAt !== null &&
    now - this.lastRetryAt >= this.RESET_WINDOW_MS
  ) {
    this.logger.info('Resetting retry counter after 1 hour window');
    this.restartCount = 0;
  }

  // Already at or above max retries? Give up and report the failure.
  if (this.restartCount >= this.MAX_RETRIES) {
    const msg = 'Maximum restart attempts reached. Marking process as failed.';
    this.logger.warn(msg);
    this.log(msg);
    this.stopped = true;

    // Drop any restart that is still pending so no stale handle is kept.
    if (this.restartTimer) {
      clearTimeout(this.restartTimer);
      this.restartTimer = null;
    }

    // Emit a specific event so the manager can set status to errored.
    this.emit('failed');
    return;
  }

  // Increment and compute delay (1..10 seconds).
  this.restartCount++;
  const delaySec = Math.min(this.restartCount, maxDelaySec);
  const msg = `Restarting process in ${delaySec}s (attempt ${this.restartCount}/${this.MAX_RETRIES}) due to ${reason}...`;
  this.logger.info(msg);
  this.log(msg);

  // Only one restart may be pending at a time: replace any existing timer.
  if (this.restartTimer) {
    clearTimeout(this.restartTimer);
    this.restartTimer = null;
  }

  this.lastRetryAt = now;
  this.restartTimer = setTimeout(() => {
    // The timer has fired; forget the handle so it is not treated as pending.
    this.restartTimer = null;

    // If stopped in the meantime, do not spawn.
    if (this.stopped) {
      return;
    }
    this.spawnProcess();
  }, delaySec * 1000);
}
|
||||
|
||||
/**
|
||||
* Monitor the process group's memory usage. If the total memory exceeds the limit,
|
||||
* kill the process group so that the 'exit' handler can restart it.
|
||||
|
@@ -208,6 +208,8 @@ export class TspmDaemon {
|
||||
async (request: RequestForMethod<'delete'>) => {
|
||||
try {
|
||||
const id = toProcessId(request.id);
|
||||
// Ensure desired state reflects stopped before deletion
|
||||
await this.tspmInstance.setDesiredState(id, 'stopped');
|
||||
await this.tspmInstance.delete(id);
|
||||
return {
|
||||
success: true,
|
||||
@@ -246,18 +248,7 @@ export class TspmDaemon {
|
||||
},
|
||||
);
|
||||
|
||||
this.ipcServer.onMessage(
|
||||
'remove',
|
||||
async (request: RequestForMethod<'remove'>) => {
|
||||
try {
|
||||
const id = toProcessId(request.id);
|
||||
await this.tspmInstance.delete(id);
|
||||
return { success: true, message: `Process ${id} deleted successfully` };
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to remove process: ${error.message}`);
|
||||
}
|
||||
},
|
||||
);
|
||||
// Note: 'remove' is only a CLI alias. Daemon exposes 'delete' only.
|
||||
|
||||
this.ipcServer.onMessage(
|
||||
'list',
|
||||
@@ -291,6 +282,58 @@ export class TspmDaemon {
|
||||
},
|
||||
);
|
||||
|
||||
// Resolve a user-supplied target string to a ProcessId.
// Accepted forms: bare numeric id ("1"), "id:<n>", or "name:<label>".
this.ipcServer.onMessage(
  'resolveTarget',
  async (request: RequestForMethod<'resolveTarget'>) => {
    const target = String(request.target || '').trim();
    if (target.length === 0) {
      throw new Error('Empty target');
    }

    // Text following the first ':' with surrounding whitespace removed.
    const afterColon = (): string =>
      target.slice(target.indexOf(':') + 1).trim();

    // Shared lookup for both numeric forms: the id must have a stored config.
    const lookupById = (digits: string) => {
      const id = toProcessId(digits);
      const config = this.tspmInstance.processConfigs.get(id);
      if (!config) {
        throw new Error(`Process ${id} not found`);
      }
      return { id, name: config.name } as ResponseForMethod<'resolveTarget'>;
    };

    // id:<n>
    if (/^id:\s*\d+$/i.test(target)) {
      return lookupById(afterColon());
    }

    // name:<label> must match exactly one configured process
    if (/^name:/i.test(target)) {
      const name = afterColon();
      if (!name) {
        throw new Error('Missing name after name:');
      }

      const candidates = [];
      for (const candidate of this.tspmInstance.processConfigs.values()) {
        if ((candidate.name || '').trim() === name) {
          candidates.push(candidate);
        }
      }

      if (candidates.length === 0) {
        throw new Error(`No process found with name "${name}"`);
      }
      if (candidates.length > 1) {
        const ids = candidates.map((c) => String(c.id)).join(', ');
        throw new Error(
          `Multiple processes found with name "${name}": ids [${ids}]. Please use id:<n>.`,
        );
      }
      return { id: candidates[0].id, name } as ResponseForMethod<'resolveTarget'>;
    }

    // bare numeric id
    if (/^\d+$/.test(target)) {
      return lookupById(target);
    }

    // Anything else is an unsupported format
    throw new Error(
      'Unsupported target format. Use numeric id (e.g. 1), id:<n> (e.g. id:1), or name:<label> (e.g. name:api).',
    );
  },
);
|
||||
|
||||
// Batch operations handlers
|
||||
this.ipcServer.onMessage(
|
||||
'startAll',
|
||||
|
Reference in New Issue
Block a user