Compare commits

...

4 Commits

Author SHA1 Message Date
1c4ffbb612 5.6.2
Some checks failed
Default (tags) / security (push) Successful in 50s
Default (tags) / test (push) Failing after 12m37s
Default (tags) / release (push) Has been cancelled
Default (tags) / metadata (push) Has been cancelled
2025-08-31 07:45:48 +00:00
0a75c4cf76 fix(processmanager): Improve process lifecycle handling and cleanup in daemon, monitors and wrappers 2025-08-31 07:45:47 +00:00
8f31672a67 5.6.1
Some checks failed
Default (tags) / security (push) Successful in 51s
Default (tags) / test (push) Failing after 3m57s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-08-31 00:01:50 +00:00
b3087831e2 fix(daemon): Ensure robust process shutdown and improve logs/subscriber diagnostics 2025-08-31 00:01:50 +00:00
9 changed files with 135 additions and 30 deletions

View File

@@ -1,5 +1,23 @@
# Changelog
## 2025-08-31 - 5.6.2 - fix(processmanager)
Improve process lifecycle handling and cleanup in daemon, monitors and wrappers
- StartAll: when a monitor exists but is not running, restart it instead of skipping — ensures saved processes are reliably brought online.
- ProcessMonitor.stop: cancel any pending restart timers to prevent stray restarts after explicit stop.
- ProcessWrapper: add killProcessTree helper and use it for graceful (SIGTERM) and force (SIGKILL) shutdowns to reliably signal child processes.
- Daemon stopAll: yield briefly after stopping processes and inspect monitors (not only processInfo) to accurately report stopped vs failed processes.
## 2025-08-31 - 5.6.1 - fix(daemon)
Ensure robust process shutdown and improve logs/subscriber diagnostics
- Make ProcessWrapper.stop asynchronous and awaitable to avoid race conditions when stopping processes
- Signal entire process groups on POSIX (kill by negative PID) and fall back to per-PID signalling; escalate to SIGKILL after a timeout
- Await processWrapper.stop() from ProcessMonitor when enforcing memory limits or handling exits/errors to ensure child processes are cleaned up
- Add logs:subscribers IPC endpoint and corresponding types to inspect current subscribers for a process log topic
- Add optional CLI debug output in logs command (enabled via TSPM_DEBUG=true) to print subscriber counts and details
- Support passing request.lines to getLogs handler in daemon to limit returned log entries
## 2025-08-30 - 5.6.0 - feat(processmonitor)
Add CPU monitoring and display CPU in process list

View File

@@ -1,6 +1,6 @@
{
"name": "@git.zone/tspm",
"version": "5.6.0",
"version": "5.6.2",
"private": false,
"description": "a no fuzz process manager",
"main": "dist_ts/index.js",

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@git.zone/tspm',
version: '5.6.0',
version: '5.6.2',
description: 'a no fuzz process manager'
}

View File

@@ -144,6 +144,13 @@ export function registerLogsCommand(smartcli: plugins.smartcli.Smartcli) {
await withStreamingLifecycle(
async () => {
// Optional: debug subscribers if requested via env (hidden)
if (process.env.TSPM_DEBUG === 'true') {
try {
const subInfo = await tspmIpcClient.request('logs:subscribers' as any, { id });
console.log(`[DEBUG] Subscribers for logs.${id}: ${subInfo.count} (${(subInfo.subscribers||[]).join(',')})`);
} catch {}
}
await tspmIpcClient.subscribe(id, (log: any) => {
// Reset sequence if runId changed (e.g., process restarted)
if (log.runId && log.runId !== lastRunId) {

View File

@@ -499,8 +499,12 @@ export class ProcessManager extends EventEmitter {
*/
public async startAll(): Promise<void> {
for (const [id, config] of this.processConfigs.entries()) {
if (!this.processes.has(id)) {
const monitor = this.processes.get(id);
if (!monitor) {
await this.start(config);
} else if (!monitor.isRunning()) {
// If a monitor exists but is not running, restart the process to ensure a clean start
await this.restart(id);
}
}
}

View File

@@ -291,7 +291,7 @@ export class ProcessMonitor extends EventEmitter {
// Stop the process wrapper, which will trigger the exit handler and restart
if (this.processWrapper) {
this.processWrapper.stop();
await this.processWrapper.stop();
}
}
} catch (error: Error | unknown) {
@@ -400,6 +400,11 @@ export class ProcessMonitor extends EventEmitter {
if (this.intervalId) {
clearInterval(this.intervalId);
}
// Cancel any pending restart timer
if (this.restartTimer) {
clearTimeout(this.restartTimer);
this.restartTimer = null;
}
if (this.processWrapper) {
// Clear pidusage state for current PID before stopping to avoid leaks
try {
@@ -408,7 +413,7 @@ export class ProcessMonitor extends EventEmitter {
(plugins.pidusage as any)?.clear?.(pidToClear);
}
} catch {}
this.processWrapper.stop();
await this.processWrapper.stop();
}
}

View File

@@ -23,6 +23,26 @@ export class ProcessWrapper extends EventEmitter {
private runId: string = '';
private stdoutRemainder: string = '';
private stderrRemainder: string = '';
/**
 * Send `signal` to the wrapped process and to every descendant reported by
 * `plugins.psTree` (best-effort).
 *
 * The returned promise resolves once delivery has been attempted for every
 * PID; individual `process.kill` failures are ignored. If the descendant
 * lookup reports an error or yields no list, only the root process is
 * signalled.
 *
 * @param signal - The signal to deliver, e.g. `'SIGTERM'` or `'SIGKILL'`.
 */
private async killProcessTree(signal: NodeJS.Signals): Promise<void> {
  if (!this.process || !this.process.pid) return;
  const rootPid = this.process.pid;
  await new Promise<void>((resolve) => {
    plugins.psTree(rootPid, (err: unknown, children: ReadonlyArray<{ PID: string }>) => {
      try {
        // A failed lookup may hand us no child list at all; fall back to the root only
        const descendants = err || !children ? [] : children;
        const childPids = descendants
          .map((child) => Number(child.PID))
          // Only strictly positive integers are real PIDs. 0 (e.g. Number('')) and
          // negative values would address whole process groups instead.
          .filter((pid) => Number.isInteger(pid) && pid > 0);
        for (const pid of [rootPid, ...childPids]) {
          try {
            // Always signal individual PIDs to avoid accidentally targeting unrelated groups
            process.kill(pid, signal);
          } catch {
            // ignore ESRCH/EPERM
          }
        }
      } finally {
        // Always settle so callers awaiting shutdown can never hang here
        resolve();
      }
    });
  });
}
constructor(options: IProcessWrapperOptions) {
super();
@@ -180,7 +200,7 @@ export class ProcessWrapper extends EventEmitter {
/**
* Stop the wrapped process
*/
public stop(): void {
public async stop(): Promise<void> {
if (!this.process) {
this.logger.debug('Stop called but no process is running');
this.addSystemLog('No process running');
@@ -193,30 +213,46 @@ export class ProcessWrapper extends EventEmitter {
// First try SIGTERM for graceful shutdown
if (this.process.pid) {
try {
this.logger.debug(`Sending SIGTERM to process ${this.process.pid}`);
process.kill(this.process.pid, 'SIGTERM');
this.logger.debug(`Sending SIGTERM to process tree rooted at ${this.process.pid}`);
await this.killProcessTree('SIGTERM');
// Give it 5 seconds to shut down gracefully
setTimeout((): void => {
if (this.process && this.process.pid) {
// If the process already exited, return immediately
if (typeof this.process.exitCode === 'number') {
this.logger.debug('Process already exited, no need to wait');
return;
}
// Wait for exit or escalate
await new Promise<void>((resolve) => {
let settled = false;
const cleanup = () => {
if (settled) return;
settled = true;
resolve();
};
const onExit = () => cleanup();
this.process!.once('exit', onExit);
const killTimer = setTimeout(async () => {
if (!this.process || !this.process.pid) return cleanup();
this.logger.warn(
`Process ${this.process.pid} did not exit gracefully, force killing...`,
);
this.addSystemLog(
'Process did not exit gracefully, force killing...',
`Process ${this.process.pid} did not exit gracefully, force killing tree...`,
);
this.addSystemLog('Process did not exit gracefully, force killing...');
try {
process.kill(this.process.pid, 'SIGKILL');
} catch (error: Error | unknown) {
// Process might have exited between checks
this.logger.debug(
`Failed to send SIGKILL, process probably already exited: ${
error instanceof Error ? error.message : String(error)
}`,
);
}
}
}, 5000);
await this.killProcessTree('SIGKILL');
} catch {}
// Give a short grace period after SIGKILL
setTimeout(() => cleanup(), 500);
}, 5000);
// Safety cap in case neither exit nor timer fires (shouldn't happen)
setTimeout(() => {
clearTimeout(killTimer);
cleanup();
}, 10000);
});
} catch (error: Error | unknown) {
const processError = new ProcessError(
error instanceof Error ? error.message : String(error),

View File

@@ -293,7 +293,8 @@ export class TspmDaemon {
this.ipcServer.onMessage(
'getLogs',
async (request: RequestForMethod<'getLogs'>) => {
const logs = await this.tspmInstance.getLogs(toProcessId(request.id));
const id = toProcessId(request.id);
const logs = await this.tspmInstance.getLogs(id, request.lines);
return { logs };
},
);
@@ -346,6 +347,26 @@ export class TspmDaemon {
},
);
// Report the current subscribers of a process's log topic (`logs.<id>`)
this.ipcServer.onMessage(
  'logs:subscribers',
  async (
    request: RequestForMethod<'logs:subscribers'>,
    clientId: string,
  ) => {
    const topic = `logs.${toProcessId(request.id)}`;
    // Default to "no subscribers"; only overwritten when the lookup succeeds
    let subscribers: string[] = [];
    try {
      // topicIndex is read through an `any` cast, so guard against it being absent
      const index = (this.ipcServer as any).topicIndex as Map<string, Set<string>> | undefined;
      subscribers = Array.from(index?.get(topic) || []);
    } catch {
      // Lookup failed: report an empty subscriber list rather than erroring
    }
    return { topic, subscribers, count: subscribers.length } as any;
  },
);
// Resolve target (id:n | name:foo | numeric string) to ProcessId
this.ipcServer.onMessage(
'resolveTarget',
@@ -429,10 +450,12 @@ export class TspmDaemon {
await this.tspmInstance.setDesiredStateForAll('stopped');
await this.tspmInstance.stopAll();
// Yield briefly to allow any pending exit events to settle
await new Promise((r) => setTimeout(r, 50));
// Get status of all processes
for (const [id, processInfo] of this.tspmInstance.processInfo) {
if (processInfo.status === 'stopped') {
// Determine which monitors are no longer running
for (const [id, monitor] of this.tspmInstance.processes) {
if (!monitor.isRunning()) {
stopped.push(id);
} else {
failed.push({ id, error: 'Failed to stop' });

View File

@@ -151,6 +151,17 @@ export interface LogsSubscribeResponse {
ok: boolean;
}
// Inspect current subscribers for a process log topic
/** Request payload for the `logs:subscribers` IPC method. */
export interface LogsSubscribersRequest {
/** Id of the process whose log topic should be inspected. */
id: ProcessId;
}
/** Response payload for the `logs:subscribers` IPC method. */
export interface LogsSubscribersResponse {
/** Name of the inspected log topic. */
topic: string;
/** Identifiers of the current subscribers — presumably IPC client ids; confirm against the server. */
subscribers: string[];
/** Number of subscribers reported. */
count: number;
}
// Start all command
export interface StartAllRequest {
// No parameters needed
@@ -287,6 +298,7 @@ export type IpcMethodMap = {
describe: { request: DescribeRequest; response: DescribeResponse };
getLogs: { request: GetLogsRequest; response: GetLogsResponse };
'logs:subscribe': { request: LogsSubscribeRequest; response: LogsSubscribeResponse };
'logs:subscribers': { request: LogsSubscribersRequest; response: LogsSubscribersResponse };
startAll: { request: StartAllRequest; response: StartAllResponse };
stopAll: { request: StopAllRequest; response: StopAllResponse };
restartAll: { request: RestartAllRequest; response: RestartAllResponse };