feat(daemon): Add crash log manager with rotation and integrate crash logging; improve IPC & process listener cleanup

This commit is contained in:
2025-09-01 10:32:51 +00:00
parent 9473924fcc
commit c9d924811d
9 changed files with 932 additions and 31 deletions

View File

@@ -1,5 +1,16 @@
# Changelog
## 2025-09-01 - 5.10.0 - feat(daemon)
Add crash log manager with rotation and integrate crash logging; improve IPC & process listener cleanup
- Introduce CrashLogManager to create formatted crash reports, persist them to disk and rotate old logs (max 100)
- Persist recent process logs, include metadata (exit code, signal, restart attempts, memory) and human-readable sizes in crash reports
- Integrate crash logging into ProcessMonitor: save crash logs on non-zero exits and errors, and persist/rotate logs
- Improve ProcessMonitor and ProcessWrapper by tracking and removing event listeners to avoid memory leaks
- Clear pidusage cache more aggressively to prevent stale entries
- Enhance TspmIpcClient to store/remove lifecycle event handlers on disconnect to avoid dangling listeners
- Add tests and utilities: test/test.crashlog.direct.ts, test/test.crashlog.manual.ts and test/test.crashlog.ts to validate crash log creation and rotation
## 2025-08-31 - 5.9.0 - feat(cli)
Add interactive edit flow to CLI and improve UX

View File

@@ -0,0 +1,148 @@
#!/usr/bin/env tsx
import { CrashLogManager } from '../ts/daemon/crashlogmanager.js';
import type { IProcessLog } from '../ts/shared/protocol/ipc.types.js';
import * as plugins from '../ts/plugins.js';
import * as paths from '../ts/paths.js';
import * as fs from 'fs/promises';
async function testCrashLogManager() {
console.log('🧪 Testing CrashLogManager directly...\n');
const crashLogManager = new CrashLogManager();
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
// Clean up any existing crash logs
console.log('📁 Cleaning up existing crash logs...');
try {
await fs.rm(crashLogsDir, { recursive: true, force: true });
} catch {}
// Create test logs
const testLogs: IProcessLog[] = [
{
timestamp: Date.now() - 5000,
message: '[TEST] Process starting up...',
type: 'stdout'
},
{
timestamp: Date.now() - 4000,
message: '[TEST] Initializing components...',
type: 'stdout'
},
{
timestamp: Date.now() - 3000,
message: '[TEST] Running main loop...',
type: 'stdout'
},
{
timestamp: Date.now() - 2000,
message: '[TEST] Warning: Memory usage high',
type: 'stderr'
},
{
timestamp: Date.now() - 1000,
message: '[TEST] Error: Unhandled exception occurred!',
type: 'stderr'
},
{
timestamp: Date.now() - 500,
message: '[TEST] Fatal: Process crashing with exit code 42',
type: 'stderr'
}
];
// Test saving a crash log
console.log('💾 Saving crash log...');
await crashLogManager.saveCrashLog(
1 as any, // ProcessId
'test-process',
testLogs,
42, // exit code
null, // signal
3, // restart count
1024 * 1024 * 50 // 50MB memory usage
);
// Check if crash log was created
console.log('🔍 Checking for crash log...');
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
console.log(` Found ${crashLogFiles.length} crash log files:`);
crashLogFiles.forEach(file => console.log(` - ${file}`));
if (crashLogFiles.length === 0) {
console.error('❌ No crash logs were created!');
process.exit(1);
}
// Read and display the crash log
const crashLogFile = crashLogFiles[0];
const crashLogPath = plugins.path.join(crashLogsDir, crashLogFile);
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
console.log('\n📋 Crash log content:');
console.log('─'.repeat(60));
console.log(crashLogContent);
console.log('─'.repeat(60));
// Verify content
const checks = [
{ text: 'CRASH REPORT', found: crashLogContent.includes('CRASH REPORT') },
{ text: 'Exit Code: 42', found: crashLogContent.includes('Exit Code: 42') },
{ text: 'Restart Attempt: 3/10', found: crashLogContent.includes('Restart Attempt: 3/10') },
{ text: 'Memory Usage: 50 MB', found: crashLogContent.includes('Memory Usage: 50 MB') },
{ text: 'Fatal: Process crashing', found: crashLogContent.includes('Fatal: Process crashing') }
];
console.log('\n✅ Verification:');
checks.forEach(check => {
console.log(` ${check.found ? '✓' : '✗'} Contains "${check.text}"`);
});
const allChecksPassed = checks.every(c => c.found);
// Test rotation (create 100+ logs to test limit)
console.log('\n🔄 Testing rotation (creating 105 crash logs)...');
for (let i = 2; i <= 105; i++) {
await crashLogManager.saveCrashLog(
i as any,
`test-process-${i}`,
testLogs,
i,
null,
1,
1024 * 1024 * 10
);
// Small delay to ensure different timestamps
await new Promise(resolve => setTimeout(resolve, 10));
}
// Check that we have exactly 100 logs (rotation working)
const finalLogFiles = await fs.readdir(crashLogsDir);
console.log(` After rotation: ${finalLogFiles.length} crash logs (should be 100)`);
if (finalLogFiles.length !== 100) {
console.error(`❌ Rotation failed! Expected 100 logs, got ${finalLogFiles.length}`);
process.exit(1);
}
// Verify oldest logs were deleted (test-process should be gone)
const hasOriginal = finalLogFiles.some(f => f.includes('_1_test-process.log'));
if (hasOriginal) {
console.error('❌ Rotation failed! Oldest log still exists');
process.exit(1);
}
if (allChecksPassed) {
console.log('\n✅ All crash log tests passed!');
} else {
console.log('\n❌ Some crash log tests failed!');
process.exit(1);
}
}
// Run the test
testCrashLogManager().catch(error => {
console.error('❌ Test failed:', error);
process.exit(1);
});

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env tsx
import * as plugins from '../ts/plugins.js';
import * as paths from '../ts/paths.js';
import * as fs from 'fs/promises';
import { execSync } from 'child_process';
// Test process that will crash
const CRASH_SCRIPT = `
setInterval(() => {
console.log('[test] Process is running...');
}, 1000);
setTimeout(() => {
console.error('[test] About to crash with non-zero exit code!');
process.exit(42);
}, 3000);
`;
async function testCrashLog() {
console.log('🧪 Testing crash log functionality...\n');
const crashScriptPath = plugins.path.join(paths.tspmDir, 'test-crash-script.js');
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
try {
// Clean up any existing crash logs
console.log('📁 Cleaning up existing crash logs...');
try {
await fs.rm(crashLogsDir, { recursive: true, force: true });
} catch {}
// Write the crash script
console.log('📝 Writing test crash script...');
await fs.writeFile(crashScriptPath, CRASH_SCRIPT);
// Stop any existing daemon
console.log('🛑 Stopping any existing daemon...');
try {
execSync('tsx ts/cli.ts daemon stop', { stdio: 'inherit' });
} catch {}
await new Promise(resolve => setTimeout(resolve, 1000));
// Start the daemon
console.log('🚀 Starting daemon...');
execSync('tsx ts/cli.ts daemon start', { stdio: 'inherit' });
await new Promise(resolve => setTimeout(resolve, 2000));
// Add a process that will crash
console.log(' Adding crash test process...');
const addOutput = execSync(`tsx ts/cli.ts add "node ${crashScriptPath}" --name crash-test`, { encoding: 'utf-8' });
console.log(addOutput);
// Extract process ID from output
const idMatch = addOutput.match(/Process added with ID: (\d+)/);
if (!idMatch) {
throw new Error('Could not extract process ID from output');
}
const processId = parseInt(idMatch[1]);
console.log(` Process ID: ${processId}`);
// Start the process
console.log('▶️ Starting process that will crash...');
execSync(`tsx ts/cli.ts start ${processId}`, { stdio: 'inherit' });
// Wait for the process to crash (it crashes after 3 seconds)
console.log('⏳ Waiting for process to crash...');
await new Promise(resolve => setTimeout(resolve, 5000));
// Check if crash log was created
console.log('🔍 Checking for crash log...');
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
console.log(` Found ${crashLogFiles.length} crash log files:`);
crashLogFiles.forEach(file => console.log(` - ${file}`));
if (crashLogFiles.length === 0) {
throw new Error('No crash logs were created!');
}
// Find the crash log for our test process
const testCrashLog = crashLogFiles.find(file => file.includes('crash-test'));
if (!testCrashLog) {
throw new Error('Could not find crash log for test process');
}
// Read and display crash log content
const crashLogPath = plugins.path.join(crashLogsDir, testCrashLog);
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
console.log('\n📋 Crash log content:');
console.log('─'.repeat(60));
console.log(crashLogContent);
console.log('─'.repeat(60));
// Verify crash log contains expected information
const checks = [
{ text: 'CRASH REPORT', found: crashLogContent.includes('CRASH REPORT') },
{ text: 'Exit Code: 42', found: crashLogContent.includes('Exit Code: 42') },
{ text: 'About to crash', found: crashLogContent.includes('About to crash') },
{ text: 'Process is running', found: crashLogContent.includes('Process is running') }
];
console.log('\n✅ Verification:');
checks.forEach(check => {
console.log(` ${check.found ? '✓' : '✗'} Contains "${check.text}"`);
});
const allChecksPassed = checks.every(c => c.found);
// Clean up
console.log('\n🧹 Cleaning up...');
execSync(`tsx ts/cli.ts delete ${processId}`, { stdio: 'inherit' });
execSync('tsx ts/cli.ts daemon stop', { stdio: 'inherit' });
await fs.unlink(crashScriptPath).catch(() => {});
if (allChecksPassed) {
console.log('\n✅ All crash log tests passed!');
} else {
console.log('\n❌ Some crash log tests failed!');
process.exit(1);
}
} catch (error) {
console.error('\n❌ Test failed:', error);
// Clean up on error
try {
execSync('tsx ts/cli.ts daemon stop', { stdio: 'inherit' });
await fs.unlink(crashScriptPath).catch(() => {});
} catch {}
process.exit(1);
}
}
// Run the test
testCrashLog();

172
test/test.crashlog.ts Normal file
View File

@@ -0,0 +1,172 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../ts/plugins.js';
import * as paths from '../ts/paths.js';
import * as fs from 'fs/promises';
// Import tspm client
import { tspmIpcClient } from '../ts/client/tspm.ipcclient.js';
// Test process that will crash
const CRASH_SCRIPT = `
setInterval(() => {
console.log('[test] Process is running...');
}, 1000);
setTimeout(() => {
console.error('[test] About to crash with non-zero exit code!');
process.exit(42);
}, 3000);
`;
tap.test('should create crash logs when process crashes', async (tools) => {
const crashScriptPath = plugins.path.join(paths.tspmDir, 'test-crash-script.js');
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
// Clean up any existing crash logs
try {
await fs.rm(crashLogsDir, { recursive: true, force: true });
} catch {}
// Write the crash script
await fs.writeFile(crashScriptPath, CRASH_SCRIPT);
// Start the daemon
console.log('Starting daemon...');
const daemonResult = await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon start');
expect(daemonResult.exitCode).toEqual(0);
// Wait for daemon to be ready
await tools.wait(2000);
// Add a process that will crash
console.log('Adding crash test process...');
const addResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts add "node ${crashScriptPath}" --name crash-test`);
expect(addResult.exitCode).toEqual(0);
// Extract process ID from output
const idMatch = addResult.stdout.match(/Process added with ID: (\d+)/);
expect(idMatch).toBeTruthy();
const processId = parseInt(idMatch![1]);
// Start the process
console.log('Starting process that will crash...');
const startResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts start ${processId}`);
expect(startResult.exitCode).toEqual(0);
// Wait for the process to crash (it crashes after 3 seconds)
console.log('Waiting for process to crash...');
await tools.wait(5000);
// Check if crash log was created
console.log('Checking for crash log...');
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
console.log(`Found ${crashLogFiles.length} crash log files:`, crashLogFiles);
// Should have at least one crash log
expect(crashLogFiles.length).toBeGreaterThan(0);
// Find the crash log for our test process
const testCrashLog = crashLogFiles.find(file => file.includes('crash-test'));
expect(testCrashLog).toBeTruthy();
// Read and verify crash log content
const crashLogPath = plugins.path.join(crashLogsDir, testCrashLog!);
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
console.log('Crash log content:');
console.log(crashLogContent);
// Verify crash log contains expected information
expect(crashLogContent).toIncludeIgnoreCase('crash report');
expect(crashLogContent).toIncludeIgnoreCase('exit code: 42');
expect(crashLogContent).toIncludeIgnoreCase('About to crash');
// Stop the process
console.log('Cleaning up...');
await tools.runCommand(`tsx ts/cli/tspm.cli.ts delete ${processId}`);
// Stop the daemon
await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon stop');
// Clean up test file
await fs.unlink(crashScriptPath).catch(() => {});
});
tap.test('should create crash logs when process is killed', async (tools) => {
const killScriptPath = plugins.path.join(paths.tspmDir, 'test-kill-script.js');
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
// Write a script that runs indefinitely
const KILL_SCRIPT = `
setInterval(() => {
console.log('[test] Process is running and will be killed...');
}, 500);
`;
await fs.writeFile(killScriptPath, KILL_SCRIPT);
// Start the daemon
console.log('Starting daemon...');
const daemonResult = await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon start');
expect(daemonResult.exitCode).toEqual(0);
// Wait for daemon to be ready
await tools.wait(2000);
// Add a process that we'll kill
console.log('Adding kill test process...');
const addResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts add "node ${killScriptPath}" --name kill-test`);
expect(addResult.exitCode).toEqual(0);
// Extract process ID
const idMatch = addResult.stdout.match(/Process added with ID: (\d+)/);
expect(idMatch).toBeTruthy();
const processId = parseInt(idMatch![1]);
// Start the process
console.log('Starting process to be killed...');
const startResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts start ${processId}`);
expect(startResult.exitCode).toEqual(0);
// Wait for process to run a bit
await tools.wait(2000);
// Get the actual PID of the running process
const statusResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts describe ${processId}`);
const pidMatch = statusResult.stdout.match(/pid:\s+(\d+)/);
if (pidMatch) {
const pid = parseInt(pidMatch[1]);
console.log(`Killing process with PID ${pid}...`);
// Kill the process with SIGTERM
await tools.runCommand(`kill -TERM ${pid}`);
// Wait for crash log to be created
await tools.wait(3000);
// Check for crash log
console.log('Checking for crash log from killed process...');
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
const killCrashLog = crashLogFiles.find(file => file.includes('kill-test'));
if (killCrashLog) {
const crashLogPath = plugins.path.join(crashLogsDir, killCrashLog);
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
console.log('Kill crash log content:');
console.log(crashLogContent);
// Verify it contains signal information
expect(crashLogContent).toIncludeIgnoreCase('signal: SIGTERM');
}
}
// Clean up
console.log('Cleaning up...');
await tools.runCommand(`tsx ts/cli/tspm.cli.ts delete ${processId}`);
await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon stop');
await fs.unlink(killScriptPath).catch(() => {});
});
export default tap.start();

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@git.zone/tspm',
version: '5.9.0',
version: '5.10.0',
description: 'a no fuzz process manager'
}

View File

@@ -17,6 +17,9 @@ export class TspmIpcClient {
private socketPath: string;
private daemonPidFile: string;
private isConnected: boolean = false;
// Store event handlers for cleanup
private heartbeatTimeoutHandler?: () => void;
private markDisconnectedHandler?: () => void;
constructor() {
this.socketPath = plugins.path.join(paths.tspmDir, 'tspm.sock');
@@ -74,20 +77,21 @@ export class TspmIpcClient {
this.isConnected = true;
// Handle heartbeat timeouts gracefully
this.ipcClient.on('heartbeatTimeout', () => {
this.heartbeatTimeoutHandler = () => {
console.warn('Heartbeat timeout detected, connection may be degraded');
this.isConnected = false;
});
};
this.ipcClient.on('heartbeatTimeout', this.heartbeatTimeoutHandler);
// Reflect connection lifecycle on the client state
const markDisconnected = () => {
this.markDisconnectedHandler = () => {
this.isConnected = false;
};
// Common lifecycle events
this.ipcClient.on('disconnect', markDisconnected as any);
this.ipcClient.on('close', markDisconnected as any);
this.ipcClient.on('end', markDisconnected as any);
this.ipcClient.on('error', markDisconnected as any);
this.ipcClient.on('disconnect', this.markDisconnectedHandler as any);
this.ipcClient.on('close', this.markDisconnectedHandler as any);
this.ipcClient.on('end', this.markDisconnectedHandler as any);
this.ipcClient.on('error', this.markDisconnectedHandler as any);
// connected
} catch (error) {
@@ -103,6 +107,21 @@ export class TspmIpcClient {
*/
public async disconnect(): Promise<void> {
if (this.ipcClient) {
// Remove event listeners before disconnecting
if (this.heartbeatTimeoutHandler) {
this.ipcClient.removeListener('heartbeatTimeout', this.heartbeatTimeoutHandler);
}
if (this.markDisconnectedHandler) {
this.ipcClient.removeListener('disconnect', this.markDisconnectedHandler as any);
this.ipcClient.removeListener('close', this.markDisconnectedHandler as any);
this.ipcClient.removeListener('end', this.markDisconnectedHandler as any);
this.ipcClient.removeListener('error', this.markDisconnectedHandler as any);
}
// Clear handler references
this.heartbeatTimeoutHandler = undefined;
this.markDisconnectedHandler = undefined;
await this.ipcClient.disconnect();
this.ipcClient = null;
this.isConnected = false;

View File

@@ -0,0 +1,265 @@
import * as plugins from '../plugins.js';
import * as paths from '../paths.js';
import type { IProcessLog } from '../shared/protocol/ipc.types.js';
import type { ProcessId } from '../shared/protocol/id.js';
/**
* Manages crash log storage for failed processes
*/
export class CrashLogManager {
private crashLogsDir: string;
private readonly MAX_CRASH_LOGS = 100;
private readonly MAX_LOG_SIZE_BYTES = 1024 * 1024; // 1MB
constructor() {
this.crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
}
/**
* Save a crash log for a failed process
*/
public async saveCrashLog(
processId: ProcessId,
processName: string,
logs: IProcessLog[],
exitCode: number | null,
signal: string | null,
restartCount: number,
memoryUsage?: number
): Promise<void> {
try {
// Ensure directory exists
await this.ensureCrashLogsDir();
// Generate filename with timestamp
const timestamp = new Date();
const dateStr = this.formatDate(timestamp);
const sanitizedName = this.sanitizeFilename(processName);
const filename = `${dateStr}_${processId}_${sanitizedName}.log`;
const filepath = plugins.path.join(this.crashLogsDir, filename);
// Get recent logs that fit within size limit
const recentLogs = this.getRecentLogs(logs, this.MAX_LOG_SIZE_BYTES);
// Create crash report
const crashReport = this.formatCrashReport({
processId,
processName,
timestamp,
exitCode,
signal,
restartCount,
memoryUsage,
logs: recentLogs
});
// Write crash log
await plugins.smartfile.memory.toFs(crashReport, filepath);
// Rotate old logs if needed
await this.rotateOldLogs();
console.log(`Crash log saved: ${filename}`);
} catch (error) {
console.error(`Failed to save crash log for process ${processId}:`, error);
}
}
/**
* Format date for filename: YYYY-MM-DD_HH-mm-ss
*/
private formatDate(date: Date): string {
const year = date.getFullYear();
const month = String(date.getMonth() + 1).padStart(2, '0');
const day = String(date.getDate()).padStart(2, '0');
const hours = String(date.getHours()).padStart(2, '0');
const minutes = String(date.getMinutes()).padStart(2, '0');
const seconds = String(date.getSeconds()).padStart(2, '0');
return `${year}-${month}-${day}_${hours}-${minutes}-${seconds}`;
}
/**
* Sanitize process name for use in filename
*/
private sanitizeFilename(name: string): string {
// Replace problematic characters with underscore
return name
.replace(/[^a-zA-Z0-9-_]/g, '_')
.replace(/_+/g, '_')
.substring(0, 50); // Limit length
}
/**
* Get recent logs that fit within the size limit
*/
private getRecentLogs(logs: IProcessLog[], maxBytes: number): IProcessLog[] {
if (logs.length === 0) return [];
// Start from the end and work backwards
const recentLogs: IProcessLog[] = [];
let currentSize = 0;
for (let i = logs.length - 1; i >= 0; i--) {
const log = logs[i];
const logSize = this.estimateLogSize(log);
if (currentSize + logSize > maxBytes && recentLogs.length > 0) {
// Would exceed limit, stop adding
break;
}
recentLogs.unshift(log);
currentSize += logSize;
}
return recentLogs;
}
/**
* Estimate size of a log entry in bytes
*/
private estimateLogSize(log: IProcessLog): number {
// Format: [timestamp] [type] message\n
const formatted = `[${new Date(log.timestamp).toISOString()}] [${log.type}] ${log.message}\n`;
return Buffer.byteLength(formatted, 'utf8');
}
/**
* Format a crash report with metadata and logs
*/
private formatCrashReport(data: {
processId: ProcessId;
processName: string;
timestamp: Date;
exitCode: number | null;
signal: string | null;
restartCount: number;
memoryUsage?: number;
logs: IProcessLog[];
}): string {
const lines: string[] = [
'================================================================================',
'TSPM CRASH REPORT',
'================================================================================',
`Process: ${data.processName} (ID: ${data.processId})`,
`Date: ${data.timestamp.toISOString()}`,
`Exit Code: ${data.exitCode ?? 'N/A'}`,
`Signal: ${data.signal ?? 'N/A'}`,
`Restart Attempt: ${data.restartCount}/10`,
];
if (data.memoryUsage !== undefined && data.memoryUsage > 0) {
lines.push(`Memory Usage: ${this.humanReadableBytes(data.memoryUsage)}`);
}
lines.push(
'================================================================================',
'',
`LAST ${data.logs.length} LOG ENTRIES:`,
'--------------------------------------------------------------------------------',
''
);
// Add log entries
for (const log of data.logs) {
const timestamp = new Date(log.timestamp).toISOString();
const type = log.type.toUpperCase().padEnd(6);
lines.push(`[${timestamp}] [${type}] ${log.message}`);
}
lines.push(
'',
'================================================================================',
'END OF CRASH REPORT',
'================================================================================',
''
);
return lines.join('\n');
}
/**
* Convert bytes to human-readable format
*/
private humanReadableBytes(bytes: number): string {
if (bytes === 0) return '0 Bytes';
const k = 1024;
const sizes = ['Bytes', 'KB', 'MB', 'GB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
/**
* Ensure crash logs directory exists
*/
private async ensureCrashLogsDir(): Promise<void> {
await plugins.smartfile.fs.ensureDir(this.crashLogsDir);
}
/**
* Rotate old crash logs when exceeding max count
*/
private async rotateOldLogs(): Promise<void> {
try {
// Get all crash log files
const files = await plugins.smartfile.fs.listFileTree(this.crashLogsDir, '*.log');
if (files.length <= this.MAX_CRASH_LOGS) {
return; // No rotation needed
}
// Get file stats and sort by modification time (oldest first)
const fileStats = await Promise.all(
files.map(async (file) => {
const filepath = plugins.path.join(this.crashLogsDir, file);
const stats = await plugins.smartfile.fs.stat(filepath);
return { filepath, mtime: stats.mtime.getTime() };
})
);
fileStats.sort((a, b) => a.mtime - b.mtime);
// Delete oldest files to stay under limit
const filesToDelete = fileStats.length - this.MAX_CRASH_LOGS;
for (let i = 0; i < filesToDelete; i++) {
await plugins.smartfile.fs.remove(fileStats[i].filepath);
console.log(`Rotated old crash log: ${plugins.path.basename(fileStats[i].filepath)}`);
}
} catch (error) {
console.error('Failed to rotate crash logs:', error);
}
}
/**
* Get list of crash logs for a specific process
*/
public async getCrashLogsForProcess(processId: ProcessId): Promise<string[]> {
try {
await this.ensureCrashLogsDir();
const files = await plugins.smartfile.fs.listFileTree(this.crashLogsDir, `*_${processId}_*.log`);
return files.map(file => plugins.path.join(this.crashLogsDir, file));
} catch (error) {
console.error(`Failed to get crash logs for process ${processId}:`, error);
return [];
}
}
/**
* Clean up all crash logs (for maintenance)
*/
public async cleanupAllCrashLogs(): Promise<void> {
try {
await this.ensureCrashLogsDir();
const files = await plugins.smartfile.fs.listFileTree(this.crashLogsDir, '*.log');
for (const file of files) {
const filepath = plugins.path.join(this.crashLogsDir, file);
await plugins.smartfile.fs.remove(filepath);
}
console.log(`Cleaned up ${files.length} crash logs`);
} catch (error) {
console.error('Failed to cleanup crash logs:', error);
}
}
}

View File

@@ -2,6 +2,7 @@ import * as plugins from '../plugins.js';
import { EventEmitter } from 'events';
import { ProcessWrapper } from './processwrapper.js';
import { LogPersistence } from './logpersistence.js';
import { CrashLogManager } from './crashlogmanager.js';
import { Logger, ProcessError, handleError } from '../shared/common/utils.errorhandler.js';
import type { IMonitorConfig, IProcessLog } from '../shared/protocol/ipc.types.js';
import type { ProcessId } from '../shared/protocol/id.js';
@@ -15,6 +16,7 @@ export class ProcessMonitor extends EventEmitter {
private logger: Logger;
private logs: IProcessLog[] = [];
private logPersistence: LogPersistence;
private crashLogManager: CrashLogManager;
private processId?: ProcessId;
private currentLogMemorySize: number = 0;
private readonly MAX_LOG_MEMORY_SIZE = 10 * 1024 * 1024; // 10MB
@@ -26,6 +28,11 @@ export class ProcessMonitor extends EventEmitter {
private readonly RESET_WINDOW_MS = 60 * 60 * 1000; // 1 hour
private lastMemoryUsage: number = 0;
private lastCpuUsage: number = 0;
// Store event listeners for cleanup
private logHandler?: (log: IProcessLog) => void;
private startHandler?: (pid: number) => void;
private exitHandler?: (code: number | null, signal: string | null) => Promise<void>;
private errorHandler?: (error: Error | ProcessError) => Promise<void>;
constructor(config: IMonitorConfig & { id?: ProcessId }) {
super();
@@ -33,6 +40,7 @@ export class ProcessMonitor extends EventEmitter {
this.logger = new Logger(`ProcessMonitor:${config.name || 'unnamed'}`);
this.logs = [];
this.logPersistence = new LogPersistence();
this.crashLogManager = new CrashLogManager();
this.processId = config.id;
this.currentLogMemorySize = 0;
}
@@ -83,6 +91,14 @@ export class ProcessMonitor extends EventEmitter {
this.logger.info(`Spawning process: ${this.config.command}`);
// Clear any orphaned pidusage cache entries before spawning
try {
(plugins.pidusage as any)?.clearAll?.();
} catch {}
// Clean up previous listeners if any
this.cleanupListeners();
// Create a new process wrapper
this.processWrapper = new ProcessWrapper({
name: this.config.name || 'unnamed-process',
@@ -94,7 +110,7 @@ export class ProcessMonitor extends EventEmitter {
});
// Set up event handlers
this.processWrapper.on('log', (log: IProcessLog): void => {
this.logHandler = (log: IProcessLog): void => {
// Store the log in our buffer
this.logs.push(log);
if (process.env.TSPM_DEBUG) {
@@ -117,6 +133,7 @@ export class ProcessMonitor extends EventEmitter {
// Remove oldest logs until we're under the memory limit
const removed = this.logs.shift()!;
const removedSize = this.logSizeMap.get(removed) ?? this.estimateLogSize(removed);
this.logSizeMap.delete(removed); // Clean up map entry to prevent memory leak
this.currentLogMemorySize -= removedSize;
}
@@ -127,16 +144,16 @@ export class ProcessMonitor extends EventEmitter {
if (log.type === 'system') {
this.log(log.message);
}
});
};
this.processWrapper.on('log', this.logHandler);
// Re-emit start event with PID for upstream handlers
this.processWrapper.on('start', (pid: number): void => {
this.startHandler = (pid: number): void => {
this.emit('start', pid);
});
};
this.processWrapper.on('start', this.startHandler);
this.processWrapper.on(
'exit',
async (code: number | null, signal: string | null): Promise<void> => {
this.exitHandler = async (code: number | null, signal: string | null): Promise<void> => {
const exitMsg = `Process exited with code ${code}, signal ${signal}.`;
this.logger.info(exitMsg);
this.log(exitMsg);
@@ -149,6 +166,27 @@ export class ProcessMonitor extends EventEmitter {
}
} catch {}
// Detect if this was a crash (non-zero exit code or killed by signal)
const isCrash = (code !== null && code !== 0) || signal !== null;
// Save crash log if this was a crash
if (isCrash && this.processId && this.config.name) {
try {
await this.crashLogManager.saveCrashLog(
this.processId,
this.config.name,
this.logs,
code,
signal,
this.restartCount,
this.lastMemoryUsage
);
this.logger.info(`Saved crash log for process ${this.config.name}`);
} catch (error) {
this.logger.error(`Failed to save crash log: ${error}`);
}
}
// Flush logs to disk on exit
if (this.processId && this.logs.length > 0) {
try {
@@ -169,10 +207,10 @@ export class ProcessMonitor extends EventEmitter {
'Not restarting process because monitor is stopped',
);
}
},
);
};
this.processWrapper.on('exit', this.exitHandler);
this.processWrapper.on('error', async (error: Error | ProcessError): Promise<void> => {
this.errorHandler = async (error: Error | ProcessError): Promise<void> => {
const errorMsg =
error instanceof ProcessError
? `Process error: ${error.toString()}`
@@ -181,6 +219,24 @@ export class ProcessMonitor extends EventEmitter {
this.logger.error(error);
this.log(errorMsg);
// Save crash log for errors
if (this.processId && this.config.name) {
try {
await this.crashLogManager.saveCrashLog(
this.processId,
this.config.name,
this.logs,
null, // no exit code for errors
null, // no signal for errors
this.restartCount,
this.lastMemoryUsage
);
this.logger.info(`Saved crash log for process ${this.config.name} due to error`);
} catch (crashLogError) {
this.logger.error(`Failed to save crash log: ${crashLogError}`);
}
}
// Flush logs to disk on error
if (this.processId && this.logs.length > 0) {
try {
@@ -196,7 +252,8 @@ export class ProcessMonitor extends EventEmitter {
} else {
this.logger.debug('Not restarting process because monitor is stopped');
}
});
};
this.processWrapper.on('error', this.errorHandler);
// Start the process
try {
@@ -210,6 +267,31 @@ export class ProcessMonitor extends EventEmitter {
}
}
/**
* Clean up event listeners from process wrapper
*/
private cleanupListeners(): void {
if (this.processWrapper) {
if (this.logHandler) {
this.processWrapper.removeListener('log', this.logHandler);
}
if (this.startHandler) {
this.processWrapper.removeListener('start', this.startHandler);
}
if (this.exitHandler) {
this.processWrapper.removeListener('exit', this.exitHandler);
}
if (this.errorHandler) {
this.processWrapper.removeListener('error', this.errorHandler);
}
}
// Clear references
this.logHandler = undefined;
this.startHandler = undefined;
this.exitHandler = undefined;
this.errorHandler = undefined;
}
/**
* Schedule a restart with incremental debounce and failure cutoff.
*/
@@ -360,6 +442,14 @@ export class ProcessMonitor extends EventEmitter {
this.logger.debug(
`Total memory for process group: ${this.humanReadableBytes(totalMemory)}`,
);
// Clear pidusage cache for all PIDs to prevent memory leaks
for (const pid of pids) {
try {
(plugins.pidusage as any)?.clear?.(pid);
} catch {}
}
resolve({ memory: totalMemory, cpu: totalCpu });
},
);
@@ -387,6 +477,9 @@ export class ProcessMonitor extends EventEmitter {
this.log('Stopping process monitor.');
this.stopped = true;
// Clean up event listeners
this.cleanupListeners();
// Flush logs to disk before stopping
if (this.processId && this.logs.length > 0) {
try {

View File

@@ -23,6 +23,13 @@ export class ProcessWrapper extends EventEmitter {
private runId: string = '';
private stdoutRemainder: string = '';
private stderrRemainder: string = '';
// Store event handlers for cleanup
private exitHandler?: (code: number | null, signal: string | null) => void;
private errorHandler?: (error: Error) => void;
private stdoutDataHandler?: (data: Buffer) => void;
private stdoutEndHandler?: () => void;
private stderrDataHandler?: (data: Buffer) => void;
private stderrEndHandler?: () => void;
// Helper: send a signal to the process and all its children (best-effort)
private async killProcessTree(signal: NodeJS.Signals): Promise<void> {
@@ -84,7 +91,7 @@ export class ProcessWrapper extends EventEmitter {
this.startTime = new Date();
// Handle process exit
this.process.on('exit', (code, signal) => {
this.exitHandler = (code, signal) => {
const exitMessage = `Process exited with code ${code}, signal ${signal}`;
this.logger.info(exitMessage);
this.addSystemLog(exitMessage);
@@ -97,10 +104,11 @@ export class ProcessWrapper extends EventEmitter {
this.process = null;
this.emit('exit', code, signal);
});
};
this.process.on('exit', this.exitHandler);
// Handle errors
this.process.on('error', (error) => {
this.errorHandler = (error) => {
const processError = new ProcessError(
error.message,
'ERR_PROCESS_EXECUTION',
@@ -109,7 +117,8 @@ export class ProcessWrapper extends EventEmitter {
this.logger.error(processError);
this.addSystemLog(`Process error: ${processError.toString()}`);
this.emit('error', processError);
});
};
this.process.on('error', this.errorHandler);
// Capture stdout
if (this.process.stdout) {
@@ -118,7 +127,7 @@ export class ProcessWrapper extends EventEmitter {
`[ProcessWrapper] Setting up stdout listener for process ${this.process.pid}`,
);
}
this.process.stdout.on('data', (data) => {
this.stdoutDataHandler = (data) => {
if (process.env.TSPM_DEBUG) {
console.error(
`[ProcessWrapper] Received stdout data from PID ${this.process?.pid}: ${data
@@ -141,23 +150,25 @@ export class ProcessWrapper extends EventEmitter {
this.logger.debug(`Captured stdout: ${line}`);
this.addLog('stdout', line);
}
});
};
this.process.stdout.on('data', this.stdoutDataHandler);
// Flush remainder on stream end
this.process.stdout.on('end', () => {
this.stdoutEndHandler = () => {
if (this.stdoutRemainder) {
this.logger.debug(`Flushing stdout remainder: ${this.stdoutRemainder}`);
this.addLog('stdout', this.stdoutRemainder);
this.stdoutRemainder = '';
}
});
};
this.process.stdout.on('end', this.stdoutEndHandler);
} else {
this.logger.warn('Process stdout is null');
}
// Capture stderr
if (this.process.stderr) {
this.process.stderr.on('data', (data) => {
this.stderrDataHandler = (data) => {
// Add data to remainder buffer and split by newlines
const text = this.stderrRemainder + data.toString();
const lines = text.split('\n');
@@ -169,15 +180,17 @@ export class ProcessWrapper extends EventEmitter {
for (const line of lines) {
this.addLog('stderr', line);
}
});
};
this.process.stderr.on('data', this.stderrDataHandler);
// Flush remainder on stream end
this.process.stderr.on('end', () => {
this.stderrEndHandler = () => {
if (this.stderrRemainder) {
this.addLog('stderr', this.stderrRemainder);
this.stderrRemainder = '';
}
});
};
this.process.stderr.on('end', this.stderrEndHandler);
}
this.addSystemLog(`Process started with PID ${this.process.pid}`);
@@ -200,6 +213,46 @@ export class ProcessWrapper extends EventEmitter {
}
}
/**
* Clean up event listeners from process and streams
*/
private cleanupListeners(): void {
if (this.process) {
if (this.exitHandler) {
this.process.removeListener('exit', this.exitHandler);
}
if (this.errorHandler) {
this.process.removeListener('error', this.errorHandler);
}
if (this.process.stdout) {
if (this.stdoutDataHandler) {
this.process.stdout.removeListener('data', this.stdoutDataHandler);
}
if (this.stdoutEndHandler) {
this.process.stdout.removeListener('end', this.stdoutEndHandler);
}
}
if (this.process.stderr) {
if (this.stderrDataHandler) {
this.process.stderr.removeListener('data', this.stderrDataHandler);
}
if (this.stderrEndHandler) {
this.process.stderr.removeListener('end', this.stderrEndHandler);
}
}
}
// Clear references
this.exitHandler = undefined;
this.errorHandler = undefined;
this.stdoutDataHandler = undefined;
this.stdoutEndHandler = undefined;
this.stderrDataHandler = undefined;
this.stderrEndHandler = undefined;
}
/**
* Stop the wrapped process
*/
@@ -210,6 +263,9 @@ export class ProcessWrapper extends EventEmitter {
return;
}
// Clean up event listeners before stopping
this.cleanupListeners();
this.logger.info('Stopping process...');
this.addSystemLog('Stopping process...');