feat(daemon): Add crash log manager with rotation and integrate crash logging; improve IPC & process listener cleanup
This commit is contained in:
11
changelog.md
11
changelog.md
@@ -1,5 +1,16 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2025-09-01 - 5.10.0 - feat(daemon)
|
||||||
|
Add crash log manager with rotation and integrate crash logging; improve IPC & process listener cleanup
|
||||||
|
|
||||||
|
- Introduce CrashLogManager to create formatted crash reports, persist them to disk and rotate old logs (max 100)
|
||||||
|
- Persist recent process logs, include metadata (exit code, signal, restart attempts, memory) and human-readable sizes in crash reports
|
||||||
|
- Integrate crash logging into ProcessMonitor: save crash logs on non-zero exits and errors, and persist/rotate logs
|
||||||
|
- Improve ProcessMonitor and ProcessWrapper by tracking and removing event listeners to avoid memory leaks
|
||||||
|
- Clear pidusage cache more aggressively to prevent stale entries
|
||||||
|
- Enhance TspmIpcClient to store/remove lifecycle event handlers on disconnect to avoid dangling listeners
|
||||||
|
- Add tests and utilities: test/test.crashlog.direct.ts, test/test.crashlog.manual.ts and test/test.crashlog.ts to validate crash log creation and rotation
|
||||||
|
|
||||||
## 2025-08-31 - 5.9.0 - feat(cli)
|
## 2025-08-31 - 5.9.0 - feat(cli)
|
||||||
Add interactive edit flow to CLI and improve UX
|
Add interactive edit flow to CLI and improve UX
|
||||||
|
|
||||||
|
148
test/test.crashlog.direct.ts
Normal file
148
test/test.crashlog.direct.ts
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
#!/usr/bin/env tsx
|
||||||
|
|
||||||
|
import { CrashLogManager } from '../ts/daemon/crashlogmanager.js';
|
||||||
|
import type { IProcessLog } from '../ts/shared/protocol/ipc.types.js';
|
||||||
|
import * as plugins from '../ts/plugins.js';
|
||||||
|
import * as paths from '../ts/paths.js';
|
||||||
|
import * as fs from 'fs/promises';
|
||||||
|
|
||||||
|
async function testCrashLogManager() {
|
||||||
|
console.log('🧪 Testing CrashLogManager directly...\n');
|
||||||
|
|
||||||
|
const crashLogManager = new CrashLogManager();
|
||||||
|
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
|
||||||
|
|
||||||
|
// Clean up any existing crash logs
|
||||||
|
console.log('📁 Cleaning up existing crash logs...');
|
||||||
|
try {
|
||||||
|
await fs.rm(crashLogsDir, { recursive: true, force: true });
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
// Create test logs
|
||||||
|
const testLogs: IProcessLog[] = [
|
||||||
|
{
|
||||||
|
timestamp: Date.now() - 5000,
|
||||||
|
message: '[TEST] Process starting up...',
|
||||||
|
type: 'stdout'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: Date.now() - 4000,
|
||||||
|
message: '[TEST] Initializing components...',
|
||||||
|
type: 'stdout'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: Date.now() - 3000,
|
||||||
|
message: '[TEST] Running main loop...',
|
||||||
|
type: 'stdout'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: Date.now() - 2000,
|
||||||
|
message: '[TEST] Warning: Memory usage high',
|
||||||
|
type: 'stderr'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: Date.now() - 1000,
|
||||||
|
message: '[TEST] Error: Unhandled exception occurred!',
|
||||||
|
type: 'stderr'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timestamp: Date.now() - 500,
|
||||||
|
message: '[TEST] Fatal: Process crashing with exit code 42',
|
||||||
|
type: 'stderr'
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
// Test saving a crash log
|
||||||
|
console.log('💾 Saving crash log...');
|
||||||
|
await crashLogManager.saveCrashLog(
|
||||||
|
1 as any, // ProcessId
|
||||||
|
'test-process',
|
||||||
|
testLogs,
|
||||||
|
42, // exit code
|
||||||
|
null, // signal
|
||||||
|
3, // restart count
|
||||||
|
1024 * 1024 * 50 // 50MB memory usage
|
||||||
|
);
|
||||||
|
|
||||||
|
// Check if crash log was created
|
||||||
|
console.log('🔍 Checking for crash log...');
|
||||||
|
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
|
||||||
|
console.log(` Found ${crashLogFiles.length} crash log files:`);
|
||||||
|
crashLogFiles.forEach(file => console.log(` - ${file}`));
|
||||||
|
|
||||||
|
if (crashLogFiles.length === 0) {
|
||||||
|
console.error('❌ No crash logs were created!');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read and display the crash log
|
||||||
|
const crashLogFile = crashLogFiles[0];
|
||||||
|
const crashLogPath = plugins.path.join(crashLogsDir, crashLogFile);
|
||||||
|
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
|
||||||
|
|
||||||
|
console.log('\n📋 Crash log content:');
|
||||||
|
console.log('─'.repeat(60));
|
||||||
|
console.log(crashLogContent);
|
||||||
|
console.log('─'.repeat(60));
|
||||||
|
|
||||||
|
// Verify content
|
||||||
|
const checks = [
|
||||||
|
{ text: 'CRASH REPORT', found: crashLogContent.includes('CRASH REPORT') },
|
||||||
|
{ text: 'Exit Code: 42', found: crashLogContent.includes('Exit Code: 42') },
|
||||||
|
{ text: 'Restart Attempt: 3/10', found: crashLogContent.includes('Restart Attempt: 3/10') },
|
||||||
|
{ text: 'Memory Usage: 50 MB', found: crashLogContent.includes('Memory Usage: 50 MB') },
|
||||||
|
{ text: 'Fatal: Process crashing', found: crashLogContent.includes('Fatal: Process crashing') }
|
||||||
|
];
|
||||||
|
|
||||||
|
console.log('\n✅ Verification:');
|
||||||
|
checks.forEach(check => {
|
||||||
|
console.log(` ${check.found ? '✓' : '✗'} Contains "${check.text}"`);
|
||||||
|
});
|
||||||
|
|
||||||
|
const allChecksPassed = checks.every(c => c.found);
|
||||||
|
|
||||||
|
// Test rotation (create 100+ logs to test limit)
|
||||||
|
console.log('\n🔄 Testing rotation (creating 105 crash logs)...');
|
||||||
|
for (let i = 2; i <= 105; i++) {
|
||||||
|
await crashLogManager.saveCrashLog(
|
||||||
|
i as any,
|
||||||
|
`test-process-${i}`,
|
||||||
|
testLogs,
|
||||||
|
i,
|
||||||
|
null,
|
||||||
|
1,
|
||||||
|
1024 * 1024 * 10
|
||||||
|
);
|
||||||
|
// Small delay to ensure different timestamps
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 10));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that we have exactly 100 logs (rotation working)
|
||||||
|
const finalLogFiles = await fs.readdir(crashLogsDir);
|
||||||
|
console.log(` After rotation: ${finalLogFiles.length} crash logs (should be 100)`);
|
||||||
|
|
||||||
|
if (finalLogFiles.length !== 100) {
|
||||||
|
console.error(`❌ Rotation failed! Expected 100 logs, got ${finalLogFiles.length}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify oldest logs were deleted (test-process should be gone)
|
||||||
|
const hasOriginal = finalLogFiles.some(f => f.includes('_1_test-process.log'));
|
||||||
|
if (hasOriginal) {
|
||||||
|
console.error('❌ Rotation failed! Oldest log still exists');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allChecksPassed) {
|
||||||
|
console.log('\n✅ All crash log tests passed!');
|
||||||
|
} else {
|
||||||
|
console.log('\n❌ Some crash log tests failed!');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the test
|
||||||
|
testCrashLogManager().catch(error => {
|
||||||
|
console.error('❌ Test failed:', error);
|
||||||
|
process.exit(1);
|
||||||
|
});
|
137
test/test.crashlog.manual.ts
Normal file
137
test/test.crashlog.manual.ts
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
#!/usr/bin/env tsx
|
||||||
|
|
||||||
|
import * as plugins from '../ts/plugins.js';
|
||||||
|
import * as paths from '../ts/paths.js';
|
||||||
|
import * as fs from 'fs/promises';
|
||||||
|
import { execSync } from 'child_process';
|
||||||
|
|
||||||
|
// Test process that will crash
|
||||||
|
const CRASH_SCRIPT = `
|
||||||
|
setInterval(() => {
|
||||||
|
console.log('[test] Process is running...');
|
||||||
|
}, 1000);
|
||||||
|
|
||||||
|
setTimeout(() => {
|
||||||
|
console.error('[test] About to crash with non-zero exit code!');
|
||||||
|
process.exit(42);
|
||||||
|
}, 3000);
|
||||||
|
`;
|
||||||
|
|
||||||
|
async function testCrashLog() {
|
||||||
|
console.log('🧪 Testing crash log functionality...\n');
|
||||||
|
|
||||||
|
const crashScriptPath = plugins.path.join(paths.tspmDir, 'test-crash-script.js');
|
||||||
|
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Clean up any existing crash logs
|
||||||
|
console.log('📁 Cleaning up existing crash logs...');
|
||||||
|
try {
|
||||||
|
await fs.rm(crashLogsDir, { recursive: true, force: true });
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
// Write the crash script
|
||||||
|
console.log('📝 Writing test crash script...');
|
||||||
|
await fs.writeFile(crashScriptPath, CRASH_SCRIPT);
|
||||||
|
|
||||||
|
// Stop any existing daemon
|
||||||
|
console.log('🛑 Stopping any existing daemon...');
|
||||||
|
try {
|
||||||
|
execSync('tsx ts/cli.ts daemon stop', { stdio: 'inherit' });
|
||||||
|
} catch {}
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||||
|
|
||||||
|
// Start the daemon
|
||||||
|
console.log('🚀 Starting daemon...');
|
||||||
|
execSync('tsx ts/cli.ts daemon start', { stdio: 'inherit' });
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||||
|
|
||||||
|
// Add a process that will crash
|
||||||
|
console.log('➕ Adding crash test process...');
|
||||||
|
const addOutput = execSync(`tsx ts/cli.ts add "node ${crashScriptPath}" --name crash-test`, { encoding: 'utf-8' });
|
||||||
|
console.log(addOutput);
|
||||||
|
|
||||||
|
// Extract process ID from output
|
||||||
|
const idMatch = addOutput.match(/Process added with ID: (\d+)/);
|
||||||
|
if (!idMatch) {
|
||||||
|
throw new Error('Could not extract process ID from output');
|
||||||
|
}
|
||||||
|
const processId = parseInt(idMatch[1]);
|
||||||
|
console.log(` Process ID: ${processId}`);
|
||||||
|
|
||||||
|
// Start the process
|
||||||
|
console.log('▶️ Starting process that will crash...');
|
||||||
|
execSync(`tsx ts/cli.ts start ${processId}`, { stdio: 'inherit' });
|
||||||
|
|
||||||
|
// Wait for the process to crash (it crashes after 3 seconds)
|
||||||
|
console.log('⏳ Waiting for process to crash...');
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||||
|
|
||||||
|
// Check if crash log was created
|
||||||
|
console.log('🔍 Checking for crash log...');
|
||||||
|
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
|
||||||
|
console.log(` Found ${crashLogFiles.length} crash log files:`);
|
||||||
|
crashLogFiles.forEach(file => console.log(` - ${file}`));
|
||||||
|
|
||||||
|
if (crashLogFiles.length === 0) {
|
||||||
|
throw new Error('No crash logs were created!');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the crash log for our test process
|
||||||
|
const testCrashLog = crashLogFiles.find(file => file.includes('crash-test'));
|
||||||
|
if (!testCrashLog) {
|
||||||
|
throw new Error('Could not find crash log for test process');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read and display crash log content
|
||||||
|
const crashLogPath = plugins.path.join(crashLogsDir, testCrashLog);
|
||||||
|
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
|
||||||
|
|
||||||
|
console.log('\n📋 Crash log content:');
|
||||||
|
console.log('─'.repeat(60));
|
||||||
|
console.log(crashLogContent);
|
||||||
|
console.log('─'.repeat(60));
|
||||||
|
|
||||||
|
// Verify crash log contains expected information
|
||||||
|
const checks = [
|
||||||
|
{ text: 'CRASH REPORT', found: crashLogContent.includes('CRASH REPORT') },
|
||||||
|
{ text: 'Exit Code: 42', found: crashLogContent.includes('Exit Code: 42') },
|
||||||
|
{ text: 'About to crash', found: crashLogContent.includes('About to crash') },
|
||||||
|
{ text: 'Process is running', found: crashLogContent.includes('Process is running') }
|
||||||
|
];
|
||||||
|
|
||||||
|
console.log('\n✅ Verification:');
|
||||||
|
checks.forEach(check => {
|
||||||
|
console.log(` ${check.found ? '✓' : '✗'} Contains "${check.text}"`);
|
||||||
|
});
|
||||||
|
|
||||||
|
const allChecksPassed = checks.every(c => c.found);
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
console.log('\n🧹 Cleaning up...');
|
||||||
|
execSync(`tsx ts/cli.ts delete ${processId}`, { stdio: 'inherit' });
|
||||||
|
execSync('tsx ts/cli.ts daemon stop', { stdio: 'inherit' });
|
||||||
|
await fs.unlink(crashScriptPath).catch(() => {});
|
||||||
|
|
||||||
|
if (allChecksPassed) {
|
||||||
|
console.log('\n✅ All crash log tests passed!');
|
||||||
|
} else {
|
||||||
|
console.log('\n❌ Some crash log tests failed!');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error('\n❌ Test failed:', error);
|
||||||
|
|
||||||
|
// Clean up on error
|
||||||
|
try {
|
||||||
|
execSync('tsx ts/cli.ts daemon stop', { stdio: 'inherit' });
|
||||||
|
await fs.unlink(crashScriptPath).catch(() => {});
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the test
|
||||||
|
testCrashLog();
|
172
test/test.crashlog.ts
Normal file
172
test/test.crashlog.ts
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||||
|
import * as plugins from '../ts/plugins.js';
|
||||||
|
import * as paths from '../ts/paths.js';
|
||||||
|
import * as fs from 'fs/promises';
|
||||||
|
|
||||||
|
// Import tspm client
|
||||||
|
import { tspmIpcClient } from '../ts/client/tspm.ipcclient.js';
|
||||||
|
|
||||||
|
// Test process that will crash
|
||||||
|
const CRASH_SCRIPT = `
|
||||||
|
setInterval(() => {
|
||||||
|
console.log('[test] Process is running...');
|
||||||
|
}, 1000);
|
||||||
|
|
||||||
|
setTimeout(() => {
|
||||||
|
console.error('[test] About to crash with non-zero exit code!');
|
||||||
|
process.exit(42);
|
||||||
|
}, 3000);
|
||||||
|
`;
|
||||||
|
|
||||||
|
tap.test('should create crash logs when process crashes', async (tools) => {
|
||||||
|
const crashScriptPath = plugins.path.join(paths.tspmDir, 'test-crash-script.js');
|
||||||
|
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
|
||||||
|
|
||||||
|
// Clean up any existing crash logs
|
||||||
|
try {
|
||||||
|
await fs.rm(crashLogsDir, { recursive: true, force: true });
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
// Write the crash script
|
||||||
|
await fs.writeFile(crashScriptPath, CRASH_SCRIPT);
|
||||||
|
|
||||||
|
// Start the daemon
|
||||||
|
console.log('Starting daemon...');
|
||||||
|
const daemonResult = await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon start');
|
||||||
|
expect(daemonResult.exitCode).toEqual(0);
|
||||||
|
|
||||||
|
// Wait for daemon to be ready
|
||||||
|
await tools.wait(2000);
|
||||||
|
|
||||||
|
// Add a process that will crash
|
||||||
|
console.log('Adding crash test process...');
|
||||||
|
const addResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts add "node ${crashScriptPath}" --name crash-test`);
|
||||||
|
expect(addResult.exitCode).toEqual(0);
|
||||||
|
|
||||||
|
// Extract process ID from output
|
||||||
|
const idMatch = addResult.stdout.match(/Process added with ID: (\d+)/);
|
||||||
|
expect(idMatch).toBeTruthy();
|
||||||
|
const processId = parseInt(idMatch![1]);
|
||||||
|
|
||||||
|
// Start the process
|
||||||
|
console.log('Starting process that will crash...');
|
||||||
|
const startResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts start ${processId}`);
|
||||||
|
expect(startResult.exitCode).toEqual(0);
|
||||||
|
|
||||||
|
// Wait for the process to crash (it crashes after 3 seconds)
|
||||||
|
console.log('Waiting for process to crash...');
|
||||||
|
await tools.wait(5000);
|
||||||
|
|
||||||
|
// Check if crash log was created
|
||||||
|
console.log('Checking for crash log...');
|
||||||
|
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
|
||||||
|
console.log(`Found ${crashLogFiles.length} crash log files:`, crashLogFiles);
|
||||||
|
|
||||||
|
// Should have at least one crash log
|
||||||
|
expect(crashLogFiles.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Find the crash log for our test process
|
||||||
|
const testCrashLog = crashLogFiles.find(file => file.includes('crash-test'));
|
||||||
|
expect(testCrashLog).toBeTruthy();
|
||||||
|
|
||||||
|
// Read and verify crash log content
|
||||||
|
const crashLogPath = plugins.path.join(crashLogsDir, testCrashLog!);
|
||||||
|
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
|
||||||
|
|
||||||
|
console.log('Crash log content:');
|
||||||
|
console.log(crashLogContent);
|
||||||
|
|
||||||
|
// Verify crash log contains expected information
|
||||||
|
expect(crashLogContent).toIncludeIgnoreCase('crash report');
|
||||||
|
expect(crashLogContent).toIncludeIgnoreCase('exit code: 42');
|
||||||
|
expect(crashLogContent).toIncludeIgnoreCase('About to crash');
|
||||||
|
|
||||||
|
// Stop the process
|
||||||
|
console.log('Cleaning up...');
|
||||||
|
await tools.runCommand(`tsx ts/cli/tspm.cli.ts delete ${processId}`);
|
||||||
|
|
||||||
|
// Stop the daemon
|
||||||
|
await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon stop');
|
||||||
|
|
||||||
|
// Clean up test file
|
||||||
|
await fs.unlink(crashScriptPath).catch(() => {});
|
||||||
|
});
|
||||||
|
|
||||||
|
tap.test('should create crash logs when process is killed', async (tools) => {
|
||||||
|
const killScriptPath = plugins.path.join(paths.tspmDir, 'test-kill-script.js');
|
||||||
|
const crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
|
||||||
|
|
||||||
|
// Write a script that runs indefinitely
|
||||||
|
const KILL_SCRIPT = `
|
||||||
|
setInterval(() => {
|
||||||
|
console.log('[test] Process is running and will be killed...');
|
||||||
|
}, 500);
|
||||||
|
`;
|
||||||
|
|
||||||
|
await fs.writeFile(killScriptPath, KILL_SCRIPT);
|
||||||
|
|
||||||
|
// Start the daemon
|
||||||
|
console.log('Starting daemon...');
|
||||||
|
const daemonResult = await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon start');
|
||||||
|
expect(daemonResult.exitCode).toEqual(0);
|
||||||
|
|
||||||
|
// Wait for daemon to be ready
|
||||||
|
await tools.wait(2000);
|
||||||
|
|
||||||
|
// Add a process that we'll kill
|
||||||
|
console.log('Adding kill test process...');
|
||||||
|
const addResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts add "node ${killScriptPath}" --name kill-test`);
|
||||||
|
expect(addResult.exitCode).toEqual(0);
|
||||||
|
|
||||||
|
// Extract process ID
|
||||||
|
const idMatch = addResult.stdout.match(/Process added with ID: (\d+)/);
|
||||||
|
expect(idMatch).toBeTruthy();
|
||||||
|
const processId = parseInt(idMatch![1]);
|
||||||
|
|
||||||
|
// Start the process
|
||||||
|
console.log('Starting process to be killed...');
|
||||||
|
const startResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts start ${processId}`);
|
||||||
|
expect(startResult.exitCode).toEqual(0);
|
||||||
|
|
||||||
|
// Wait for process to run a bit
|
||||||
|
await tools.wait(2000);
|
||||||
|
|
||||||
|
// Get the actual PID of the running process
|
||||||
|
const statusResult = await tools.runCommand(`tsx ts/cli/tspm.cli.ts describe ${processId}`);
|
||||||
|
const pidMatch = statusResult.stdout.match(/pid:\s+(\d+)/);
|
||||||
|
|
||||||
|
if (pidMatch) {
|
||||||
|
const pid = parseInt(pidMatch[1]);
|
||||||
|
console.log(`Killing process with PID ${pid}...`);
|
||||||
|
|
||||||
|
// Kill the process with SIGTERM
|
||||||
|
await tools.runCommand(`kill -TERM ${pid}`);
|
||||||
|
|
||||||
|
// Wait for crash log to be created
|
||||||
|
await tools.wait(3000);
|
||||||
|
|
||||||
|
// Check for crash log
|
||||||
|
console.log('Checking for crash log from killed process...');
|
||||||
|
const crashLogFiles = await fs.readdir(crashLogsDir).catch(() => []);
|
||||||
|
const killCrashLog = crashLogFiles.find(file => file.includes('kill-test'));
|
||||||
|
|
||||||
|
if (killCrashLog) {
|
||||||
|
const crashLogPath = plugins.path.join(crashLogsDir, killCrashLog);
|
||||||
|
const crashLogContent = await fs.readFile(crashLogPath, 'utf-8');
|
||||||
|
|
||||||
|
console.log('Kill crash log content:');
|
||||||
|
console.log(crashLogContent);
|
||||||
|
|
||||||
|
// Verify it contains signal information
|
||||||
|
expect(crashLogContent).toIncludeIgnoreCase('signal: SIGTERM');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
console.log('Cleaning up...');
|
||||||
|
await tools.runCommand(`tsx ts/cli/tspm.cli.ts delete ${processId}`);
|
||||||
|
await tools.runCommand('tsx ts/cli/tspm.cli.ts daemon stop');
|
||||||
|
await fs.unlink(killScriptPath).catch(() => {});
|
||||||
|
});
|
||||||
|
|
||||||
|
export default tap.start();
|
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@git.zone/tspm',
|
name: '@git.zone/tspm',
|
||||||
version: '5.9.0',
|
version: '5.10.0',
|
||||||
description: 'a no fuzz process manager'
|
description: 'a no fuzz process manager'
|
||||||
}
|
}
|
||||||
|
@@ -17,6 +17,9 @@ export class TspmIpcClient {
|
|||||||
private socketPath: string;
|
private socketPath: string;
|
||||||
private daemonPidFile: string;
|
private daemonPidFile: string;
|
||||||
private isConnected: boolean = false;
|
private isConnected: boolean = false;
|
||||||
|
// Store event handlers for cleanup
|
||||||
|
private heartbeatTimeoutHandler?: () => void;
|
||||||
|
private markDisconnectedHandler?: () => void;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.socketPath = plugins.path.join(paths.tspmDir, 'tspm.sock');
|
this.socketPath = plugins.path.join(paths.tspmDir, 'tspm.sock');
|
||||||
@@ -74,20 +77,21 @@ export class TspmIpcClient {
|
|||||||
this.isConnected = true;
|
this.isConnected = true;
|
||||||
|
|
||||||
// Handle heartbeat timeouts gracefully
|
// Handle heartbeat timeouts gracefully
|
||||||
this.ipcClient.on('heartbeatTimeout', () => {
|
this.heartbeatTimeoutHandler = () => {
|
||||||
console.warn('Heartbeat timeout detected, connection may be degraded');
|
console.warn('Heartbeat timeout detected, connection may be degraded');
|
||||||
this.isConnected = false;
|
this.isConnected = false;
|
||||||
});
|
};
|
||||||
|
this.ipcClient.on('heartbeatTimeout', this.heartbeatTimeoutHandler);
|
||||||
|
|
||||||
// Reflect connection lifecycle on the client state
|
// Reflect connection lifecycle on the client state
|
||||||
const markDisconnected = () => {
|
this.markDisconnectedHandler = () => {
|
||||||
this.isConnected = false;
|
this.isConnected = false;
|
||||||
};
|
};
|
||||||
// Common lifecycle events
|
// Common lifecycle events
|
||||||
this.ipcClient.on('disconnect', markDisconnected as any);
|
this.ipcClient.on('disconnect', this.markDisconnectedHandler as any);
|
||||||
this.ipcClient.on('close', markDisconnected as any);
|
this.ipcClient.on('close', this.markDisconnectedHandler as any);
|
||||||
this.ipcClient.on('end', markDisconnected as any);
|
this.ipcClient.on('end', this.markDisconnectedHandler as any);
|
||||||
this.ipcClient.on('error', markDisconnected as any);
|
this.ipcClient.on('error', this.markDisconnectedHandler as any);
|
||||||
|
|
||||||
// connected
|
// connected
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -103,6 +107,21 @@ export class TspmIpcClient {
|
|||||||
*/
|
*/
|
||||||
public async disconnect(): Promise<void> {
|
public async disconnect(): Promise<void> {
|
||||||
if (this.ipcClient) {
|
if (this.ipcClient) {
|
||||||
|
// Remove event listeners before disconnecting
|
||||||
|
if (this.heartbeatTimeoutHandler) {
|
||||||
|
this.ipcClient.removeListener('heartbeatTimeout', this.heartbeatTimeoutHandler);
|
||||||
|
}
|
||||||
|
if (this.markDisconnectedHandler) {
|
||||||
|
this.ipcClient.removeListener('disconnect', this.markDisconnectedHandler as any);
|
||||||
|
this.ipcClient.removeListener('close', this.markDisconnectedHandler as any);
|
||||||
|
this.ipcClient.removeListener('end', this.markDisconnectedHandler as any);
|
||||||
|
this.ipcClient.removeListener('error', this.markDisconnectedHandler as any);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear handler references
|
||||||
|
this.heartbeatTimeoutHandler = undefined;
|
||||||
|
this.markDisconnectedHandler = undefined;
|
||||||
|
|
||||||
await this.ipcClient.disconnect();
|
await this.ipcClient.disconnect();
|
||||||
this.ipcClient = null;
|
this.ipcClient = null;
|
||||||
this.isConnected = false;
|
this.isConnected = false;
|
||||||
|
265
ts/daemon/crashlogmanager.ts
Normal file
265
ts/daemon/crashlogmanager.ts
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
import * as plugins from '../plugins.js';
|
||||||
|
import * as paths from '../paths.js';
|
||||||
|
import type { IProcessLog } from '../shared/protocol/ipc.types.js';
|
||||||
|
import type { ProcessId } from '../shared/protocol/id.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Manages crash log storage for failed processes
|
||||||
|
*/
|
||||||
|
export class CrashLogManager {
|
||||||
|
private crashLogsDir: string;
|
||||||
|
private readonly MAX_CRASH_LOGS = 100;
|
||||||
|
private readonly MAX_LOG_SIZE_BYTES = 1024 * 1024; // 1MB
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.crashLogsDir = plugins.path.join(paths.tspmDir, 'crashlogs');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save a crash log for a failed process
|
||||||
|
*/
|
||||||
|
public async saveCrashLog(
|
||||||
|
processId: ProcessId,
|
||||||
|
processName: string,
|
||||||
|
logs: IProcessLog[],
|
||||||
|
exitCode: number | null,
|
||||||
|
signal: string | null,
|
||||||
|
restartCount: number,
|
||||||
|
memoryUsage?: number
|
||||||
|
): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Ensure directory exists
|
||||||
|
await this.ensureCrashLogsDir();
|
||||||
|
|
||||||
|
// Generate filename with timestamp
|
||||||
|
const timestamp = new Date();
|
||||||
|
const dateStr = this.formatDate(timestamp);
|
||||||
|
const sanitizedName = this.sanitizeFilename(processName);
|
||||||
|
const filename = `${dateStr}_${processId}_${sanitizedName}.log`;
|
||||||
|
const filepath = plugins.path.join(this.crashLogsDir, filename);
|
||||||
|
|
||||||
|
// Get recent logs that fit within size limit
|
||||||
|
const recentLogs = this.getRecentLogs(logs, this.MAX_LOG_SIZE_BYTES);
|
||||||
|
|
||||||
|
// Create crash report
|
||||||
|
const crashReport = this.formatCrashReport({
|
||||||
|
processId,
|
||||||
|
processName,
|
||||||
|
timestamp,
|
||||||
|
exitCode,
|
||||||
|
signal,
|
||||||
|
restartCount,
|
||||||
|
memoryUsage,
|
||||||
|
logs: recentLogs
|
||||||
|
});
|
||||||
|
|
||||||
|
// Write crash log
|
||||||
|
await plugins.smartfile.memory.toFs(crashReport, filepath);
|
||||||
|
|
||||||
|
// Rotate old logs if needed
|
||||||
|
await this.rotateOldLogs();
|
||||||
|
|
||||||
|
console.log(`Crash log saved: ${filename}`);
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Failed to save crash log for process ${processId}:`, error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format date for filename: YYYY-MM-DD_HH-mm-ss
|
||||||
|
*/
|
||||||
|
private formatDate(date: Date): string {
|
||||||
|
const year = date.getFullYear();
|
||||||
|
const month = String(date.getMonth() + 1).padStart(2, '0');
|
||||||
|
const day = String(date.getDate()).padStart(2, '0');
|
||||||
|
const hours = String(date.getHours()).padStart(2, '0');
|
||||||
|
const minutes = String(date.getMinutes()).padStart(2, '0');
|
||||||
|
const seconds = String(date.getSeconds()).padStart(2, '0');
|
||||||
|
return `${year}-${month}-${day}_${hours}-${minutes}-${seconds}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sanitize process name for use in filename
|
||||||
|
*/
|
||||||
|
private sanitizeFilename(name: string): string {
|
||||||
|
// Replace problematic characters with underscore
|
||||||
|
return name
|
||||||
|
.replace(/[^a-zA-Z0-9-_]/g, '_')
|
||||||
|
.replace(/_+/g, '_')
|
||||||
|
.substring(0, 50); // Limit length
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get recent logs that fit within the size limit
|
||||||
|
*/
|
||||||
|
private getRecentLogs(logs: IProcessLog[], maxBytes: number): IProcessLog[] {
|
||||||
|
if (logs.length === 0) return [];
|
||||||
|
|
||||||
|
// Start from the end and work backwards
|
||||||
|
const recentLogs: IProcessLog[] = [];
|
||||||
|
let currentSize = 0;
|
||||||
|
|
||||||
|
for (let i = logs.length - 1; i >= 0; i--) {
|
||||||
|
const log = logs[i];
|
||||||
|
const logSize = this.estimateLogSize(log);
|
||||||
|
|
||||||
|
if (currentSize + logSize > maxBytes && recentLogs.length > 0) {
|
||||||
|
// Would exceed limit, stop adding
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
recentLogs.unshift(log);
|
||||||
|
currentSize += logSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
return recentLogs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estimate size of a log entry in bytes
|
||||||
|
*/
|
||||||
|
private estimateLogSize(log: IProcessLog): number {
|
||||||
|
// Format: [timestamp] [type] message\n
|
||||||
|
const formatted = `[${new Date(log.timestamp).toISOString()}] [${log.type}] ${log.message}\n`;
|
||||||
|
return Buffer.byteLength(formatted, 'utf8');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format a crash report with metadata and logs
|
||||||
|
*/
|
||||||
|
private formatCrashReport(data: {
|
||||||
|
processId: ProcessId;
|
||||||
|
processName: string;
|
||||||
|
timestamp: Date;
|
||||||
|
exitCode: number | null;
|
||||||
|
signal: string | null;
|
||||||
|
restartCount: number;
|
||||||
|
memoryUsage?: number;
|
||||||
|
logs: IProcessLog[];
|
||||||
|
}): string {
|
||||||
|
const lines: string[] = [
|
||||||
|
'================================================================================',
|
||||||
|
'TSPM CRASH REPORT',
|
||||||
|
'================================================================================',
|
||||||
|
`Process: ${data.processName} (ID: ${data.processId})`,
|
||||||
|
`Date: ${data.timestamp.toISOString()}`,
|
||||||
|
`Exit Code: ${data.exitCode ?? 'N/A'}`,
|
||||||
|
`Signal: ${data.signal ?? 'N/A'}`,
|
||||||
|
`Restart Attempt: ${data.restartCount}/10`,
|
||||||
|
];
|
||||||
|
|
||||||
|
if (data.memoryUsage !== undefined && data.memoryUsage > 0) {
|
||||||
|
lines.push(`Memory Usage: ${this.humanReadableBytes(data.memoryUsage)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.push(
|
||||||
|
'================================================================================',
|
||||||
|
'',
|
||||||
|
`LAST ${data.logs.length} LOG ENTRIES:`,
|
||||||
|
'--------------------------------------------------------------------------------',
|
||||||
|
''
|
||||||
|
);
|
||||||
|
|
||||||
|
// Add log entries
|
||||||
|
for (const log of data.logs) {
|
||||||
|
const timestamp = new Date(log.timestamp).toISOString();
|
||||||
|
const type = log.type.toUpperCase().padEnd(6);
|
||||||
|
lines.push(`[${timestamp}] [${type}] ${log.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.push(
|
||||||
|
'',
|
||||||
|
'================================================================================',
|
||||||
|
'END OF CRASH REPORT',
|
||||||
|
'================================================================================',
|
||||||
|
''
|
||||||
|
);
|
||||||
|
|
||||||
|
return lines.join('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert bytes to human-readable format
|
||||||
|
*/
|
||||||
|
private humanReadableBytes(bytes: number): string {
|
||||||
|
if (bytes === 0) return '0 Bytes';
|
||||||
|
const k = 1024;
|
||||||
|
const sizes = ['Bytes', 'KB', 'MB', 'GB'];
|
||||||
|
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||||
|
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure crash logs directory exists
|
||||||
|
*/
|
||||||
|
private async ensureCrashLogsDir(): Promise<void> {
|
||||||
|
await plugins.smartfile.fs.ensureDir(this.crashLogsDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rotate old crash logs when exceeding max count
|
||||||
|
*/
|
||||||
|
private async rotateOldLogs(): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Get all crash log files
|
||||||
|
const files = await plugins.smartfile.fs.listFileTree(this.crashLogsDir, '*.log');
|
||||||
|
|
||||||
|
if (files.length <= this.MAX_CRASH_LOGS) {
|
||||||
|
return; // No rotation needed
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get file stats and sort by modification time (oldest first)
|
||||||
|
const fileStats = await Promise.all(
|
||||||
|
files.map(async (file) => {
|
||||||
|
const filepath = plugins.path.join(this.crashLogsDir, file);
|
||||||
|
const stats = await plugins.smartfile.fs.stat(filepath);
|
||||||
|
return { filepath, mtime: stats.mtime.getTime() };
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
fileStats.sort((a, b) => a.mtime - b.mtime);
|
||||||
|
|
||||||
|
// Delete oldest files to stay under limit
|
||||||
|
const filesToDelete = fileStats.length - this.MAX_CRASH_LOGS;
|
||||||
|
for (let i = 0; i < filesToDelete; i++) {
|
||||||
|
await plugins.smartfile.fs.remove(fileStats[i].filepath);
|
||||||
|
console.log(`Rotated old crash log: ${plugins.path.basename(fileStats[i].filepath)}`);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to rotate crash logs:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get list of crash logs for a specific process
|
||||||
|
*/
|
||||||
|
public async getCrashLogsForProcess(processId: ProcessId): Promise<string[]> {
|
||||||
|
try {
|
||||||
|
await this.ensureCrashLogsDir();
|
||||||
|
const files = await plugins.smartfile.fs.listFileTree(this.crashLogsDir, `*_${processId}_*.log`);
|
||||||
|
return files.map(file => plugins.path.join(this.crashLogsDir, file));
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Failed to get crash logs for process ${processId}:`, error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clean up all crash logs (for maintenance)
|
||||||
|
*/
|
||||||
|
public async cleanupAllCrashLogs(): Promise<void> {
|
||||||
|
try {
|
||||||
|
await this.ensureCrashLogsDir();
|
||||||
|
const files = await plugins.smartfile.fs.listFileTree(this.crashLogsDir, '*.log');
|
||||||
|
|
||||||
|
for (const file of files) {
|
||||||
|
const filepath = plugins.path.join(this.crashLogsDir, file);
|
||||||
|
await plugins.smartfile.fs.remove(filepath);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Cleaned up ${files.length} crash logs`);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to cleanup crash logs:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@@ -2,6 +2,7 @@ import * as plugins from '../plugins.js';
|
|||||||
import { EventEmitter } from 'events';
|
import { EventEmitter } from 'events';
|
||||||
import { ProcessWrapper } from './processwrapper.js';
|
import { ProcessWrapper } from './processwrapper.js';
|
||||||
import { LogPersistence } from './logpersistence.js';
|
import { LogPersistence } from './logpersistence.js';
|
||||||
|
import { CrashLogManager } from './crashlogmanager.js';
|
||||||
import { Logger, ProcessError, handleError } from '../shared/common/utils.errorhandler.js';
|
import { Logger, ProcessError, handleError } from '../shared/common/utils.errorhandler.js';
|
||||||
import type { IMonitorConfig, IProcessLog } from '../shared/protocol/ipc.types.js';
|
import type { IMonitorConfig, IProcessLog } from '../shared/protocol/ipc.types.js';
|
||||||
import type { ProcessId } from '../shared/protocol/id.js';
|
import type { ProcessId } from '../shared/protocol/id.js';
|
||||||
@@ -15,6 +16,7 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
private logger: Logger;
|
private logger: Logger;
|
||||||
private logs: IProcessLog[] = [];
|
private logs: IProcessLog[] = [];
|
||||||
private logPersistence: LogPersistence;
|
private logPersistence: LogPersistence;
|
||||||
|
private crashLogManager: CrashLogManager;
|
||||||
private processId?: ProcessId;
|
private processId?: ProcessId;
|
||||||
private currentLogMemorySize: number = 0;
|
private currentLogMemorySize: number = 0;
|
||||||
private readonly MAX_LOG_MEMORY_SIZE = 10 * 1024 * 1024; // 10MB
|
private readonly MAX_LOG_MEMORY_SIZE = 10 * 1024 * 1024; // 10MB
|
||||||
@@ -26,6 +28,11 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
private readonly RESET_WINDOW_MS = 60 * 60 * 1000; // 1 hour
|
private readonly RESET_WINDOW_MS = 60 * 60 * 1000; // 1 hour
|
||||||
private lastMemoryUsage: number = 0;
|
private lastMemoryUsage: number = 0;
|
||||||
private lastCpuUsage: number = 0;
|
private lastCpuUsage: number = 0;
|
||||||
|
// Store event listeners for cleanup
|
||||||
|
private logHandler?: (log: IProcessLog) => void;
|
||||||
|
private startHandler?: (pid: number) => void;
|
||||||
|
private exitHandler?: (code: number | null, signal: string | null) => Promise<void>;
|
||||||
|
private errorHandler?: (error: Error | ProcessError) => Promise<void>;
|
||||||
|
|
||||||
constructor(config: IMonitorConfig & { id?: ProcessId }) {
|
constructor(config: IMonitorConfig & { id?: ProcessId }) {
|
||||||
super();
|
super();
|
||||||
@@ -33,6 +40,7 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
this.logger = new Logger(`ProcessMonitor:${config.name || 'unnamed'}`);
|
this.logger = new Logger(`ProcessMonitor:${config.name || 'unnamed'}`);
|
||||||
this.logs = [];
|
this.logs = [];
|
||||||
this.logPersistence = new LogPersistence();
|
this.logPersistence = new LogPersistence();
|
||||||
|
this.crashLogManager = new CrashLogManager();
|
||||||
this.processId = config.id;
|
this.processId = config.id;
|
||||||
this.currentLogMemorySize = 0;
|
this.currentLogMemorySize = 0;
|
||||||
}
|
}
|
||||||
@@ -83,6 +91,14 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
|
|
||||||
this.logger.info(`Spawning process: ${this.config.command}`);
|
this.logger.info(`Spawning process: ${this.config.command}`);
|
||||||
|
|
||||||
|
// Clear any orphaned pidusage cache entries before spawning
|
||||||
|
try {
|
||||||
|
(plugins.pidusage as any)?.clearAll?.();
|
||||||
|
} catch {}
|
||||||
|
|
||||||
|
// Clean up previous listeners if any
|
||||||
|
this.cleanupListeners();
|
||||||
|
|
||||||
// Create a new process wrapper
|
// Create a new process wrapper
|
||||||
this.processWrapper = new ProcessWrapper({
|
this.processWrapper = new ProcessWrapper({
|
||||||
name: this.config.name || 'unnamed-process',
|
name: this.config.name || 'unnamed-process',
|
||||||
@@ -94,7 +110,7 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Set up event handlers
|
// Set up event handlers
|
||||||
this.processWrapper.on('log', (log: IProcessLog): void => {
|
this.logHandler = (log: IProcessLog): void => {
|
||||||
// Store the log in our buffer
|
// Store the log in our buffer
|
||||||
this.logs.push(log);
|
this.logs.push(log);
|
||||||
if (process.env.TSPM_DEBUG) {
|
if (process.env.TSPM_DEBUG) {
|
||||||
@@ -117,6 +133,7 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
// Remove oldest logs until we're under the memory limit
|
// Remove oldest logs until we're under the memory limit
|
||||||
const removed = this.logs.shift()!;
|
const removed = this.logs.shift()!;
|
||||||
const removedSize = this.logSizeMap.get(removed) ?? this.estimateLogSize(removed);
|
const removedSize = this.logSizeMap.get(removed) ?? this.estimateLogSize(removed);
|
||||||
|
this.logSizeMap.delete(removed); // Clean up map entry to prevent memory leak
|
||||||
this.currentLogMemorySize -= removedSize;
|
this.currentLogMemorySize -= removedSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,16 +144,16 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
if (log.type === 'system') {
|
if (log.type === 'system') {
|
||||||
this.log(log.message);
|
this.log(log.message);
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
this.processWrapper.on('log', this.logHandler);
|
||||||
|
|
||||||
// Re-emit start event with PID for upstream handlers
|
// Re-emit start event with PID for upstream handlers
|
||||||
this.processWrapper.on('start', (pid: number): void => {
|
this.startHandler = (pid: number): void => {
|
||||||
this.emit('start', pid);
|
this.emit('start', pid);
|
||||||
});
|
};
|
||||||
|
this.processWrapper.on('start', this.startHandler);
|
||||||
|
|
||||||
this.processWrapper.on(
|
this.exitHandler = async (code: number | null, signal: string | null): Promise<void> => {
|
||||||
'exit',
|
|
||||||
async (code: number | null, signal: string | null): Promise<void> => {
|
|
||||||
const exitMsg = `Process exited with code ${code}, signal ${signal}.`;
|
const exitMsg = `Process exited with code ${code}, signal ${signal}.`;
|
||||||
this.logger.info(exitMsg);
|
this.logger.info(exitMsg);
|
||||||
this.log(exitMsg);
|
this.log(exitMsg);
|
||||||
@@ -149,6 +166,27 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
}
|
}
|
||||||
} catch {}
|
} catch {}
|
||||||
|
|
||||||
|
// Detect if this was a crash (non-zero exit code or killed by signal)
|
||||||
|
const isCrash = (code !== null && code !== 0) || signal !== null;
|
||||||
|
|
||||||
|
// Save crash log if this was a crash
|
||||||
|
if (isCrash && this.processId && this.config.name) {
|
||||||
|
try {
|
||||||
|
await this.crashLogManager.saveCrashLog(
|
||||||
|
this.processId,
|
||||||
|
this.config.name,
|
||||||
|
this.logs,
|
||||||
|
code,
|
||||||
|
signal,
|
||||||
|
this.restartCount,
|
||||||
|
this.lastMemoryUsage
|
||||||
|
);
|
||||||
|
this.logger.info(`Saved crash log for process ${this.config.name}`);
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error(`Failed to save crash log: ${error}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Flush logs to disk on exit
|
// Flush logs to disk on exit
|
||||||
if (this.processId && this.logs.length > 0) {
|
if (this.processId && this.logs.length > 0) {
|
||||||
try {
|
try {
|
||||||
@@ -169,10 +207,10 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
'Not restarting process because monitor is stopped',
|
'Not restarting process because monitor is stopped',
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
};
|
||||||
);
|
this.processWrapper.on('exit', this.exitHandler);
|
||||||
|
|
||||||
this.processWrapper.on('error', async (error: Error | ProcessError): Promise<void> => {
|
this.errorHandler = async (error: Error | ProcessError): Promise<void> => {
|
||||||
const errorMsg =
|
const errorMsg =
|
||||||
error instanceof ProcessError
|
error instanceof ProcessError
|
||||||
? `Process error: ${error.toString()}`
|
? `Process error: ${error.toString()}`
|
||||||
@@ -181,6 +219,24 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
this.logger.error(error);
|
this.logger.error(error);
|
||||||
this.log(errorMsg);
|
this.log(errorMsg);
|
||||||
|
|
||||||
|
// Save crash log for errors
|
||||||
|
if (this.processId && this.config.name) {
|
||||||
|
try {
|
||||||
|
await this.crashLogManager.saveCrashLog(
|
||||||
|
this.processId,
|
||||||
|
this.config.name,
|
||||||
|
this.logs,
|
||||||
|
null, // no exit code for errors
|
||||||
|
null, // no signal for errors
|
||||||
|
this.restartCount,
|
||||||
|
this.lastMemoryUsage
|
||||||
|
);
|
||||||
|
this.logger.info(`Saved crash log for process ${this.config.name} due to error`);
|
||||||
|
} catch (crashLogError) {
|
||||||
|
this.logger.error(`Failed to save crash log: ${crashLogError}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Flush logs to disk on error
|
// Flush logs to disk on error
|
||||||
if (this.processId && this.logs.length > 0) {
|
if (this.processId && this.logs.length > 0) {
|
||||||
try {
|
try {
|
||||||
@@ -196,7 +252,8 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
} else {
|
} else {
|
||||||
this.logger.debug('Not restarting process because monitor is stopped');
|
this.logger.debug('Not restarting process because monitor is stopped');
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
this.processWrapper.on('error', this.errorHandler);
|
||||||
|
|
||||||
// Start the process
|
// Start the process
|
||||||
try {
|
try {
|
||||||
@@ -210,6 +267,31 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clean up event listeners from process wrapper
|
||||||
|
*/
|
||||||
|
private cleanupListeners(): void {
|
||||||
|
if (this.processWrapper) {
|
||||||
|
if (this.logHandler) {
|
||||||
|
this.processWrapper.removeListener('log', this.logHandler);
|
||||||
|
}
|
||||||
|
if (this.startHandler) {
|
||||||
|
this.processWrapper.removeListener('start', this.startHandler);
|
||||||
|
}
|
||||||
|
if (this.exitHandler) {
|
||||||
|
this.processWrapper.removeListener('exit', this.exitHandler);
|
||||||
|
}
|
||||||
|
if (this.errorHandler) {
|
||||||
|
this.processWrapper.removeListener('error', this.errorHandler);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Clear references
|
||||||
|
this.logHandler = undefined;
|
||||||
|
this.startHandler = undefined;
|
||||||
|
this.exitHandler = undefined;
|
||||||
|
this.errorHandler = undefined;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedule a restart with incremental debounce and failure cutoff.
|
* Schedule a restart with incremental debounce and failure cutoff.
|
||||||
*/
|
*/
|
||||||
@@ -360,6 +442,14 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
this.logger.debug(
|
this.logger.debug(
|
||||||
`Total memory for process group: ${this.humanReadableBytes(totalMemory)}`,
|
`Total memory for process group: ${this.humanReadableBytes(totalMemory)}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Clear pidusage cache for all PIDs to prevent memory leaks
|
||||||
|
for (const pid of pids) {
|
||||||
|
try {
|
||||||
|
(plugins.pidusage as any)?.clear?.(pid);
|
||||||
|
} catch {}
|
||||||
|
}
|
||||||
|
|
||||||
resolve({ memory: totalMemory, cpu: totalCpu });
|
resolve({ memory: totalMemory, cpu: totalCpu });
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
@@ -387,6 +477,9 @@ export class ProcessMonitor extends EventEmitter {
|
|||||||
this.log('Stopping process monitor.');
|
this.log('Stopping process monitor.');
|
||||||
this.stopped = true;
|
this.stopped = true;
|
||||||
|
|
||||||
|
// Clean up event listeners
|
||||||
|
this.cleanupListeners();
|
||||||
|
|
||||||
// Flush logs to disk before stopping
|
// Flush logs to disk before stopping
|
||||||
if (this.processId && this.logs.length > 0) {
|
if (this.processId && this.logs.length > 0) {
|
||||||
try {
|
try {
|
||||||
|
@@ -23,6 +23,13 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
private runId: string = '';
|
private runId: string = '';
|
||||||
private stdoutRemainder: string = '';
|
private stdoutRemainder: string = '';
|
||||||
private stderrRemainder: string = '';
|
private stderrRemainder: string = '';
|
||||||
|
// Store event handlers for cleanup
|
||||||
|
private exitHandler?: (code: number | null, signal: string | null) => void;
|
||||||
|
private errorHandler?: (error: Error) => void;
|
||||||
|
private stdoutDataHandler?: (data: Buffer) => void;
|
||||||
|
private stdoutEndHandler?: () => void;
|
||||||
|
private stderrDataHandler?: (data: Buffer) => void;
|
||||||
|
private stderrEndHandler?: () => void;
|
||||||
|
|
||||||
// Helper: send a signal to the process and all its children (best-effort)
|
// Helper: send a signal to the process and all its children (best-effort)
|
||||||
private async killProcessTree(signal: NodeJS.Signals): Promise<void> {
|
private async killProcessTree(signal: NodeJS.Signals): Promise<void> {
|
||||||
@@ -84,7 +91,7 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
this.startTime = new Date();
|
this.startTime = new Date();
|
||||||
|
|
||||||
// Handle process exit
|
// Handle process exit
|
||||||
this.process.on('exit', (code, signal) => {
|
this.exitHandler = (code, signal) => {
|
||||||
const exitMessage = `Process exited with code ${code}, signal ${signal}`;
|
const exitMessage = `Process exited with code ${code}, signal ${signal}`;
|
||||||
this.logger.info(exitMessage);
|
this.logger.info(exitMessage);
|
||||||
this.addSystemLog(exitMessage);
|
this.addSystemLog(exitMessage);
|
||||||
@@ -97,10 +104,11 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
this.process = null;
|
this.process = null;
|
||||||
|
|
||||||
this.emit('exit', code, signal);
|
this.emit('exit', code, signal);
|
||||||
});
|
};
|
||||||
|
this.process.on('exit', this.exitHandler);
|
||||||
|
|
||||||
// Handle errors
|
// Handle errors
|
||||||
this.process.on('error', (error) => {
|
this.errorHandler = (error) => {
|
||||||
const processError = new ProcessError(
|
const processError = new ProcessError(
|
||||||
error.message,
|
error.message,
|
||||||
'ERR_PROCESS_EXECUTION',
|
'ERR_PROCESS_EXECUTION',
|
||||||
@@ -109,7 +117,8 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
this.logger.error(processError);
|
this.logger.error(processError);
|
||||||
this.addSystemLog(`Process error: ${processError.toString()}`);
|
this.addSystemLog(`Process error: ${processError.toString()}`);
|
||||||
this.emit('error', processError);
|
this.emit('error', processError);
|
||||||
});
|
};
|
||||||
|
this.process.on('error', this.errorHandler);
|
||||||
|
|
||||||
// Capture stdout
|
// Capture stdout
|
||||||
if (this.process.stdout) {
|
if (this.process.stdout) {
|
||||||
@@ -118,7 +127,7 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
`[ProcessWrapper] Setting up stdout listener for process ${this.process.pid}`,
|
`[ProcessWrapper] Setting up stdout listener for process ${this.process.pid}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
this.process.stdout.on('data', (data) => {
|
this.stdoutDataHandler = (data) => {
|
||||||
if (process.env.TSPM_DEBUG) {
|
if (process.env.TSPM_DEBUG) {
|
||||||
console.error(
|
console.error(
|
||||||
`[ProcessWrapper] Received stdout data from PID ${this.process?.pid}: ${data
|
`[ProcessWrapper] Received stdout data from PID ${this.process?.pid}: ${data
|
||||||
@@ -141,23 +150,25 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
this.logger.debug(`Captured stdout: ${line}`);
|
this.logger.debug(`Captured stdout: ${line}`);
|
||||||
this.addLog('stdout', line);
|
this.addLog('stdout', line);
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
this.process.stdout.on('data', this.stdoutDataHandler);
|
||||||
|
|
||||||
// Flush remainder on stream end
|
// Flush remainder on stream end
|
||||||
this.process.stdout.on('end', () => {
|
this.stdoutEndHandler = () => {
|
||||||
if (this.stdoutRemainder) {
|
if (this.stdoutRemainder) {
|
||||||
this.logger.debug(`Flushing stdout remainder: ${this.stdoutRemainder}`);
|
this.logger.debug(`Flushing stdout remainder: ${this.stdoutRemainder}`);
|
||||||
this.addLog('stdout', this.stdoutRemainder);
|
this.addLog('stdout', this.stdoutRemainder);
|
||||||
this.stdoutRemainder = '';
|
this.stdoutRemainder = '';
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
this.process.stdout.on('end', this.stdoutEndHandler);
|
||||||
} else {
|
} else {
|
||||||
this.logger.warn('Process stdout is null');
|
this.logger.warn('Process stdout is null');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Capture stderr
|
// Capture stderr
|
||||||
if (this.process.stderr) {
|
if (this.process.stderr) {
|
||||||
this.process.stderr.on('data', (data) => {
|
this.stderrDataHandler = (data) => {
|
||||||
// Add data to remainder buffer and split by newlines
|
// Add data to remainder buffer and split by newlines
|
||||||
const text = this.stderrRemainder + data.toString();
|
const text = this.stderrRemainder + data.toString();
|
||||||
const lines = text.split('\n');
|
const lines = text.split('\n');
|
||||||
@@ -169,15 +180,17 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
this.addLog('stderr', line);
|
this.addLog('stderr', line);
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
this.process.stderr.on('data', this.stderrDataHandler);
|
||||||
|
|
||||||
// Flush remainder on stream end
|
// Flush remainder on stream end
|
||||||
this.process.stderr.on('end', () => {
|
this.stderrEndHandler = () => {
|
||||||
if (this.stderrRemainder) {
|
if (this.stderrRemainder) {
|
||||||
this.addLog('stderr', this.stderrRemainder);
|
this.addLog('stderr', this.stderrRemainder);
|
||||||
this.stderrRemainder = '';
|
this.stderrRemainder = '';
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
this.process.stderr.on('end', this.stderrEndHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.addSystemLog(`Process started with PID ${this.process.pid}`);
|
this.addSystemLog(`Process started with PID ${this.process.pid}`);
|
||||||
@@ -200,6 +213,46 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clean up event listeners from process and streams
|
||||||
|
*/
|
||||||
|
private cleanupListeners(): void {
|
||||||
|
if (this.process) {
|
||||||
|
if (this.exitHandler) {
|
||||||
|
this.process.removeListener('exit', this.exitHandler);
|
||||||
|
}
|
||||||
|
if (this.errorHandler) {
|
||||||
|
this.process.removeListener('error', this.errorHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.process.stdout) {
|
||||||
|
if (this.stdoutDataHandler) {
|
||||||
|
this.process.stdout.removeListener('data', this.stdoutDataHandler);
|
||||||
|
}
|
||||||
|
if (this.stdoutEndHandler) {
|
||||||
|
this.process.stdout.removeListener('end', this.stdoutEndHandler);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.process.stderr) {
|
||||||
|
if (this.stderrDataHandler) {
|
||||||
|
this.process.stderr.removeListener('data', this.stderrDataHandler);
|
||||||
|
}
|
||||||
|
if (this.stderrEndHandler) {
|
||||||
|
this.process.stderr.removeListener('end', this.stderrEndHandler);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear references
|
||||||
|
this.exitHandler = undefined;
|
||||||
|
this.errorHandler = undefined;
|
||||||
|
this.stdoutDataHandler = undefined;
|
||||||
|
this.stdoutEndHandler = undefined;
|
||||||
|
this.stderrDataHandler = undefined;
|
||||||
|
this.stderrEndHandler = undefined;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stop the wrapped process
|
* Stop the wrapped process
|
||||||
*/
|
*/
|
||||||
@@ -210,6 +263,9 @@ export class ProcessWrapper extends EventEmitter {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean up event listeners before stopping
|
||||||
|
this.cleanupListeners();
|
||||||
|
|
||||||
this.logger.info('Stopping process...');
|
this.logger.info('Stopping process...');
|
||||||
this.addSystemLog('Stopping process...');
|
this.addSystemLog('Stopping process...');
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user