fix accumulation

This commit is contained in:
Juergen Kunz
2025-06-08 12:25:31 +00:00
parent 82a350bf51
commit dc3eda5e29
3 changed files with 153 additions and 4 deletions

View File

@ -674,3 +674,51 @@ if (!record.connectionClosed && record.outgoing && record.bytesReceived > 0 && r
- Both sockets are still alive (not destroyed)
This complements the zombie detection by handling cases where sockets remain technically alive but the connection is effectively dead.
## 🚨 CRITICAL FIX: Cleanup Queue Bug (January 2025)
### Critical Bug Found
The cleanup queue had a severe bug that caused connection accumulation when more than 100 connections needed cleanup:
```typescript
// BUG: This cleared the ENTIRE queue after processing only the first batch!
const toCleanup = Array.from(this.cleanupQueue).slice(0, this.cleanupBatchSize);
this.cleanupQueue.clear(); // ❌ This discarded all connections beyond the first 100!
```
### Fix Implemented
```typescript
// Now only removes the connections being processed
const toCleanup = Array.from(this.cleanupQueue).slice(0, this.cleanupBatchSize);
for (const connectionId of toCleanup) {
this.cleanupQueue.delete(connectionId); // ✅ Only remove what we process
const record = this.connectionRecords.get(connectionId);
if (record) {
this.cleanupConnection(record, record.incomingTerminationReason || 'normal');
}
}
```
### Impact
- **Before**: If 150 connections needed cleanup, only the first 100 would be processed and the remaining 50 would accumulate forever
- **After**: All connections are properly cleaned up in batches
### Additional Improvements
1. **Faster Inactivity Checks**: Reduced from 30s to 10s intervals
- Zombies and stuck connections are detected 3x faster
- Reduces the window for accumulation
2. **Duplicate Prevention**: Added check in queueCleanup to prevent processing already-closed connections
- Prevents unnecessary work
- Ensures connections are only cleaned up once
### Summary of All Fixes
1. **Connection Timeout** (already documented) - Prevents accumulation when backends are unreachable
2. **Zombie Detection** - Cleans up connections with destroyed sockets
3. **Stuck Connection Detection** - Cleans up connections to hanging backends
4. **Cleanup Queue Bug** - Ensures ALL connections get cleaned up, not just the first 100
5. **Faster Detection** - Reduced check interval from 30s to 10s
These fixes combined should prevent connection accumulation in all known scenarios.

View File

@ -0,0 +1,93 @@
import { expect, tap } from '@git.zone/tstest/tapbundle';
import { SmartProxy } from '../ts/index.js';
tap.test('cleanup queue bug - verify queue processing handles more than batch size', async (tools) => {
console.log('\n=== Cleanup Queue Bug Test ===');
console.log('Purpose: Verify that the cleanup queue correctly processes all connections');
console.log('even when there are more than the batch size (100)');
// Create proxy
const proxy = new SmartProxy({
routes: [{
name: 'test-route',
match: { ports: 8588 },
action: {
type: 'forward',
target: { host: 'localhost', port: 9996 }
}
}],
enableDetailedLogging: false,
});
await proxy.start();
console.log('✓ Proxy started on port 8588');
// Access connection manager
const cm = (proxy as any).connectionManager;
// Create mock connection records
console.log('\n--- Creating 150 mock connections ---');
const mockConnections: any[] = [];
for (let i = 0; i < 150; i++) {
const mockRecord = {
id: `mock-${i}`,
incoming: { destroyed: true, remoteAddress: '127.0.0.1' },
outgoing: { destroyed: true },
connectionClosed: false,
incomingStartTime: Date.now(),
lastActivity: Date.now(),
remoteIP: '127.0.0.1',
remotePort: 10000 + i,
localPort: 8588,
bytesReceived: 100,
bytesSent: 100,
incomingTerminationReason: null,
cleanupTimer: null
};
// Add to connection records
cm.connectionRecords.set(mockRecord.id, mockRecord);
mockConnections.push(mockRecord);
}
console.log(`Created ${cm.getConnectionCount()} mock connections`);
expect(cm.getConnectionCount()).toEqual(150);
// Queue all connections for cleanup
console.log('\n--- Queueing all connections for cleanup ---');
for (const conn of mockConnections) {
cm.initiateCleanupOnce(conn, 'test_cleanup');
}
console.log(`Cleanup queue size: ${cm.cleanupQueue.size}`);
expect(cm.cleanupQueue.size).toEqual(150);
// Wait for cleanup to complete
console.log('\n--- Waiting for cleanup batches to process ---');
// The first batch should process immediately (100 connections)
// Then additional batches should be scheduled
await new Promise(resolve => setTimeout(resolve, 500));
// Check final state
const finalCount = cm.getConnectionCount();
console.log(`\nFinal connection count: ${finalCount}`);
console.log(`Cleanup queue size: ${cm.cleanupQueue.size}`);
// All connections should be cleaned up
expect(finalCount).toEqual(0);
expect(cm.cleanupQueue.size).toEqual(0);
// Verify termination stats
const stats = cm.getTerminationStats();
console.log('Termination stats:', stats);
expect(stats.incoming.test_cleanup).toEqual(150);
// Cleanup
await proxy.stop();
console.log('\n✓ Test complete: Cleanup queue now correctly processes all connections');
});
tap.start();

View File

@ -140,10 +140,10 @@ export class ConnectionManager extends LifecycleComponent {
* Start the inactivity check timer
*/
private startInactivityCheckTimer(): void {
// Check every 30 seconds for connections that need inactivity check
// Check more frequently (every 10 seconds) to catch zombies and stuck connections faster
this.setInterval(() => {
this.performOptimizedInactivityCheck();
}, 30000);
}, 10000);
// Note: LifecycleComponent's setInterval already calls unref()
}
@ -194,6 +194,13 @@ export class ConnectionManager extends LifecycleComponent {
* Queue a connection for cleanup
*/
private queueCleanup(connectionId: string): void {
// Check if connection is already being processed
const record = this.connectionRecords.get(connectionId);
if (!record || record.connectionClosed) {
// Already cleaned up or doesn't exist, skip
return;
}
this.cleanupQueue.add(connectionId);
// Process immediately if queue is getting large
@ -217,9 +224,10 @@ export class ConnectionManager extends LifecycleComponent {
}
const toCleanup = Array.from(this.cleanupQueue).slice(0, this.cleanupBatchSize);
this.cleanupQueue.clear();
// Remove only the items we're processing, not the entire queue!
for (const connectionId of toCleanup) {
this.cleanupQueue.delete(connectionId);
const record = this.connectionRecords.get(connectionId);
if (record) {
this.cleanupConnection(record, record.incomingTerminationReason || 'normal');