Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
4c847fd3d7 | |||
2e11f9358c | |||
9bf15ff756 | |||
6726de277e | |||
dc3eda5e29 | |||
82a350bf51 | |||
890e907664 | |||
19590ef107 | |||
47735adbf2 | |||
9094b76b1b | |||
9aebcd488d | |||
311691c2cc | |||
578d1ba2f7 |
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@push.rocks/smartproxy",
|
||||
"version": "19.5.21",
|
||||
"version": "19.6.0",
|
||||
"private": false,
|
||||
"description": "A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.",
|
||||
"main": "dist_ts/index.js",
|
||||
@ -31,6 +31,7 @@
|
||||
"@push.rocks/smartnetwork": "^4.0.2",
|
||||
"@push.rocks/smartpromise": "^4.2.3",
|
||||
"@push.rocks/smartrequest": "^2.1.0",
|
||||
"@push.rocks/smartrx": "^3.0.10",
|
||||
"@push.rocks/smartstring": "^4.0.15",
|
||||
"@push.rocks/taskbuffer": "^3.1.7",
|
||||
"@tsclass/tsclass": "^9.2.0",
|
||||
|
13
pnpm-lock.yaml
generated
13
pnpm-lock.yaml
generated
@ -35,6 +35,9 @@ importers:
|
||||
'@push.rocks/smartrequest':
|
||||
specifier: ^2.1.0
|
||||
version: 2.1.0
|
||||
'@push.rocks/smartrx':
|
||||
specifier: ^3.0.10
|
||||
version: 3.0.10
|
||||
'@push.rocks/smartstring':
|
||||
specifier: ^4.0.15
|
||||
version: 4.0.15
|
||||
@ -977,9 +980,6 @@ packages:
|
||||
'@push.rocks/smartrx@3.0.10':
|
||||
resolution: {integrity: sha512-USjIYcsSfzn14cwOsxgq/bBmWDTTzy3ouWAnW5NdMyRRzEbmeNrvmy6TRqNeDlJ2PsYNTt1rr/zGUqvIy72ITg==}
|
||||
|
||||
'@push.rocks/smartrx@3.0.7':
|
||||
resolution: {integrity: sha512-qCWy0s3RLAgGSnaw/Gu0BNaJ59CsI6RK5OJDCCqxc7P2X/S755vuLtnAR5/0dEjdhCHXHX9ytPZx+o9g/CNiyA==}
|
||||
|
||||
'@push.rocks/smarts3@2.2.5':
|
||||
resolution: {integrity: sha512-OZjD0jBCUTJCLnwraxBcyZ3he5buXf2OEM1zipiTBChA2EcKUZWKk/a6KR5WT+NlFCIIuB23UG+U+cxsIWM91Q==}
|
||||
|
||||
@ -6131,11 +6131,6 @@ snapshots:
|
||||
'@push.rocks/smartpromise': 4.2.3
|
||||
rxjs: 7.8.2
|
||||
|
||||
'@push.rocks/smartrx@3.0.7':
|
||||
dependencies:
|
||||
'@push.rocks/smartpromise': 4.2.3
|
||||
rxjs: 7.8.2
|
||||
|
||||
'@push.rocks/smarts3@2.2.5':
|
||||
dependencies:
|
||||
'@push.rocks/smartbucket': 3.3.7
|
||||
@ -6301,7 +6296,7 @@ snapshots:
|
||||
'@push.rocks/smartenv': 5.0.12
|
||||
'@push.rocks/smartjson': 5.0.20
|
||||
'@push.rocks/smartpromise': 4.2.3
|
||||
'@push.rocks/smartrx': 3.0.7
|
||||
'@push.rocks/smartrx': 3.0.10
|
||||
'@tempfix/idb': 8.0.3
|
||||
fake-indexeddb: 5.0.2
|
||||
|
||||
|
724
readme.connections.md
Normal file
724
readme.connections.md
Normal file
@ -0,0 +1,724 @@
|
||||
# Connection Management in SmartProxy
|
||||
|
||||
This document describes connection handling, cleanup mechanisms, and known issues in SmartProxy, particularly focusing on proxy chain configurations.
|
||||
|
||||
## Connection Accumulation Investigation (January 2025)
|
||||
|
||||
### Problem Statement
|
||||
Connections may accumulate on the outer proxy in proxy chain configurations, despite implemented fixes.
|
||||
|
||||
### Historical Context
|
||||
- **v19.5.12-v19.5.15**: Major connection cleanup improvements
|
||||
- **v19.5.19+**: PROXY protocol support with WrappedSocket implementation
|
||||
- **v19.5.20**: Fixed race condition in immediate routing cleanup
|
||||
|
||||
### Current Architecture
|
||||
|
||||
#### Connection Flow in Proxy Chains
|
||||
```
|
||||
Client → Outer Proxy (8001) → Inner Proxy (8002) → Backend (httpbin.org:443)
|
||||
```
|
||||
|
||||
1. **Outer Proxy**:
|
||||
- Accepts client connection
|
||||
- Sends PROXY protocol header to inner proxy
|
||||
- Tracks connection in ConnectionManager
|
||||
- Immediate routing for non-TLS ports
|
||||
|
||||
2. **Inner Proxy**:
|
||||
- Parses PROXY protocol to get real client IP
|
||||
- Establishes connection to backend
|
||||
- Tracks its own connections separately
|
||||
|
||||
### Potential Causes of Connection Accumulation
|
||||
|
||||
#### 1. Race Condition in Immediate Routing
|
||||
When a connection is immediately routed (non-TLS ports), there's a timing window:
|
||||
```typescript
|
||||
// route-connection-handler.ts, line ~231
|
||||
this.routeConnection(socket, record, '', undefined);
|
||||
// Connection is routed before all setup is complete
|
||||
```
|
||||
|
||||
**Issue**: If client disconnects during backend connection setup, cleanup may not trigger properly.
|
||||
|
||||
#### 2. Outgoing Socket Assignment Timing
|
||||
Despite the fix in v19.5.20:
|
||||
```typescript
|
||||
// Line 1362 in setupDirectConnection
|
||||
record.outgoing = targetSocket;
|
||||
```
|
||||
There's still a window between socket creation and the `connect` event where cleanup might miss the outgoing socket.
|
||||
|
||||
#### 3. Batch Cleanup Delays
|
||||
ConnectionManager uses queued cleanup:
|
||||
- Batch size: 100 connections
|
||||
- Batch interval: 100ms
|
||||
- Under rapid connection/disconnection, queue might lag
|
||||
|
||||
#### 4. Different Cleanup Paths
|
||||
Multiple cleanup triggers exist:
|
||||
- Socket 'close' event
|
||||
- Socket 'error' event
|
||||
- Inactivity timeout
|
||||
- Connection timeout
|
||||
- Manual cleanup
|
||||
|
||||
Not all paths may properly handle proxy chain scenarios.
|
||||
|
||||
#### 5. Keep-Alive Connection Handling
|
||||
Keep-alive connections have special treatment:
|
||||
- Extended inactivity timeout (6x normal)
|
||||
- Warning before closure
|
||||
- May accumulate if backend is unresponsive
|
||||
|
||||
### Observed Symptoms
|
||||
|
||||
1. **Outer proxy connection count grows over time**
|
||||
2. **Inner proxy maintains zero or low connection count**
|
||||
3. **Connections show as closed in logs but remain in tracking**
|
||||
4. **Memory usage gradually increases**
|
||||
|
||||
### Debug Strategies
|
||||
|
||||
#### 1. Enhanced Logging
|
||||
Add connection state logging at key points:
|
||||
```typescript
|
||||
// When outgoing socket is created
|
||||
logger.log('debug', `Outgoing socket created for ${connectionId}`, {
|
||||
hasOutgoing: !!record.outgoing,
|
||||
outgoingState: record.outgoing?.readyState
|
||||
});
|
||||
```
|
||||
|
||||
#### 2. Connection State Inspection
|
||||
Periodically log detailed connection state:
|
||||
```typescript
|
||||
for (const [id, record] of connectionManager.getConnections()) {
|
||||
console.log({
|
||||
id,
|
||||
age: Date.now() - record.incomingStartTime,
|
||||
incomingDestroyed: record.incoming.destroyed,
|
||||
outgoingDestroyed: record.outgoing?.destroyed,
|
||||
hasCleanupTimer: !!record.cleanupTimer
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
#### 3. Cleanup Verification
|
||||
Track cleanup completion:
|
||||
```typescript
|
||||
// In cleanupConnection
|
||||
logger.log('debug', `Cleanup completed for ${record.id}`, {
|
||||
recordsRemaining: this.connectionRecords.size
|
||||
});
|
||||
```
|
||||
|
||||
### Recommendations
|
||||
|
||||
1. **Immediate Cleanup for Proxy Chains**
|
||||
- Skip batch queue for proxy chain connections
|
||||
- Use synchronous cleanup when PROXY protocol is detected
|
||||
|
||||
2. **Socket State Validation**
|
||||
- Check both `destroyed` and `readyState` before cleanup decisions
|
||||
- Handle 'opening' state sockets explicitly
|
||||
|
||||
3. **Timeout Adjustments**
|
||||
- Shorter timeouts for proxy chain connections
|
||||
- More aggressive cleanup for connections without data transfer
|
||||
|
||||
4. **Connection Limits**
|
||||
- Per-route connection limits
|
||||
- Backpressure when approaching limits
|
||||
|
||||
5. **Monitoring**
|
||||
- Export connection metrics
|
||||
- Alert on connection count thresholds
|
||||
- Track connection age distribution
|
||||
|
||||
### Test Scenarios to Reproduce
|
||||
|
||||
1. **Rapid Connect/Disconnect**
|
||||
```bash
|
||||
# Create many short-lived connections
|
||||
for i in {1..1000}; do
|
||||
(echo -n | nc localhost 8001) &
|
||||
done
|
||||
```
|
||||
|
||||
2. **Slow Backend**
|
||||
- Configure inner proxy to connect to unresponsive backend
|
||||
- Monitor outer proxy connection count
|
||||
|
||||
3. **Mixed Traffic**
|
||||
- Combine TLS and non-TLS connections
|
||||
- Add keep-alive connections
|
||||
- Observe accumulation patterns
|
||||
|
||||
### Future Improvements
|
||||
|
||||
1. **Connection Pool Isolation**
|
||||
- Separate pools for proxy chain vs direct connections
|
||||
- Different cleanup strategies per pool
|
||||
|
||||
2. **Circuit Breaker**
|
||||
- Detect accumulation and trigger aggressive cleanup
|
||||
- Temporary refuse new connections when near limit
|
||||
|
||||
3. **Connection State Machine**
|
||||
- Explicit states: CONNECTING, ESTABLISHED, CLOSING, CLOSED
|
||||
- State transition validation
|
||||
- Timeout per state
|
||||
|
||||
4. **Metrics Collection**
|
||||
- Connection lifecycle events
|
||||
- Cleanup success/failure rates
|
||||
- Time spent in each state
|
||||
|
||||
### Root Cause Identified (January 2025)
|
||||
|
||||
**The primary issue is on the inner proxy when backends are unreachable:**
|
||||
|
||||
When the backend is unreachable (e.g., non-routable IP like 10.255.255.1):
|
||||
1. The outgoing socket gets stuck in "opening" state indefinitely
|
||||
2. The `createSocketWithErrorHandler` in socket-utils.ts doesn't implement connection timeout
|
||||
3. `socket.setTimeout()` only handles inactivity AFTER connection, not during connect phase
|
||||
4. Connections accumulate because they never transition to error state
|
||||
5. Socket timeout warnings fire but connections are preserved as keep-alive
|
||||
|
||||
**Code Issue:**
|
||||
```typescript
|
||||
// socket-utils.ts line 275
|
||||
if (timeout) {
|
||||
socket.setTimeout(timeout); // This only handles inactivity, not connection!
|
||||
}
|
||||
```
|
||||
|
||||
**Required Fix:**
|
||||
|
||||
1. Add `connectionTimeout` to ISmartProxyOptions interface:
|
||||
```typescript
|
||||
// In interfaces.ts
|
||||
connectionTimeout?: number; // Timeout for establishing connection (ms), default: 30000 (30s)
|
||||
```
|
||||
|
||||
2. Update `createSocketWithErrorHandler` in socket-utils.ts:
|
||||
```typescript
|
||||
export function createSocketWithErrorHandler(options: SafeSocketOptions): plugins.net.Socket {
|
||||
const { port, host, onError, onConnect, timeout } = options;
|
||||
|
||||
const socket = new plugins.net.Socket();
|
||||
let connected = false;
|
||||
let connectionTimeout: NodeJS.Timeout | null = null;
|
||||
|
||||
socket.on('error', (error) => {
|
||||
if (connectionTimeout) {
|
||||
clearTimeout(connectionTimeout);
|
||||
connectionTimeout = null;
|
||||
}
|
||||
if (onError) onError(error);
|
||||
});
|
||||
|
||||
socket.on('connect', () => {
|
||||
connected = true;
|
||||
if (connectionTimeout) {
|
||||
clearTimeout(connectionTimeout);
|
||||
connectionTimeout = null;
|
||||
}
|
||||
if (timeout) socket.setTimeout(timeout); // Set inactivity timeout
|
||||
if (onConnect) onConnect();
|
||||
});
|
||||
|
||||
// Implement connection establishment timeout
|
||||
if (timeout) {
|
||||
connectionTimeout = setTimeout(() => {
|
||||
if (!connected && !socket.destroyed) {
|
||||
const error = new Error(`Connection timeout after ${timeout}ms to ${host}:${port}`);
|
||||
(error as any).code = 'ETIMEDOUT';
|
||||
socket.destroy();
|
||||
if (onError) onError(error);
|
||||
}
|
||||
}, timeout);
|
||||
}
|
||||
|
||||
socket.connect(port, host);
|
||||
return socket;
|
||||
}
|
||||
```
|
||||
|
||||
3. Pass connectionTimeout in route-connection-handler.ts:
|
||||
```typescript
|
||||
const targetSocket = createSocketWithErrorHandler({
|
||||
port: finalTargetPort,
|
||||
host: finalTargetHost,
|
||||
timeout: this.settings.connectionTimeout || 30000, // Connection timeout
|
||||
onError: (error) => { /* existing */ },
|
||||
onConnect: async () => { /* existing */ }
|
||||
});
|
||||
```
|
||||
|
||||
### Investigation Results (January 2025)
|
||||
|
||||
Based on extensive testing with debug scripts:
|
||||
|
||||
1. **Normal Operation**: In controlled tests, connections are properly cleaned up:
|
||||
- Immediate routing cleanup handler properly destroys outgoing connections
|
||||
- Both outer and inner proxies maintain 0 connections after clients disconnect
|
||||
- Keep-alive connections are tracked and cleaned up correctly
|
||||
|
||||
2. **Potential Edge Cases Not Covered by Tests**:
|
||||
- **HTTP/2 Connections**: May have different lifecycle than HTTP/1.1
|
||||
- **WebSocket Connections**: Long-lived upgrade connections might persist
|
||||
- **Partial TLS Handshakes**: Connections that start TLS but don't complete
|
||||
- **PROXY Protocol Parse Failures**: Malformed headers from untrusted sources
|
||||
- **Connection Pool Reuse**: HttpProxy component may maintain its own pools
|
||||
|
||||
3. **Timing-Sensitive Scenarios**:
|
||||
- Client disconnects exactly when `record.outgoing` is being assigned
|
||||
- Backend connects but immediately RSTs
|
||||
- Proxy chain where middle proxy restarts
|
||||
- Multiple rapid reconnects with same source IP/port
|
||||
|
||||
4. **Configuration-Specific Issues**:
|
||||
- Mixed `sendProxyProtocol` settings in chain
|
||||
- Different `keepAlive` settings between proxies
|
||||
- Mismatched timeout values
|
||||
- Routes with `forwardingEngine: 'nftables'`
|
||||
|
||||
### Additional Debug Points
|
||||
|
||||
Add these debug logs to identify the specific scenario:
|
||||
|
||||
```typescript
|
||||
// In route-connection-handler.ts setupDirectConnection
|
||||
logger.log('debug', `Setting outgoing socket for ${connectionId}`, {
|
||||
timestamp: Date.now(),
|
||||
hasOutgoing: !!record.outgoing,
|
||||
socketState: targetSocket.readyState
|
||||
});
|
||||
|
||||
// In connection-manager.ts cleanupConnection
|
||||
logger.log('debug', `Cleanup attempt for ${record.id}`, {
|
||||
alreadyClosed: record.connectionClosed,
|
||||
hasIncoming: !!record.incoming,
|
||||
hasOutgoing: !!record.outgoing,
|
||||
incomingDestroyed: record.incoming?.destroyed,
|
||||
outgoingDestroyed: record.outgoing?.destroyed
|
||||
});
|
||||
```
|
||||
|
||||
### Workarounds
|
||||
|
||||
Until root cause is identified:
|
||||
|
||||
1. **Periodic Force Cleanup**:
|
||||
```typescript
|
||||
setInterval(() => {
|
||||
const connections = connectionManager.getConnections();
|
||||
for (const [id, record] of connections) {
|
||||
if (record.incoming?.destroyed && !record.connectionClosed) {
|
||||
connectionManager.cleanupConnection(record, 'force_cleanup');
|
||||
}
|
||||
}
|
||||
}, 60000); // Every minute
|
||||
```
|
||||
|
||||
2. **Connection Age Limit**:
|
||||
```typescript
|
||||
// Add max connection age check
|
||||
const maxAge = 3600000; // 1 hour
|
||||
if (Date.now() - record.incomingStartTime > maxAge) {
|
||||
connectionManager.cleanupConnection(record, 'max_age');
|
||||
}
|
||||
```
|
||||
|
||||
3. **Aggressive Timeout Settings**:
|
||||
```typescript
|
||||
{
|
||||
socketTimeout: 60000, // 1 minute
|
||||
inactivityTimeout: 300000, // 5 minutes
|
||||
connectionCleanupInterval: 30000 // 30 seconds
|
||||
}
|
||||
```
|
||||
|
||||
### Related Files
|
||||
- `/ts/proxies/smart-proxy/route-connection-handler.ts` - Main connection handling
|
||||
- `/ts/proxies/smart-proxy/connection-manager.ts` - Connection tracking and cleanup
|
||||
- `/ts/core/utils/socket-utils.ts` - Socket cleanup utilities
|
||||
- `/test/test.proxy-chain-cleanup.node.ts` - Test for connection cleanup
|
||||
- `/test/test.proxy-chaining-accumulation.node.ts` - Test for accumulation prevention
|
||||
- `/.nogit/debug/connection-accumulation-debug.ts` - Debug script for connection states
|
||||
- `/.nogit/debug/connection-accumulation-keepalive.ts` - Keep-alive specific tests
|
||||
- `/.nogit/debug/connection-accumulation-http.ts` - HTTP traffic through proxy chains
|
||||
|
||||
### Summary
|
||||
|
||||
**Issue Identified**: Connection accumulation occurs on the **inner proxy** (not outer) when backends are unreachable.
|
||||
|
||||
**Root Cause**: The `createSocketWithErrorHandler` function in socket-utils.ts doesn't implement connection establishment timeout. It only sets `socket.setTimeout()` which handles inactivity AFTER connection is established, not during the connect phase.
|
||||
|
||||
**Impact**: When connecting to unreachable IPs (e.g., 10.255.255.1), outgoing sockets remain in "opening" state indefinitely, causing connections to accumulate.
|
||||
|
||||
**Fix Required**:
|
||||
1. Add `connectionTimeout` setting to ISmartProxyOptions
|
||||
2. Implement proper connection timeout in `createSocketWithErrorHandler`
|
||||
3. Pass the timeout value from route-connection-handler
|
||||
|
||||
**Workaround Until Fixed**: Configure shorter socket timeouts and use the periodic force cleanup suggested above.
|
||||
|
||||
The connection cleanup mechanisms have been significantly improved in v19.5.20:
|
||||
1. Race condition fixed by setting `record.outgoing` before connecting
|
||||
2. Immediate routing cleanup handler always destroys outgoing connections
|
||||
3. Tests confirm no accumulation in standard scenarios with reachable backends
|
||||
|
||||
However, the missing connection establishment timeout causes accumulation when backends are unreachable or very slow to connect.
|
||||
|
||||
### Outer Proxy Sudden Accumulation After Hours
|
||||
|
||||
**User Report**: "The counter goes up suddenly after some hours on the outer proxy"
|
||||
|
||||
**Investigation Findings**:
|
||||
|
||||
1. **Cleanup Queue Mechanism**:
|
||||
- Connections are cleaned up in batches of 100 via a queue
|
||||
- If the cleanup timer gets stuck or cleared without restart, connections accumulate
|
||||
- The timer is set with `setTimeout` and could be affected by event loop blocking
|
||||
|
||||
2. **Potential Causes for Sudden Spikes**:
|
||||
|
||||
a) **Cleanup Timer Failure**:
|
||||
```typescript
|
||||
// In ConnectionManager, if this timer gets cleared but not restarted:
|
||||
this.cleanupTimer = this.setTimeout(() => {
|
||||
this.processCleanupQueue();
|
||||
}, 100);
|
||||
```
|
||||
|
||||
b) **Memory Pressure**:
|
||||
- After hours of operation, memory fragmentation or pressure could cause delays
|
||||
- Garbage collection pauses might interfere with timer execution
|
||||
|
||||
c) **Event Listener Accumulation**:
|
||||
- Socket event listeners might accumulate over time
|
||||
- Server 'connection' event handlers are particularly important
|
||||
|
||||
d) **Keep-Alive Connection Cascades**:
|
||||
- When many keep-alive connections timeout simultaneously
|
||||
- Outer proxy has different timeout than inner proxy
|
||||
- Mass disconnection events can overwhelm cleanup queue
|
||||
|
||||
e) **HttpProxy Component Issues**:
|
||||
- If using `useHttpProxy`, the HttpProxy bridge might maintain connection pools
|
||||
- These pools might not be properly cleaned after hours
|
||||
|
||||
3. **Why "Sudden" After Hours**:
|
||||
- Not a gradual leak but triggered by specific conditions
|
||||
- Likely related to periodic events or thresholds:
|
||||
- Inactivity check runs every 30 seconds
|
||||
- Keep-alive connections have extended timeouts (6x normal)
|
||||
- Parity check has 30-minute timeout for half-closed connections
|
||||
|
||||
4. **Reproduction Scenarios**:
|
||||
- Mass client disconnection/reconnection (network blip)
|
||||
- Keep-alive timeout cascade when inner proxy times out first
|
||||
- Cleanup timer getting stuck during high load
|
||||
- Memory pressure causing event loop delays
|
||||
|
||||
### Additional Monitoring Recommendations
|
||||
|
||||
1. **Add Cleanup Queue Monitoring**:
|
||||
```typescript
|
||||
setInterval(() => {
|
||||
const cm = proxy.connectionManager;
|
||||
if (cm.cleanupQueue.size > 100 && !cm.cleanupTimer) {
|
||||
logger.error('Cleanup queue stuck!', {
|
||||
queueSize: cm.cleanupQueue.size,
|
||||
hasTimer: !!cm.cleanupTimer
|
||||
});
|
||||
}
|
||||
}, 60000);
|
||||
```
|
||||
|
||||
2. **Track Timer Health**:
|
||||
- Monitor if cleanup timer is running
|
||||
- Check for event loop blocking
|
||||
- Log when batch processing takes too long
|
||||
|
||||
3. **Memory Monitoring**:
|
||||
- Track heap usage over time
|
||||
- Monitor for memory leaks in long-running processes
|
||||
- Force periodic garbage collection if needed
|
||||
|
||||
### Immediate Mitigations
|
||||
|
||||
1. **Restart Cleanup Timer**:
|
||||
```typescript
|
||||
// Emergency cleanup timer restart
|
||||
if (!cm.cleanupTimer && cm.cleanupQueue.size > 0) {
|
||||
cm.cleanupTimer = setTimeout(() => {
|
||||
cm.processCleanupQueue();
|
||||
}, 100);
|
||||
}
|
||||
```
|
||||
|
||||
2. **Force Periodic Cleanup**:
|
||||
```typescript
|
||||
setInterval(() => {
|
||||
const cm = connectionManager;
|
||||
if (cm.getConnectionCount() > threshold) {
|
||||
cm.performOptimizedInactivityCheck();
|
||||
// Force process cleanup queue
|
||||
cm.processCleanupQueue();
|
||||
}
|
||||
}, 300000); // Every 5 minutes
|
||||
```
|
||||
|
||||
3. **Connection Age Limits**:
|
||||
- Set maximum connection lifetime
|
||||
- Force close connections older than threshold
|
||||
- More aggressive cleanup for proxy chains
|
||||
|
||||
## ✅ FIXED: Zombie Connection Detection (January 2025)
|
||||
|
||||
### Root Cause Identified
|
||||
"Zombie connections" occur when sockets are destroyed without triggering their close/error event handlers. This causes connections to remain tracked with both sockets destroyed but `connectionClosed=false`. This is particularly problematic in proxy chains where the inner proxy might close connections in ways that don't trigger proper events on the outer proxy.
|
||||
|
||||
### Fix Implemented
|
||||
Added zombie detection to the periodic inactivity check in ConnectionManager:
|
||||
|
||||
```typescript
|
||||
// In performOptimizedInactivityCheck()
|
||||
// Check ALL connections for zombie state
|
||||
for (const [connectionId, record] of this.connectionRecords) {
|
||||
if (!record.connectionClosed) {
|
||||
const incomingDestroyed = record.incoming?.destroyed || false;
|
||||
const outgoingDestroyed = record.outgoing?.destroyed || false;
|
||||
|
||||
// Check for zombie connections: both sockets destroyed but not cleaned up
|
||||
if (incomingDestroyed && outgoingDestroyed) {
|
||||
logger.log('warn', `Zombie connection detected: ${connectionId} - both sockets destroyed but not cleaned up`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
age: plugins.prettyMs(now - record.incomingStartTime),
|
||||
component: 'connection-manager'
|
||||
});
|
||||
|
||||
// Clean up immediately
|
||||
this.cleanupConnection(record, 'zombie_cleanup');
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for half-zombie: one socket destroyed
|
||||
if (incomingDestroyed || outgoingDestroyed) {
|
||||
const age = now - record.incomingStartTime;
|
||||
// Give it 30 seconds grace period for normal cleanup
|
||||
if (age > 30000) {
|
||||
logger.log('warn', `Half-zombie connection detected: ${connectionId} - ${incomingDestroyed ? 'incoming' : 'outgoing'} destroyed`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
age: plugins.prettyMs(age),
|
||||
incomingDestroyed,
|
||||
outgoingDestroyed,
|
||||
component: 'connection-manager'
|
||||
});
|
||||
|
||||
// Clean up
|
||||
this.cleanupConnection(record, 'half_zombie_cleanup');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### How It Works
|
||||
1. **Full Zombie Detection**: Detects when both incoming and outgoing sockets are destroyed but the connection hasn't been cleaned up
|
||||
2. **Half-Zombie Detection**: Detects when only one socket is destroyed, with a 30-second grace period for normal cleanup to occur
|
||||
3. **Automatic Cleanup**: Immediately cleans up zombie connections when detected
|
||||
4. **Runs Periodically**: Integrated into the existing inactivity check that runs every 30 seconds
|
||||
|
||||
### Why This Fixes the Outer Proxy Accumulation
|
||||
- When inner proxy closes connections abruptly (e.g., due to backend failure), the outer proxy's outgoing socket might be destroyed without firing close/error events
|
||||
- These become zombie connections that previously accumulated indefinitely
|
||||
- Now they are detected and cleaned up within 30 seconds
|
||||
|
||||
### Test Results
|
||||
Debug scripts confirmed:
|
||||
- Zombie connections can be created when sockets are destroyed directly without events
|
||||
- The zombie detection successfully identifies and cleans up these connections
|
||||
- Both full zombies (both sockets destroyed) and half-zombies (one socket destroyed) are handled
|
||||
|
||||
This fix addresses the specific issue where "connections that are closed on the inner proxy, always also close on the outer proxy" as requested by the user.
|
||||
|
||||
## 🔍 Production Diagnostics (January 2025)
|
||||
|
||||
Since the zombie detection fix didn't fully resolve the issue, use the ProductionConnectionMonitor to diagnose the actual problem:
|
||||
|
||||
### How to Use the Production Monitor
|
||||
|
||||
1. **Add to your proxy startup script**:
|
||||
```typescript
|
||||
import ProductionConnectionMonitor from './production-connection-monitor.js';
|
||||
|
||||
// After proxy.start()
|
||||
const monitor = new ProductionConnectionMonitor(proxy);
|
||||
monitor.start(5000); // Check every 5 seconds
|
||||
|
||||
// Monitor will automatically capture diagnostics when:
|
||||
// - Connections exceed threshold (default: 50)
|
||||
// - Sudden spike occurs (default: +20 connections)
|
||||
```
|
||||
|
||||
2. **Diagnostics are saved to**: `.nogit/connection-diagnostics/`
|
||||
|
||||
3. **Force capture anytime**: `monitor.forceCaptureNow()`
|
||||
|
||||
### What the Monitor Captures
|
||||
|
||||
For each connection:
|
||||
- Socket states (destroyed, readable, writable, readyState)
|
||||
- Connection flags (closed, keepAlive, TLS status)
|
||||
- Data transfer statistics
|
||||
- Time since last activity
|
||||
- Cleanup queue status
|
||||
- Event listener counts
|
||||
- Termination reasons
|
||||
|
||||
### Pattern Analysis
|
||||
|
||||
The monitor automatically identifies:
|
||||
- **Zombie connections**: Both sockets destroyed but not cleaned up
|
||||
- **Half-zombies**: One socket destroyed
|
||||
- **Stuck connecting**: Outgoing socket stuck in connecting state
|
||||
- **No outgoing**: Missing outgoing socket
|
||||
- **Keep-alive stuck**: Keep-alive connections with no recent activity
|
||||
- **Old connections**: Connections older than 1 hour
|
||||
- **No data transfer**: Connections with no bytes transferred
|
||||
- **Listener leaks**: Excessive event listeners
|
||||
|
||||
### Common Accumulation Patterns
|
||||
|
||||
1. **Connecting State Stuck**
|
||||
- Outgoing socket shows `connecting: true` indefinitely
|
||||
- Usually means connection timeout not working
|
||||
- Check if backend is reachable
|
||||
|
||||
2. **Missing Outgoing Socket**
|
||||
- Connection has no outgoing socket but isn't closed
|
||||
- May indicate immediate routing issues
|
||||
- Check error logs during connection setup
|
||||
|
||||
3. **Event Listener Accumulation**
|
||||
- High listener counts (>20) on sockets
|
||||
- Indicates cleanup not removing all listeners
|
||||
- Can cause memory leaks
|
||||
|
||||
4. **Keep-Alive Zombies**
|
||||
- Keep-alive connections not timing out
|
||||
- Check keepAlive timeout settings
|
||||
- May need more aggressive cleanup
|
||||
|
||||
### Next Steps
|
||||
|
||||
1. **Run the monitor in production** during accumulation
|
||||
2. **Share the diagnostic files** from `.nogit/connection-diagnostics/`
|
||||
3. **Look for patterns** in the captured snapshots
|
||||
4. **Check specific connection IDs** that accumulate
|
||||
|
||||
The diagnostic files will show exactly what state connections are in when accumulation occurs, allowing targeted fixes for the specific issue.
|
||||
|
||||
## ✅ FIXED: Stuck Connection Detection (January 2025)
|
||||
|
||||
### Additional Root Cause Found
|
||||
Connections to hanging backends (that accept but never respond) were not being cleaned up because:
|
||||
- Both sockets remain alive (not destroyed)
|
||||
- Keep-alive prevents normal timeout
|
||||
- No data is sent back to the client despite receiving data
|
||||
- These don't qualify as "zombies" since sockets aren't destroyed
|
||||
|
||||
### Fix Implemented
|
||||
Added stuck connection detection to the periodic inactivity check:
|
||||
|
||||
```typescript
|
||||
// Check for stuck connections: no data sent back to client
|
||||
if (!record.connectionClosed && record.outgoing && record.bytesReceived > 0 && record.bytesSent === 0) {
|
||||
const age = now - record.incomingStartTime;
|
||||
// If connection is older than 60 seconds and no data sent back, likely stuck
|
||||
if (age > 60000) {
|
||||
logger.log('warn', `Stuck connection detected: ${connectionId} - received ${record.bytesReceived} bytes but sent 0 bytes`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
age: plugins.prettyMs(age),
|
||||
bytesReceived: record.bytesReceived,
|
||||
targetHost: record.targetHost,
|
||||
targetPort: record.targetPort,
|
||||
component: 'connection-manager'
|
||||
});
|
||||
|
||||
// Clean up
|
||||
this.cleanupConnection(record, 'stuck_no_response');
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### What This Fixes
|
||||
- Connections to backends that accept but never respond
|
||||
- Proxy chains where inner proxy connects to unresponsive services
|
||||
- Scenarios where keep-alive prevents normal timeout mechanisms
|
||||
- Connections that receive client data but never send anything back
|
||||
|
||||
### Detection Criteria
|
||||
- Connection has received bytes from client (`bytesReceived > 0`)
|
||||
- No bytes sent back to client (`bytesSent === 0`)
|
||||
- Connection is older than 60 seconds
|
||||
- Both sockets are still alive (not destroyed)
|
||||
|
||||
This complements the zombie detection by handling cases where sockets remain technically alive but the connection is effectively dead.
|
||||
|
||||
## 🚨 CRITICAL FIX: Cleanup Queue Bug (January 2025)
|
||||
|
||||
### Critical Bug Found
|
||||
The cleanup queue had a severe bug that caused connection accumulation when more than 100 connections needed cleanup:
|
||||
|
||||
```typescript
|
||||
// BUG: This cleared the ENTIRE queue after processing only the first batch!
|
||||
const toCleanup = Array.from(this.cleanupQueue).slice(0, this.cleanupBatchSize);
|
||||
this.cleanupQueue.clear(); // ❌ This discarded all connections beyond the first 100!
|
||||
```
|
||||
|
||||
### Fix Implemented
|
||||
```typescript
|
||||
// Now only removes the connections being processed
|
||||
const toCleanup = Array.from(this.cleanupQueue).slice(0, this.cleanupBatchSize);
|
||||
for (const connectionId of toCleanup) {
|
||||
this.cleanupQueue.delete(connectionId); // ✅ Only remove what we process
|
||||
const record = this.connectionRecords.get(connectionId);
|
||||
if (record) {
|
||||
this.cleanupConnection(record, record.incomingTerminationReason || 'normal');
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Impact
|
||||
- **Before**: If 150 connections needed cleanup, only the first 100 would be processed and the remaining 50 would accumulate forever
|
||||
- **After**: All connections are properly cleaned up in batches
|
||||
|
||||
### Additional Improvements
|
||||
|
||||
1. **Faster Inactivity Checks**: Reduced from 30s to 10s intervals
|
||||
- Zombies and stuck connections are detected 3x faster
|
||||
- Reduces the window for accumulation
|
||||
|
||||
2. **Duplicate Prevention**: Added check in queueCleanup to prevent processing already-closed connections
|
||||
- Prevents unnecessary work
|
||||
- Ensures connections are only cleaned up once
|
||||
|
||||
### Summary of All Fixes
|
||||
|
||||
1. **Connection Timeout** (already documented) - Prevents accumulation when backends are unreachable
|
||||
2. **Zombie Detection** - Cleans up connections with destroyed sockets
|
||||
3. **Stuck Connection Detection** - Cleans up connections to hanging backends
|
||||
4. **Cleanup Queue Bug** - Ensures ALL connections get cleaned up, not just the first 100
|
||||
5. **Faster Detection** - Reduced check interval from 30s to 10s
|
||||
|
||||
These fixes combined should prevent connection accumulation in all known scenarios.
|
@ -856,4 +856,42 @@ The WrappedSocket class has been implemented as the foundation for PROXY protoco
|
||||
For detailed information about proxy protocol implementation and proxy chaining:
|
||||
- **[Proxy Protocol Guide](./readme.proxy-protocol.md)** - Complete implementation details and configuration
|
||||
- **[Proxy Protocol Examples](./readme.proxy-protocol-example.md)** - Code examples and conceptual implementation
|
||||
- **[Proxy Chain Summary](./readme.proxy-chain-summary.md)** - Quick reference for proxy chaining setup
|
||||
- **[Proxy Chain Summary](./readme.proxy-chain-summary.md)** - Quick reference for proxy chaining setup
|
||||
|
||||
## Connection Cleanup Edge Cases Investigation (v19.5.20+)
|
||||
|
||||
### Issue Discovered
|
||||
"Zombie connections" can occur when both sockets are destroyed but the connection record hasn't been cleaned up. This happens when sockets are destroyed without triggering their close/error event handlers.
|
||||
|
||||
### Root Cause
|
||||
1. **Event Handler Bypass**: In edge cases (network failures, proxy chain failures, forced socket destruction), sockets can be destroyed without their event handlers being called
|
||||
2. **Cleanup Queue Delay**: The `initiateCleanupOnce` method adds connections to a cleanup queue (batch of 100 every 100ms), which may not process fast enough
|
||||
3. **Inactivity Check Limitation**: The periodic inactivity check only examines `lastActivity` timestamps, not actual socket states
|
||||
|
||||
### Test Results
|
||||
Debug script (`connection-manager-direct-test.ts`) revealed:
|
||||
- **Normal cleanup works**: When socket events fire normally, cleanup is reliable
|
||||
- **Zombies ARE created**: Direct socket destruction creates zombies (destroyed sockets, connectionClosed=false)
|
||||
- **Manual cleanup works**: Calling `initiateCleanupOnce` on a zombie does clean it up
|
||||
- **Inactivity check misses zombies**: The check doesn't detect connections with destroyed sockets
|
||||
|
||||
### Potential Solutions
|
||||
1. **Periodic Zombie Detection**: Add zombie detection to the inactivity check:
|
||||
```typescript
|
||||
// In performOptimizedInactivityCheck
|
||||
if (record.incoming?.destroyed && record.outgoing?.destroyed && !record.connectionClosed) {
|
||||
this.cleanupConnection(record, 'zombie_detected');
|
||||
}
|
||||
```
|
||||
|
||||
2. **Socket State Monitoring**: Check socket states during connection operations
|
||||
3. **Defensive Socket Handling**: Always attach cleanup handlers before any operation that might destroy sockets
|
||||
4. **Immediate Cleanup Option**: For critical paths, use `cleanupConnection` instead of `initiateCleanupOnce`
|
||||
|
||||
### Impact
|
||||
- Memory leaks in edge cases (network failures, proxy chain issues)
|
||||
- Connection count inaccuracy
|
||||
- Potential resource exhaustion over time
|
||||
|
||||
### Test Files
|
||||
- `.nogit/debug/connection-manager-direct-test.ts` - Direct ConnectionManager testing showing zombie creation
|
118
readme.md
118
readme.md
@ -919,6 +919,124 @@ Available helper functions:
|
||||
})
|
||||
```
|
||||
|
||||
## Metrics and Monitoring
|
||||
|
||||
SmartProxy includes a comprehensive metrics collection system that provides real-time insights into proxy performance, connection statistics, and throughput data.
|
||||
|
||||
### Getting Metrics
|
||||
|
||||
```typescript
|
||||
const proxy = new SmartProxy({ /* config */ });
|
||||
await proxy.start();
|
||||
|
||||
// Access metrics through the getStats() method
|
||||
const stats = proxy.getStats();
|
||||
|
||||
// Get current active connections
|
||||
console.log(`Active connections: ${stats.getActiveConnections()}`);
|
||||
|
||||
// Get total connections since start
|
||||
console.log(`Total connections: ${stats.getTotalConnections()}`);
|
||||
|
||||
// Get requests per second (RPS)
|
||||
console.log(`Current RPS: ${stats.getRequestsPerSecond()}`);
|
||||
|
||||
// Get throughput data
|
||||
const throughput = stats.getThroughput();
|
||||
console.log(`Bytes received: ${throughput.bytesIn}`);
|
||||
console.log(`Bytes sent: ${throughput.bytesOut}`);
|
||||
|
||||
// Get connections by route
|
||||
const routeConnections = stats.getConnectionsByRoute();
|
||||
for (const [route, count] of routeConnections) {
|
||||
console.log(`Route ${route}: ${count} connections`);
|
||||
}
|
||||
|
||||
// Get connections by IP address
|
||||
const ipConnections = stats.getConnectionsByIP();
|
||||
for (const [ip, count] of ipConnections) {
|
||||
console.log(`IP ${ip}: ${count} connections`);
|
||||
}
|
||||
```
|
||||
|
||||
### Available Metrics
|
||||
|
||||
The `IProxyStats` interface provides the following methods:
|
||||
|
||||
- `getActiveConnections()`: Current number of active connections
|
||||
- `getTotalConnections()`: Total connections handled since proxy start
|
||||
- `getRequestsPerSecond()`: Current requests per second (1-minute average)
|
||||
- `getThroughput()`: Total bytes transferred (in/out)
|
||||
- `getConnectionsByRoute()`: Connection count per route
|
||||
- `getConnectionsByIP()`: Connection count per client IP
|
||||
|
||||
### Monitoring Example
|
||||
|
||||
```typescript
|
||||
// Create a monitoring loop
|
||||
setInterval(() => {
|
||||
const stats = proxy.getStats();
|
||||
|
||||
// Log key metrics
|
||||
console.log({
|
||||
timestamp: new Date().toISOString(),
|
||||
activeConnections: stats.getActiveConnections(),
|
||||
rps: stats.getRequestsPerSecond(),
|
||||
throughput: stats.getThroughput()
|
||||
});
|
||||
|
||||
// Check for high connection counts from specific IPs
|
||||
const ipConnections = stats.getConnectionsByIP();
|
||||
for (const [ip, count] of ipConnections) {
|
||||
if (count > 100) {
|
||||
console.warn(`High connection count from ${ip}: ${count}`);
|
||||
}
|
||||
}
|
||||
}, 10000); // Every 10 seconds
|
||||
```
|
||||
|
||||
### Exporting Metrics
|
||||
|
||||
You can export metrics in various formats for external monitoring systems:
|
||||
|
||||
```typescript
|
||||
// Export as JSON
|
||||
app.get('/metrics.json', (req, res) => {
|
||||
const stats = proxy.getStats();
|
||||
res.json({
|
||||
activeConnections: stats.getActiveConnections(),
|
||||
totalConnections: stats.getTotalConnections(),
|
||||
requestsPerSecond: stats.getRequestsPerSecond(),
|
||||
throughput: stats.getThroughput(),
|
||||
connectionsByRoute: Object.fromEntries(stats.getConnectionsByRoute()),
|
||||
connectionsByIP: Object.fromEntries(stats.getConnectionsByIP())
|
||||
});
|
||||
});
|
||||
|
||||
// Export as Prometheus format
|
||||
app.get('/metrics', (req, res) => {
|
||||
const stats = proxy.getStats();
|
||||
res.set('Content-Type', 'text/plain');
|
||||
res.send(`
|
||||
# HELP smartproxy_active_connections Current active connections
|
||||
# TYPE smartproxy_active_connections gauge
|
||||
smartproxy_active_connections ${stats.getActiveConnections()}
|
||||
|
||||
# HELP smartproxy_requests_per_second Current requests per second
|
||||
# TYPE smartproxy_requests_per_second gauge
|
||||
smartproxy_requests_per_second ${stats.getRequestsPerSecond()}
|
||||
|
||||
# HELP smartproxy_bytes_in Total bytes received
|
||||
# TYPE smartproxy_bytes_in counter
|
||||
smartproxy_bytes_in ${stats.getThroughput().bytesIn}
|
||||
|
||||
# HELP smartproxy_bytes_out Total bytes sent
|
||||
# TYPE smartproxy_bytes_out counter
|
||||
smartproxy_bytes_out ${stats.getThroughput().bytesOut}
|
||||
`);
|
||||
});
|
||||
```
|
||||
|
||||
## Other Components
|
||||
|
||||
While SmartProxy provides a unified API for most needs, you can also use individual components:
|
||||
|
591
readme.metrics.md
Normal file
591
readme.metrics.md
Normal file
@ -0,0 +1,591 @@
|
||||
# SmartProxy Metrics Implementation Plan
|
||||
|
||||
This document outlines the plan for implementing comprehensive metrics tracking in SmartProxy.
|
||||
|
||||
## Overview
|
||||
|
||||
The metrics system will provide real-time insights into proxy performance, connection statistics, and throughput data. The implementation will be efficient, thread-safe, and have minimal impact on proxy performance.
|
||||
|
||||
**Key Design Decisions**:
|
||||
|
||||
1. **On-demand computation**: Instead of maintaining duplicate state, the MetricsCollector computes metrics on-demand from existing data structures.
|
||||
|
||||
2. **SmartProxy-centric architecture**: MetricsCollector receives the SmartProxy instance, providing access to all components:
|
||||
- ConnectionManager for connection data
|
||||
- RouteManager for route metadata
|
||||
- Settings for configuration
|
||||
- Future components without API changes
|
||||
|
||||
This approach:
|
||||
- Eliminates synchronization issues
|
||||
- Reduces memory overhead
|
||||
- Simplifies the implementation
|
||||
- Guarantees metrics accuracy
|
||||
- Leverages existing battle-tested components
|
||||
- Provides flexibility for future enhancements
|
||||
|
||||
## Metrics Interface
|
||||
|
||||
```typescript
|
||||
interface IProxyStats {
|
||||
getActiveConnections(): number;
|
||||
getConnectionsByRoute(): Map<string, number>;
|
||||
getConnectionsByIP(): Map<string, number>;
|
||||
getTotalConnections(): number;
|
||||
getRequestsPerSecond(): number;
|
||||
getThroughput(): { bytesIn: number, bytesOut: number };
|
||||
}
|
||||
```
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### 1. Create MetricsCollector Class
|
||||
|
||||
**Location**: `/ts/proxies/smart-proxy/metrics-collector.ts`
|
||||
|
||||
```typescript
|
||||
import type { SmartProxy } from './smart-proxy.js';
|
||||
|
||||
export class MetricsCollector implements IProxyStats {
|
||||
constructor(
|
||||
private smartProxy: SmartProxy
|
||||
) {}
|
||||
|
||||
// RPS tracking (the only state we need to maintain)
|
||||
private requestTimestamps: number[] = [];
|
||||
private readonly RPS_WINDOW_SIZE = 60000; // 1 minute window
|
||||
|
||||
// All other metrics are computed on-demand from SmartProxy's components
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Integration Points
|
||||
|
||||
Since metrics are computed on-demand from ConnectionManager's records, we only need minimal integration:
|
||||
|
||||
#### A. Request Tracking for RPS
|
||||
|
||||
**File**: `/ts/proxies/smart-proxy/route-connection-handler.ts`
|
||||
|
||||
```typescript
|
||||
// In handleNewConnection when a new connection is accepted
|
||||
this.metricsCollector.recordRequest();
|
||||
```
|
||||
|
||||
#### B. SmartProxy Component Access
|
||||
|
||||
Through the SmartProxy instance, MetricsCollector can access:
|
||||
- `smartProxy.connectionManager` - All active connections and their details
|
||||
- `smartProxy.routeManager` - Route configurations and metadata
|
||||
- `smartProxy.settings` - Configuration for thresholds and limits
|
||||
- `smartProxy.servers` - Server instances and port information
|
||||
- Any other components as needed for future metrics
|
||||
|
||||
No additional hooks needed!
|
||||
|
||||
### 3. Metric Implementations
|
||||
|
||||
#### A. Active Connections
|
||||
|
||||
```typescript
|
||||
getActiveConnections(): number {
|
||||
return this.smartProxy.connectionManager.getConnectionCount();
|
||||
}
|
||||
```
|
||||
|
||||
#### B. Connections by Route
|
||||
|
||||
```typescript
|
||||
getConnectionsByRoute(): Map<string, number> {
|
||||
const routeCounts = new Map<string, number>();
|
||||
|
||||
// Compute from active connections
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
const routeName = record.routeName || 'unknown';
|
||||
const current = routeCounts.get(routeName) || 0;
|
||||
routeCounts.set(routeName, current + 1);
|
||||
}
|
||||
|
||||
return routeCounts;
|
||||
}
|
||||
```
|
||||
|
||||
#### C. Connections by IP
|
||||
|
||||
```typescript
|
||||
getConnectionsByIP(): Map<string, number> {
|
||||
const ipCounts = new Map<string, number>();
|
||||
|
||||
// Compute from active connections
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
const ip = record.remoteIP;
|
||||
const current = ipCounts.get(ip) || 0;
|
||||
ipCounts.set(ip, current + 1);
|
||||
}
|
||||
|
||||
return ipCounts;
|
||||
}
|
||||
|
||||
// Additional helper methods for IP tracking
|
||||
getTopIPs(limit: number = 10): Array<{ip: string, connections: number}> {
|
||||
const ipCounts = this.getConnectionsByIP();
|
||||
const sorted = Array.from(ipCounts.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, limit)
|
||||
.map(([ip, connections]) => ({ ip, connections }));
|
||||
|
||||
return sorted;
|
||||
}
|
||||
|
||||
isIPBlocked(ip: string, maxConnectionsPerIP: number): boolean {
|
||||
const ipCounts = this.getConnectionsByIP();
|
||||
const currentConnections = ipCounts.get(ip) || 0;
|
||||
return currentConnections >= maxConnectionsPerIP;
|
||||
}
|
||||
```
|
||||
|
||||
#### D. Total Connections
|
||||
|
||||
```typescript
|
||||
getTotalConnections(): number {
|
||||
// Get from termination stats
|
||||
const stats = this.smartProxy.connectionManager.getTerminationStats();
|
||||
let total = this.smartProxy.connectionManager.getConnectionCount(); // Add active connections
|
||||
|
||||
// Add all terminated connections
|
||||
for (const reason in stats.incoming) {
|
||||
total += stats.incoming[reason];
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
```
|
||||
|
||||
#### E. Requests Per Second
|
||||
|
||||
```typescript
|
||||
getRequestsPerSecond(): number {
|
||||
const now = Date.now();
|
||||
const windowStart = now - this.RPS_WINDOW_SIZE;
|
||||
|
||||
// Clean old timestamps
|
||||
this.requestTimestamps = this.requestTimestamps.filter(ts => ts > windowStart);
|
||||
|
||||
// Calculate RPS based on window
|
||||
const requestsInWindow = this.requestTimestamps.length;
|
||||
return requestsInWindow / (this.RPS_WINDOW_SIZE / 1000);
|
||||
}
|
||||
|
||||
recordRequest(): void {
|
||||
this.requestTimestamps.push(Date.now());
|
||||
|
||||
// Prevent unbounded growth
|
||||
if (this.requestTimestamps.length > 10000) {
|
||||
this.cleanupOldRequests();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### F. Throughput Tracking
|
||||
|
||||
```typescript
|
||||
getThroughput(): { bytesIn: number, bytesOut: number } {
|
||||
let bytesIn = 0;
|
||||
let bytesOut = 0;
|
||||
|
||||
// Sum bytes from all active connections
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
bytesIn += record.bytesReceived;
|
||||
bytesOut += record.bytesSent;
|
||||
}
|
||||
|
||||
return { bytesIn, bytesOut };
|
||||
}
|
||||
|
||||
// Get throughput rate (bytes per second) for last minute
|
||||
getThroughputRate(): { bytesInPerSec: number, bytesOutPerSec: number } {
|
||||
const now = Date.now();
|
||||
let recentBytesIn = 0;
|
||||
let recentBytesOut = 0;
|
||||
let connectionCount = 0;
|
||||
|
||||
// Calculate bytes transferred in last minute from active connections
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
const connectionAge = now - record.incomingStartTime;
|
||||
if (connectionAge < 60000) { // Connection started within last minute
|
||||
recentBytesIn += record.bytesReceived;
|
||||
recentBytesOut += record.bytesSent;
|
||||
connectionCount++;
|
||||
} else {
|
||||
// For older connections, estimate rate based on average
|
||||
const rate = connectionAge / 60000;
|
||||
recentBytesIn += record.bytesReceived / rate;
|
||||
recentBytesOut += record.bytesSent / rate;
|
||||
connectionCount++;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
bytesInPerSec: Math.round(recentBytesIn / 60),
|
||||
bytesOutPerSec: Math.round(recentBytesOut / 60)
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Performance Optimizations
|
||||
|
||||
Since metrics are computed on-demand from existing data structures, performance optimizations are minimal:
|
||||
|
||||
#### A. Caching for Frequent Queries
|
||||
|
||||
```typescript
|
||||
private cachedMetrics: {
|
||||
timestamp: number;
|
||||
connectionsByRoute?: Map<string, number>;
|
||||
connectionsByIP?: Map<string, number>;
|
||||
} = { timestamp: 0 };
|
||||
|
||||
private readonly CACHE_TTL = 1000; // 1 second cache
|
||||
|
||||
getConnectionsByRoute(): Map<string, number> {
|
||||
const now = Date.now();
|
||||
|
||||
// Return cached value if fresh
|
||||
if (this.cachedMetrics.connectionsByRoute &&
|
||||
now - this.cachedMetrics.timestamp < this.CACHE_TTL) {
|
||||
return this.cachedMetrics.connectionsByRoute;
|
||||
}
|
||||
|
||||
// Compute fresh value
|
||||
const routeCounts = new Map<string, number>();
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
const routeName = record.routeName || 'unknown';
|
||||
const current = routeCounts.get(routeName) || 0;
|
||||
routeCounts.set(routeName, current + 1);
|
||||
}
|
||||
|
||||
// Cache and return
|
||||
this.cachedMetrics.connectionsByRoute = routeCounts;
|
||||
this.cachedMetrics.timestamp = now;
|
||||
return routeCounts;
|
||||
}
|
||||
```
|
||||
|
||||
#### B. RPS Cleanup
|
||||
|
||||
```typescript
|
||||
// Only cleanup needed is for RPS timestamps
|
||||
private cleanupOldRequests(): void {
|
||||
const cutoff = Date.now() - this.RPS_WINDOW_SIZE;
|
||||
this.requestTimestamps = this.requestTimestamps.filter(ts => ts > cutoff);
|
||||
}
|
||||
```
|
||||
|
||||
### 5. SmartProxy Integration
|
||||
|
||||
#### A. Add to SmartProxy Class
|
||||
|
||||
```typescript
|
||||
export class SmartProxy {
|
||||
private metricsCollector: MetricsCollector;
|
||||
|
||||
constructor(options: ISmartProxyOptions) {
|
||||
// ... existing code ...
|
||||
|
||||
// Pass SmartProxy instance to MetricsCollector
|
||||
this.metricsCollector = new MetricsCollector(this);
|
||||
}
|
||||
|
||||
// Public API
|
||||
public getStats(): IProxyStats {
|
||||
return this.metricsCollector;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### B. Configuration Options
|
||||
|
||||
```typescript
|
||||
interface ISmartProxyOptions {
|
||||
// ... existing options ...
|
||||
|
||||
metrics?: {
|
||||
enabled?: boolean; // Default: true
|
||||
rpsWindowSize?: number; // Default: 60000 (1 minute)
|
||||
throughputWindowSize?: number; // Default: 60000 (1 minute)
|
||||
cleanupInterval?: number; // Default: 60000 (1 minute)
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 6. Advanced Metrics (Future Enhancement)
|
||||
|
||||
```typescript
|
||||
interface IAdvancedProxyStats extends IProxyStats {
|
||||
// Latency metrics
|
||||
getAverageLatency(): number;
|
||||
getLatencyPercentiles(): { p50: number, p95: number, p99: number };
|
||||
|
||||
// Error metrics
|
||||
getErrorRate(): number;
|
||||
getErrorsByType(): Map<string, number>;
|
||||
|
||||
// Route-specific metrics
|
||||
getRouteMetrics(routeName: string): IRouteMetrics;
|
||||
|
||||
// Time-series data
|
||||
getHistoricalMetrics(duration: number): IHistoricalMetrics;
|
||||
|
||||
// Server/Port metrics (leveraging SmartProxy access)
|
||||
getPortUtilization(): Map<number, { connections: number, maxConnections: number }>;
|
||||
getCertificateExpiry(): Map<string, Date>;
|
||||
}
|
||||
|
||||
// Example implementation showing SmartProxy component access
|
||||
getPortUtilization(): Map<number, { connections: number, maxConnections: number }> {
|
||||
const portStats = new Map();
|
||||
|
||||
// Access servers through SmartProxy
|
||||
for (const [port, server] of this.smartProxy.servers) {
|
||||
const connections = Array.from(this.smartProxy.connectionManager.getConnections())
|
||||
.filter(([_, record]) => record.localPort === port).length;
|
||||
|
||||
// Access route configuration through SmartProxy
|
||||
const routes = this.smartProxy.routeManager.getRoutesForPort(port);
|
||||
const maxConnections = routes[0]?.advanced?.maxConnections ||
|
||||
this.smartProxy.settings.defaults?.security?.maxConnections ||
|
||||
10000;
|
||||
|
||||
portStats.set(port, { connections, maxConnections });
|
||||
}
|
||||
|
||||
return portStats;
|
||||
}
|
||||
```
|
||||
|
||||
### 7. HTTP Metrics Endpoint (Optional)
|
||||
|
||||
```typescript
|
||||
// Expose metrics via HTTP endpoint
|
||||
class MetricsHttpHandler {
|
||||
handleRequest(req: IncomingMessage, res: ServerResponse): void {
|
||||
if (req.url === '/metrics') {
|
||||
const stats = this.proxy.getStats();
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
activeConnections: stats.getActiveConnections(),
|
||||
totalConnections: stats.getTotalConnections(),
|
||||
requestsPerSecond: stats.getRequestsPerSecond(),
|
||||
throughput: stats.getThroughput(),
|
||||
connectionsByRoute: Object.fromEntries(stats.getConnectionsByRoute()),
|
||||
connectionsByIP: Object.fromEntries(stats.getConnectionsByIP()),
|
||||
topIPs: stats.getTopIPs(20)
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 8. Testing Strategy
|
||||
|
||||
The simplified design makes testing much easier since we can mock the ConnectionManager's data:
|
||||
|
||||
#### A. Unit Tests
|
||||
|
||||
```typescript
|
||||
// test/test.metrics-collector.ts
|
||||
tap.test('MetricsCollector computes metrics correctly', async () => {
|
||||
// Mock ConnectionManager with test data
|
||||
const mockConnectionManager = {
|
||||
getConnectionCount: () => 2,
|
||||
getConnections: () => new Map([
|
||||
['conn1', { remoteIP: '192.168.1.1', routeName: 'api', bytesReceived: 1000, bytesSent: 500 }],
|
||||
['conn2', { remoteIP: '192.168.1.1', routeName: 'web', bytesReceived: 2000, bytesSent: 1000 }]
|
||||
]),
|
||||
getTerminationStats: () => ({ incoming: { normal: 10, timeout: 2 } })
|
||||
};
|
||||
|
||||
const collector = new MetricsCollector(mockConnectionManager as any);
|
||||
|
||||
expect(collector.getActiveConnections()).toEqual(2);
|
||||
expect(collector.getConnectionsByIP().get('192.168.1.1')).toEqual(2);
|
||||
expect(collector.getTotalConnections()).toEqual(14); // 2 active + 12 terminated
|
||||
});
|
||||
```
|
||||
|
||||
#### B. Integration Tests
|
||||
|
||||
```typescript
|
||||
// test/test.metrics-integration.ts
|
||||
tap.test('SmartProxy provides accurate metrics', async () => {
|
||||
const proxy = new SmartProxy({ /* config */ });
|
||||
await proxy.start();
|
||||
|
||||
// Create connections and verify metrics
|
||||
const stats = proxy.getStats();
|
||||
expect(stats.getActiveConnections()).toEqual(0);
|
||||
});
|
||||
```
|
||||
|
||||
#### C. Performance Tests
|
||||
|
||||
```typescript
|
||||
// test/test.metrics-performance.ts
|
||||
tap.test('Metrics collection has minimal performance impact', async () => {
|
||||
// Measure proxy performance with and without metrics
|
||||
// Ensure overhead is < 1%
|
||||
});
|
||||
```
|
||||
|
||||
### 9. Implementation Phases
|
||||
|
||||
#### Phase 1: Core Metrics (Days 1-2)
|
||||
- [ ] Create MetricsCollector class
|
||||
- [ ] Implement all metric methods (reading from ConnectionManager)
|
||||
- [ ] Add RPS tracking
|
||||
- [ ] Add to SmartProxy with getStats() method
|
||||
|
||||
#### Phase 2: Testing & Optimization (Days 3-4)
|
||||
- [ ] Add comprehensive unit tests with mocked data
|
||||
- [ ] Add integration tests with real proxy
|
||||
- [ ] Implement caching for performance
|
||||
- [ ] Add RPS cleanup mechanism
|
||||
|
||||
#### Phase 3: Advanced Features (Days 5-7)
|
||||
- [ ] Add HTTP metrics endpoint
|
||||
- [ ] Implement Prometheus export format
|
||||
- [ ] Add IP-based rate limiting helpers
|
||||
- [ ] Create monitoring dashboard example
|
||||
|
||||
**Note**: The simplified design reduces implementation time from 4 weeks to 1 week!
|
||||
|
||||
### 10. Usage Examples
|
||||
|
||||
```typescript
|
||||
// Basic usage
|
||||
const proxy = new SmartProxy({
|
||||
routes: [...],
|
||||
metrics: { enabled: true }
|
||||
});
|
||||
|
||||
await proxy.start();
|
||||
|
||||
// Get metrics
|
||||
const stats = proxy.getStats();
|
||||
console.log(`Active connections: ${stats.getActiveConnections()}`);
|
||||
console.log(`RPS: ${stats.getRequestsPerSecond()}`);
|
||||
console.log(`Throughput: ${JSON.stringify(stats.getThroughput())}`);
|
||||
|
||||
// Monitor specific routes
|
||||
const routeConnections = stats.getConnectionsByRoute();
|
||||
for (const [route, count] of routeConnections) {
|
||||
console.log(`Route ${route}: ${count} connections`);
|
||||
}
|
||||
|
||||
// Monitor connections by IP
|
||||
const ipConnections = stats.getConnectionsByIP();
|
||||
for (const [ip, count] of ipConnections) {
|
||||
console.log(`IP ${ip}: ${count} connections`);
|
||||
}
|
||||
|
||||
// Get top IPs by connection count
|
||||
const topIPs = stats.getTopIPs(10);
|
||||
console.log('Top 10 IPs:', topIPs);
|
||||
|
||||
// Check if IP should be rate limited
|
||||
if (stats.isIPBlocked('192.168.1.100', 100)) {
|
||||
console.log('IP has too many connections');
|
||||
}
|
||||
```
|
||||
|
||||
### 11. Monitoring Integration
|
||||
|
||||
```typescript
|
||||
// Export to monitoring systems
|
||||
class PrometheusExporter {
|
||||
export(stats: IProxyStats): string {
|
||||
return `
|
||||
# HELP smartproxy_active_connections Current number of active connections
|
||||
# TYPE smartproxy_active_connections gauge
|
||||
smartproxy_active_connections ${stats.getActiveConnections()}
|
||||
|
||||
# HELP smartproxy_total_connections Total connections since start
|
||||
# TYPE smartproxy_total_connections counter
|
||||
smartproxy_total_connections ${stats.getTotalConnections()}
|
||||
|
||||
# HELP smartproxy_requests_per_second Current requests per second
|
||||
# TYPE smartproxy_requests_per_second gauge
|
||||
smartproxy_requests_per_second ${stats.getRequestsPerSecond()}
|
||||
`;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 12. Documentation
|
||||
|
||||
- Add metrics section to main README
|
||||
- Create metrics API documentation
|
||||
- Add monitoring setup guide
|
||||
- Provide dashboard configuration examples
|
||||
|
||||
## Success Criteria
|
||||
|
||||
1. **Performance**: Metrics collection adds < 1% overhead
|
||||
2. **Accuracy**: All metrics are accurate within 1% margin
|
||||
3. **Memory**: No memory leaks over 24-hour operation
|
||||
4. **Thread Safety**: No race conditions under high load
|
||||
5. **Usability**: Simple, intuitive API for accessing metrics
|
||||
|
||||
## Privacy and Security Considerations
|
||||
|
||||
### IP Address Tracking
|
||||
|
||||
1. **Privacy Compliance**:
|
||||
- Consider GDPR and other privacy regulations when storing IP addresses
|
||||
- Implement configurable IP anonymization (e.g., mask last octet)
|
||||
- Add option to disable IP tracking entirely
|
||||
|
||||
2. **Security**:
|
||||
- Use IP metrics for rate limiting and DDoS protection
|
||||
- Implement automatic blocking for IPs exceeding connection limits
|
||||
- Consider integration with IP reputation services
|
||||
|
||||
3. **Implementation Options**:
|
||||
```typescript
|
||||
interface IMetricsOptions {
|
||||
trackIPs?: boolean; // Default: true
|
||||
anonymizeIPs?: boolean; // Default: false
|
||||
maxConnectionsPerIP?: number; // Default: 100
|
||||
ipBlockDuration?: number; // Default: 3600000 (1 hour)
|
||||
}
|
||||
```
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Distributed Metrics**: Aggregate metrics across multiple proxy instances
|
||||
2. **Historical Storage**: Store metrics in time-series database
|
||||
3. **Alerting**: Built-in alerting based on metric thresholds
|
||||
4. **Custom Metrics**: Allow users to define custom metrics
|
||||
5. **GraphQL API**: Provide GraphQL endpoint for flexible metric queries
|
||||
6. **IP Analytics**:
|
||||
- Geographic distribution of connections
|
||||
- Automatic anomaly detection for IP patterns
|
||||
- Integration with threat intelligence feeds
|
||||
|
||||
## Benefits of the Simplified Design
|
||||
|
||||
By using a SmartProxy-centric architecture with on-demand computation:
|
||||
|
||||
1. **Zero Synchronization Issues**: Metrics always reflect the true state
|
||||
2. **Minimal Memory Overhead**: No duplicate data structures
|
||||
3. **Simpler Implementation**: ~200 lines instead of ~1000 lines
|
||||
4. **Easier Testing**: Can mock SmartProxy components
|
||||
5. **Better Performance**: No overhead from state updates
|
||||
6. **Guaranteed Accuracy**: Single source of truth
|
||||
7. **Faster Development**: 1 week instead of 4 weeks
|
||||
8. **Future Flexibility**: Access to all SmartProxy components without API changes
|
||||
9. **Holistic Metrics**: Can correlate data across components (connections, routes, settings, certificates, etc.)
|
||||
10. **Clean Architecture**: MetricsCollector is a true SmartProxy component, not an isolated module
|
||||
|
||||
This approach leverages the existing, well-tested SmartProxy infrastructure while providing a clean, simple metrics API that can grow with the proxy's capabilities.
|
202
readme.monitoring.md
Normal file
202
readme.monitoring.md
Normal file
@ -0,0 +1,202 @@
|
||||
# Production Connection Monitoring
|
||||
|
||||
This document explains how to use the ProductionConnectionMonitor to diagnose connection accumulation issues in real-time.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```typescript
|
||||
import ProductionConnectionMonitor from './.nogit/debug/production-connection-monitor.js';
|
||||
|
||||
// After starting your proxy
|
||||
const monitor = new ProductionConnectionMonitor(proxy);
|
||||
monitor.start(5000); // Check every 5 seconds
|
||||
|
||||
// The monitor will automatically capture diagnostics when:
|
||||
// - Connections exceed 50 (default threshold)
|
||||
// - Sudden spike of 20+ connections occurs
|
||||
// - You manually call monitor.forceCaptureNow()
|
||||
```
|
||||
|
||||
## What Gets Captured
|
||||
|
||||
When accumulation is detected, the monitor saves a JSON file with:
|
||||
|
||||
### Connection Details
|
||||
- Socket states (destroyed, readable, writable, readyState)
|
||||
- Connection age and activity timestamps
|
||||
- Data transfer statistics (bytes sent/received)
|
||||
- Target host and port information
|
||||
- Keep-alive status
|
||||
- Event listener counts
|
||||
|
||||
### System State
|
||||
- Memory usage
|
||||
- Event loop lag
|
||||
- Connection count trends
|
||||
- Termination statistics
|
||||
|
||||
## Reading Diagnostic Files
|
||||
|
||||
Files are saved to `.nogit/connection-diagnostics/` with names like:
|
||||
```
|
||||
accumulation_2025-06-07T20-20-43-733Z_force_capture.json
|
||||
```
|
||||
|
||||
### Key Fields to Check
|
||||
|
||||
1. **Socket States**
|
||||
```json
|
||||
"incomingState": {
|
||||
"destroyed": false,
|
||||
"readable": true,
|
||||
"writable": true,
|
||||
"readyState": "open"
|
||||
}
|
||||
```
|
||||
- Both destroyed = zombie connection
|
||||
- One destroyed = half-zombie
|
||||
- Both alive but old = potential stuck connection
|
||||
|
||||
2. **Data Transfer**
|
||||
```json
|
||||
"bytesReceived": 36,
|
||||
"bytesSent": 0,
|
||||
"timeSinceLastActivity": 60000
|
||||
```
|
||||
- No bytes sent back = stuck connection
|
||||
- High bytes but old = slow backend
|
||||
- No activity = idle connection
|
||||
|
||||
3. **Connection Flags**
|
||||
```json
|
||||
"hasReceivedInitialData": false,
|
||||
"hasKeepAlive": true,
|
||||
"connectionClosed": false
|
||||
```
|
||||
- hasReceivedInitialData=false on non-TLS = immediate routing
|
||||
- hasKeepAlive=true = extended timeout applies
|
||||
- connectionClosed=false = still tracked
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### 1. Hanging Backend Pattern
|
||||
```json
|
||||
{
|
||||
"bytesReceived": 36,
|
||||
"bytesSent": 0,
|
||||
"age": 120000,
|
||||
"targetHost": "backend.example.com",
|
||||
"incomingState": { "destroyed": false },
|
||||
"outgoingState": { "destroyed": false }
|
||||
}
|
||||
```
|
||||
**Fix**: The stuck connection detection (60s timeout) should clean these up.
|
||||
|
||||
### 2. Zombie Connection Pattern
|
||||
```json
|
||||
{
|
||||
"incomingState": { "destroyed": true },
|
||||
"outgoingState": { "destroyed": true },
|
||||
"connectionClosed": false
|
||||
}
|
||||
```
|
||||
**Fix**: The zombie detection should clean these up within 30s.
|
||||
|
||||
### 3. Event Listener Leak Pattern
|
||||
```json
|
||||
{
|
||||
"incomingListeners": {
|
||||
"data": 15,
|
||||
"error": 20,
|
||||
"close": 18
|
||||
}
|
||||
}
|
||||
```
|
||||
**Issue**: Event listeners accumulating, potential memory leak.
|
||||
|
||||
### 4. No Outgoing Socket Pattern
|
||||
```json
|
||||
{
|
||||
"outgoingState": { "exists": false },
|
||||
"connectionClosed": false,
|
||||
"age": 5000
|
||||
}
|
||||
```
|
||||
**Issue**: Connection setup failed but cleanup didn't trigger.
|
||||
|
||||
## Forcing Diagnostic Capture
|
||||
|
||||
To capture current state immediately:
|
||||
```typescript
|
||||
monitor.forceCaptureNow();
|
||||
```
|
||||
|
||||
This is useful when you notice accumulation starting.
|
||||
|
||||
## Automated Analysis
|
||||
|
||||
The monitor automatically analyzes patterns and logs:
|
||||
- Zombie/half-zombie counts
|
||||
- Stuck connection counts
|
||||
- Old connection counts
|
||||
- Memory usage
|
||||
- Recommendations
|
||||
|
||||
## Integration Example
|
||||
|
||||
```typescript
|
||||
// In your proxy startup script
|
||||
import { SmartProxy } from '@push.rocks/smartproxy';
|
||||
import ProductionConnectionMonitor from './production-connection-monitor.js';
|
||||
|
||||
async function startProxyWithMonitoring() {
|
||||
const proxy = new SmartProxy({
|
||||
// your config
|
||||
});
|
||||
|
||||
await proxy.start();
|
||||
|
||||
// Start monitoring
|
||||
const monitor = new ProductionConnectionMonitor(proxy);
|
||||
monitor.start(5000);
|
||||
|
||||
// Optional: Capture on specific events
|
||||
process.on('SIGUSR1', () => {
|
||||
console.log('Manual diagnostic capture triggered');
|
||||
monitor.forceCaptureNow();
|
||||
});
|
||||
|
||||
// Graceful shutdown
|
||||
process.on('SIGTERM', async () => {
|
||||
monitor.stop();
|
||||
await proxy.stop();
|
||||
process.exit(0);
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Monitor Not Detecting Accumulation
|
||||
- Check threshold settings (default: 50 connections)
|
||||
- Reduce check interval for faster detection
|
||||
- Use forceCaptureNow() to capture current state
|
||||
|
||||
### Too Many False Positives
|
||||
- Increase accumulation threshold
|
||||
- Increase spike threshold
|
||||
- Adjust check interval
|
||||
|
||||
### Missing Diagnostic Data
|
||||
- Ensure output directory exists and is writable
|
||||
- Check disk space
|
||||
- Verify process has write permissions
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. Deploy the monitor to production
|
||||
2. Wait for accumulation to occur
|
||||
3. Share diagnostic files for analysis
|
||||
4. Apply targeted fixes based on patterns found
|
||||
|
||||
The diagnostic data will reveal the exact state of connections when accumulation occurs, enabling precise fixes for your specific scenario.
|
93
test/test.cleanup-queue-bug.node.ts
Normal file
93
test/test.cleanup-queue-bug.node.ts
Normal file
@ -0,0 +1,93 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import { SmartProxy } from '../ts/index.js';
|
||||
|
||||
tap.test('cleanup queue bug - verify queue processing handles more than batch size', async (tools) => {
|
||||
console.log('\n=== Cleanup Queue Bug Test ===');
|
||||
console.log('Purpose: Verify that the cleanup queue correctly processes all connections');
|
||||
console.log('even when there are more than the batch size (100)');
|
||||
|
||||
// Create proxy
|
||||
const proxy = new SmartProxy({
|
||||
routes: [{
|
||||
name: 'test-route',
|
||||
match: { ports: 8588 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9996 }
|
||||
}
|
||||
}],
|
||||
enableDetailedLogging: false,
|
||||
});
|
||||
|
||||
await proxy.start();
|
||||
console.log('✓ Proxy started on port 8588');
|
||||
|
||||
// Access connection manager
|
||||
const cm = (proxy as any).connectionManager;
|
||||
|
||||
// Create mock connection records
|
||||
console.log('\n--- Creating 150 mock connections ---');
|
||||
const mockConnections: any[] = [];
|
||||
|
||||
for (let i = 0; i < 150; i++) {
|
||||
const mockRecord = {
|
||||
id: `mock-${i}`,
|
||||
incoming: { destroyed: true, remoteAddress: '127.0.0.1' },
|
||||
outgoing: { destroyed: true },
|
||||
connectionClosed: false,
|
||||
incomingStartTime: Date.now(),
|
||||
lastActivity: Date.now(),
|
||||
remoteIP: '127.0.0.1',
|
||||
remotePort: 10000 + i,
|
||||
localPort: 8588,
|
||||
bytesReceived: 100,
|
||||
bytesSent: 100,
|
||||
incomingTerminationReason: null,
|
||||
cleanupTimer: null
|
||||
};
|
||||
|
||||
// Add to connection records
|
||||
cm.connectionRecords.set(mockRecord.id, mockRecord);
|
||||
mockConnections.push(mockRecord);
|
||||
}
|
||||
|
||||
console.log(`Created ${cm.getConnectionCount()} mock connections`);
|
||||
expect(cm.getConnectionCount()).toEqual(150);
|
||||
|
||||
// Queue all connections for cleanup
|
||||
console.log('\n--- Queueing all connections for cleanup ---');
|
||||
for (const conn of mockConnections) {
|
||||
cm.initiateCleanupOnce(conn, 'test_cleanup');
|
||||
}
|
||||
|
||||
console.log(`Cleanup queue size: ${cm.cleanupQueue.size}`);
|
||||
expect(cm.cleanupQueue.size).toEqual(150);
|
||||
|
||||
// Wait for cleanup to complete
|
||||
console.log('\n--- Waiting for cleanup batches to process ---');
|
||||
|
||||
// The first batch should process immediately (100 connections)
|
||||
// Then additional batches should be scheduled
|
||||
await new Promise(resolve => setTimeout(resolve, 500));
|
||||
|
||||
// Check final state
|
||||
const finalCount = cm.getConnectionCount();
|
||||
console.log(`\nFinal connection count: ${finalCount}`);
|
||||
console.log(`Cleanup queue size: ${cm.cleanupQueue.size}`);
|
||||
|
||||
// All connections should be cleaned up
|
||||
expect(finalCount).toEqual(0);
|
||||
expect(cm.cleanupQueue.size).toEqual(0);
|
||||
|
||||
// Verify termination stats
|
||||
const stats = cm.getTerminationStats();
|
||||
console.log('Termination stats:', stats);
|
||||
expect(stats.incoming.test_cleanup).toEqual(150);
|
||||
|
||||
// Cleanup
|
||||
await proxy.stop();
|
||||
|
||||
console.log('\n✓ Test complete: Cleanup queue now correctly processes all connections');
|
||||
});
|
||||
|
||||
tap.start();
|
250
test/test.keepalive-support.node.ts
Normal file
250
test/test.keepalive-support.node.ts
Normal file
@ -0,0 +1,250 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as net from 'net';
|
||||
import { SmartProxy } from '../ts/index.js';
|
||||
import * as plugins from '../ts/plugins.js';
|
||||
|
||||
tap.test('keepalive support - verify keepalive connections are properly handled', async (tools) => {
|
||||
console.log('\n=== KeepAlive Support Test ===');
|
||||
console.log('Purpose: Verify that keepalive connections are not prematurely cleaned up');
|
||||
|
||||
// Create a simple echo backend
|
||||
const echoBackend = net.createServer((socket) => {
|
||||
socket.on('data', (data) => {
|
||||
// Echo back received data
|
||||
try {
|
||||
socket.write(data);
|
||||
} catch (err) {
|
||||
// Ignore write errors during shutdown
|
||||
}
|
||||
});
|
||||
|
||||
socket.on('error', (err) => {
|
||||
// Ignore errors from backend sockets
|
||||
console.log(`Backend socket error (expected during cleanup): ${err.code}`);
|
||||
});
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
echoBackend.listen(9998, () => {
|
||||
console.log('✓ Echo backend started on port 9998');
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Test 1: Standard keepalive treatment
|
||||
console.log('\n--- Test 1: Standard KeepAlive Treatment ---');
|
||||
|
||||
const proxy1 = new SmartProxy({
|
||||
routes: [{
|
||||
name: 'keepalive-route',
|
||||
match: { ports: 8590 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9998 }
|
||||
}
|
||||
}],
|
||||
keepAlive: true,
|
||||
keepAliveTreatment: 'standard',
|
||||
inactivityTimeout: 5000, // 5 seconds for faster testing
|
||||
enableDetailedLogging: false,
|
||||
});
|
||||
|
||||
await proxy1.start();
|
||||
console.log('✓ Proxy with standard keepalive started on port 8590');
|
||||
|
||||
// Create a keepalive connection
|
||||
const client1 = net.connect(8590, 'localhost');
|
||||
|
||||
// Add error handler to prevent unhandled errors
|
||||
client1.on('error', (err) => {
|
||||
console.log(`Client1 error (expected during cleanup): ${err.code}`);
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
client1.on('connect', () => {
|
||||
console.log('Client connected');
|
||||
client1.setKeepAlive(true, 1000);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Send initial data
|
||||
client1.write('Hello keepalive\n');
|
||||
|
||||
// Wait for echo
|
||||
await new Promise<void>((resolve) => {
|
||||
client1.once('data', (data) => {
|
||||
console.log(`Received echo: ${data.toString().trim()}`);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Check connection is marked as keepalive
|
||||
const cm1 = (proxy1 as any).connectionManager;
|
||||
const connections1 = cm1.getConnections();
|
||||
let keepAliveCount = 0;
|
||||
|
||||
for (const [id, record] of connections1) {
|
||||
if (record.hasKeepAlive) {
|
||||
keepAliveCount++;
|
||||
console.log(`KeepAlive connection ${id}: hasKeepAlive=${record.hasKeepAlive}`);
|
||||
}
|
||||
}
|
||||
|
||||
expect(keepAliveCount).toEqual(1);
|
||||
|
||||
// Wait to ensure it's not cleaned up prematurely
|
||||
await plugins.smartdelay.delayFor(6000);
|
||||
|
||||
const afterWaitCount1 = cm1.getConnectionCount();
|
||||
console.log(`Connections after 6s wait: ${afterWaitCount1}`);
|
||||
expect(afterWaitCount1).toEqual(1); // Should still be connected
|
||||
|
||||
// Send more data to keep it alive
|
||||
client1.write('Still alive\n');
|
||||
|
||||
// Clean up test 1
|
||||
client1.destroy();
|
||||
await proxy1.stop();
|
||||
await plugins.smartdelay.delayFor(500); // Wait for port to be released
|
||||
|
||||
// Test 2: Extended keepalive treatment
|
||||
console.log('\n--- Test 2: Extended KeepAlive Treatment ---');
|
||||
|
||||
const proxy2 = new SmartProxy({
|
||||
routes: [{
|
||||
name: 'keepalive-extended',
|
||||
match: { ports: 8591 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9998 }
|
||||
}
|
||||
}],
|
||||
keepAlive: true,
|
||||
keepAliveTreatment: 'extended',
|
||||
keepAliveInactivityMultiplier: 6,
|
||||
inactivityTimeout: 2000, // 2 seconds base, 12 seconds with multiplier
|
||||
enableDetailedLogging: false,
|
||||
});
|
||||
|
||||
await proxy2.start();
|
||||
console.log('✓ Proxy with extended keepalive started on port 8591');
|
||||
|
||||
const client2 = net.connect(8591, 'localhost');
|
||||
|
||||
// Add error handler to prevent unhandled errors
|
||||
client2.on('error', (err) => {
|
||||
console.log(`Client2 error (expected during cleanup): ${err.code}`);
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
client2.on('connect', () => {
|
||||
console.log('Client connected with extended timeout');
|
||||
client2.setKeepAlive(true, 1000);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Send initial data
|
||||
client2.write('Extended keepalive\n');
|
||||
|
||||
// Check connection
|
||||
const cm2 = (proxy2 as any).connectionManager;
|
||||
await plugins.smartdelay.delayFor(1000);
|
||||
|
||||
const connections2 = cm2.getConnections();
|
||||
for (const [id, record] of connections2) {
|
||||
console.log(`Extended connection ${id}: hasKeepAlive=${record.hasKeepAlive}, treatment=extended`);
|
||||
}
|
||||
|
||||
// Wait 3 seconds (would timeout with standard treatment)
|
||||
await plugins.smartdelay.delayFor(3000);
|
||||
|
||||
const midWaitCount = cm2.getConnectionCount();
|
||||
console.log(`Connections after 3s (base timeout exceeded): ${midWaitCount}`);
|
||||
expect(midWaitCount).toEqual(1); // Should still be connected due to extended treatment
|
||||
|
||||
// Clean up test 2
|
||||
client2.destroy();
|
||||
await proxy2.stop();
|
||||
await plugins.smartdelay.delayFor(500); // Wait for port to be released
|
||||
|
||||
// Test 3: Immortal keepalive treatment
|
||||
console.log('\n--- Test 3: Immortal KeepAlive Treatment ---');
|
||||
|
||||
const proxy3 = new SmartProxy({
|
||||
routes: [{
|
||||
name: 'keepalive-immortal',
|
||||
match: { ports: 8592 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9998 }
|
||||
}
|
||||
}],
|
||||
keepAlive: true,
|
||||
keepAliveTreatment: 'immortal',
|
||||
inactivityTimeout: 1000, // 1 second - should be ignored for immortal
|
||||
enableDetailedLogging: false,
|
||||
});
|
||||
|
||||
await proxy3.start();
|
||||
console.log('✓ Proxy with immortal keepalive started on port 8592');
|
||||
|
||||
const client3 = net.connect(8592, 'localhost');
|
||||
|
||||
// Add error handler to prevent unhandled errors
|
||||
client3.on('error', (err) => {
|
||||
console.log(`Client3 error (expected during cleanup): ${err.code}`);
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
client3.on('connect', () => {
|
||||
console.log('Client connected with immortal treatment');
|
||||
client3.setKeepAlive(true, 1000);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Send initial data
|
||||
client3.write('Immortal connection\n');
|
||||
|
||||
// Wait well beyond normal timeout
|
||||
await plugins.smartdelay.delayFor(5000);
|
||||
|
||||
const cm3 = (proxy3 as any).connectionManager;
|
||||
const immortalCount = cm3.getConnectionCount();
|
||||
console.log(`Immortal connections after 5s inactivity: ${immortalCount}`);
|
||||
expect(immortalCount).toEqual(1); // Should never timeout
|
||||
|
||||
// Verify zombie detection doesn't affect immortal connections
|
||||
console.log('\n--- Verifying zombie detection respects keepalive ---');
|
||||
|
||||
// Manually trigger inactivity check
|
||||
cm3.performOptimizedInactivityCheck();
|
||||
|
||||
await plugins.smartdelay.delayFor(1000);
|
||||
|
||||
const afterCheckCount = cm3.getConnectionCount();
|
||||
console.log(`Connections after manual inactivity check: ${afterCheckCount}`);
|
||||
expect(afterCheckCount).toEqual(1); // Should still be alive
|
||||
|
||||
// Clean up
|
||||
client3.destroy();
|
||||
await proxy3.stop();
|
||||
|
||||
// Close backend and wait for it to fully close
|
||||
await new Promise<void>((resolve) => {
|
||||
echoBackend.close(() => {
|
||||
console.log('Echo backend closed');
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
console.log('\n✓ All keepalive tests passed:');
|
||||
console.log(' - Standard treatment works correctly');
|
||||
console.log(' - Extended treatment applies multiplier');
|
||||
console.log(' - Immortal treatment never times out');
|
||||
console.log(' - Zombie detection respects keepalive settings');
|
||||
});
|
||||
|
||||
tap.start();
|
280
test/test.metrics-collector.ts
Normal file
280
test/test.metrics-collector.ts
Normal file
@ -0,0 +1,280 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import { SmartProxy } from '../ts/index.js';
|
||||
import * as net from 'net';
|
||||
import * as plugins from '../ts/plugins.js';
|
||||
|
||||
tap.test('MetricsCollector provides accurate metrics', async (tools) => {
|
||||
console.log('\n=== MetricsCollector Test ===');
|
||||
|
||||
// Create a simple echo server for testing
|
||||
const echoServer = net.createServer((socket) => {
|
||||
socket.on('data', (data) => {
|
||||
socket.write(data);
|
||||
});
|
||||
socket.on('error', () => {}); // Ignore errors
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
echoServer.listen(9995, () => {
|
||||
console.log('✓ Echo server started on port 9995');
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Create SmartProxy with test routes
|
||||
const proxy = new SmartProxy({
|
||||
routes: [
|
||||
{
|
||||
name: 'test-route-1',
|
||||
match: { ports: 8700 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9995 }
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'test-route-2',
|
||||
match: { ports: 8701 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9995 }
|
||||
}
|
||||
}
|
||||
],
|
||||
enableDetailedLogging: true,
|
||||
});
|
||||
|
||||
await proxy.start();
|
||||
console.log('✓ Proxy started on ports 8700 and 8701');
|
||||
|
||||
// Get stats interface
|
||||
const stats = proxy.getStats();
|
||||
|
||||
// Test 1: Initial state
|
||||
console.log('\n--- Test 1: Initial State ---');
|
||||
expect(stats.getActiveConnections()).toEqual(0);
|
||||
expect(stats.getTotalConnections()).toEqual(0);
|
||||
expect(stats.getRequestsPerSecond()).toEqual(0);
|
||||
expect(stats.getConnectionsByRoute().size).toEqual(0);
|
||||
expect(stats.getConnectionsByIP().size).toEqual(0);
|
||||
|
||||
const throughput = stats.getThroughput();
|
||||
expect(throughput.bytesIn).toEqual(0);
|
||||
expect(throughput.bytesOut).toEqual(0);
|
||||
console.log('✓ Initial metrics are all zero');
|
||||
|
||||
// Test 2: Create connections and verify metrics
|
||||
console.log('\n--- Test 2: Active Connections ---');
|
||||
const clients: net.Socket[] = [];
|
||||
|
||||
// Create 3 connections to route 1
|
||||
for (let i = 0; i < 3; i++) {
|
||||
const client = net.connect(8700, 'localhost');
|
||||
clients.push(client);
|
||||
await new Promise<void>((resolve) => {
|
||||
client.on('connect', resolve);
|
||||
client.on('error', () => resolve());
|
||||
});
|
||||
}
|
||||
|
||||
// Create 2 connections to route 2
|
||||
for (let i = 0; i < 2; i++) {
|
||||
const client = net.connect(8701, 'localhost');
|
||||
clients.push(client);
|
||||
await new Promise<void>((resolve) => {
|
||||
client.on('connect', resolve);
|
||||
client.on('error', () => resolve());
|
||||
});
|
||||
}
|
||||
|
||||
// Wait for connections to be fully established and routed
|
||||
await plugins.smartdelay.delayFor(300);
|
||||
|
||||
// Verify connection counts
|
||||
expect(stats.getActiveConnections()).toEqual(5);
|
||||
expect(stats.getTotalConnections()).toEqual(5);
|
||||
console.log(`✓ Active connections: ${stats.getActiveConnections()}`);
|
||||
console.log(`✓ Total connections: ${stats.getTotalConnections()}`);
|
||||
|
||||
// Test 3: Connections by route
|
||||
console.log('\n--- Test 3: Connections by Route ---');
|
||||
const routeConnections = stats.getConnectionsByRoute();
|
||||
console.log('Route connections:', Array.from(routeConnections.entries()));
|
||||
|
||||
// Check if we have the expected counts
|
||||
let route1Count = 0;
|
||||
let route2Count = 0;
|
||||
for (const [routeName, count] of routeConnections) {
|
||||
if (routeName === 'test-route-1') route1Count = count;
|
||||
if (routeName === 'test-route-2') route2Count = count;
|
||||
}
|
||||
|
||||
expect(route1Count).toEqual(3);
|
||||
expect(route2Count).toEqual(2);
|
||||
console.log('✓ Route test-route-1 has 3 connections');
|
||||
console.log('✓ Route test-route-2 has 2 connections');
|
||||
|
||||
// Test 4: Connections by IP
|
||||
console.log('\n--- Test 4: Connections by IP ---');
|
||||
const ipConnections = stats.getConnectionsByIP();
|
||||
// All connections are from localhost (127.0.0.1 or ::1)
|
||||
let totalIPConnections = 0;
|
||||
for (const [ip, count] of ipConnections) {
|
||||
console.log(` IP ${ip}: ${count} connections`);
|
||||
totalIPConnections += count;
|
||||
}
|
||||
expect(totalIPConnections).toEqual(5);
|
||||
console.log('✓ Total connections by IP matches active connections');
|
||||
|
||||
// Test 5: RPS calculation
|
||||
console.log('\n--- Test 5: Requests Per Second ---');
|
||||
const rps = stats.getRequestsPerSecond();
|
||||
console.log(` Current RPS: ${rps.toFixed(2)}`);
|
||||
// We created 5 connections, so RPS should be > 0
|
||||
expect(rps).toBeGreaterThan(0);
|
||||
console.log('✓ RPS is greater than 0');
|
||||
|
||||
// Test 6: Throughput
|
||||
console.log('\n--- Test 6: Throughput ---');
|
||||
// Send some data through connections
|
||||
for (const client of clients) {
|
||||
if (!client.destroyed) {
|
||||
client.write('Hello metrics!\n');
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for data to be transmitted
|
||||
await plugins.smartdelay.delayFor(100);
|
||||
|
||||
const throughputAfter = stats.getThroughput();
|
||||
console.log(` Bytes in: ${throughputAfter.bytesIn}`);
|
||||
console.log(` Bytes out: ${throughputAfter.bytesOut}`);
|
||||
expect(throughputAfter.bytesIn).toBeGreaterThan(0);
|
||||
expect(throughputAfter.bytesOut).toBeGreaterThan(0);
|
||||
console.log('✓ Throughput shows bytes transferred');
|
||||
|
||||
// Test 7: Close some connections
|
||||
console.log('\n--- Test 7: Connection Cleanup ---');
|
||||
// Close first 2 clients
|
||||
clients[0].destroy();
|
||||
clients[1].destroy();
|
||||
|
||||
await plugins.smartdelay.delayFor(100);
|
||||
|
||||
expect(stats.getActiveConnections()).toEqual(3);
|
||||
expect(stats.getTotalConnections()).toEqual(5); // Total should remain the same
|
||||
console.log(`✓ Active connections reduced to ${stats.getActiveConnections()}`);
|
||||
console.log(`✓ Total connections still ${stats.getTotalConnections()}`);
|
||||
|
||||
// Test 8: Helper methods
|
||||
console.log('\n--- Test 8: Helper Methods ---');
|
||||
|
||||
// Test getTopIPs
|
||||
const topIPs = (stats as any).getTopIPs(5);
|
||||
expect(topIPs.length).toBeGreaterThan(0);
|
||||
console.log('✓ getTopIPs returns IP list');
|
||||
|
||||
// Test isIPBlocked
|
||||
const isBlocked = (stats as any).isIPBlocked('127.0.0.1', 10);
|
||||
expect(isBlocked).toEqual(false); // Should not be blocked with limit of 10
|
||||
console.log('✓ isIPBlocked works correctly');
|
||||
|
||||
// Test throughput rate
|
||||
const throughputRate = (stats as any).getThroughputRate();
|
||||
console.log(` Throughput rate: ${throughputRate.bytesInPerSec} bytes/sec in, ${throughputRate.bytesOutPerSec} bytes/sec out`);
|
||||
console.log('✓ getThroughputRate calculates rates');
|
||||
|
||||
// Cleanup
|
||||
console.log('\n--- Cleanup ---');
|
||||
for (const client of clients) {
|
||||
if (!client.destroyed) {
|
||||
client.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
await proxy.stop();
|
||||
echoServer.close();
|
||||
|
||||
console.log('\n✓ All MetricsCollector tests passed');
|
||||
});
|
||||
|
||||
// Test with mock data for unit testing
|
||||
tap.test('MetricsCollector unit test with mock data', async () => {
|
||||
console.log('\n=== MetricsCollector Unit Test ===');
|
||||
|
||||
// Create a mock SmartProxy with mock ConnectionManager
|
||||
const mockConnections = new Map([
|
||||
['conn1', {
|
||||
remoteIP: '192.168.1.1',
|
||||
routeName: 'api',
|
||||
bytesReceived: 1000,
|
||||
bytesSent: 500,
|
||||
incomingStartTime: Date.now() - 5000
|
||||
}],
|
||||
['conn2', {
|
||||
remoteIP: '192.168.1.1',
|
||||
routeName: 'web',
|
||||
bytesReceived: 2000,
|
||||
bytesSent: 1500,
|
||||
incomingStartTime: Date.now() - 10000
|
||||
}],
|
||||
['conn3', {
|
||||
remoteIP: '192.168.1.2',
|
||||
routeName: 'api',
|
||||
bytesReceived: 500,
|
||||
bytesSent: 250,
|
||||
incomingStartTime: Date.now() - 3000
|
||||
}]
|
||||
]);
|
||||
|
||||
const mockSmartProxy = {
|
||||
connectionManager: {
|
||||
getConnectionCount: () => mockConnections.size,
|
||||
getConnections: () => mockConnections,
|
||||
getTerminationStats: () => ({
|
||||
incoming: { normal: 10, timeout: 2, error: 1 }
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
// Import MetricsCollector directly
|
||||
const { MetricsCollector } = await import('../ts/proxies/smart-proxy/metrics-collector.js');
|
||||
const metrics = new MetricsCollector(mockSmartProxy as any);
|
||||
|
||||
// Test metrics calculation
|
||||
console.log('\n--- Testing with Mock Data ---');
|
||||
|
||||
expect(metrics.getActiveConnections()).toEqual(3);
|
||||
console.log(`✓ Active connections: ${metrics.getActiveConnections()}`);
|
||||
|
||||
expect(metrics.getTotalConnections()).toEqual(16); // 3 active + 13 terminated
|
||||
console.log(`✓ Total connections: ${metrics.getTotalConnections()}`);
|
||||
|
||||
const routeConns = metrics.getConnectionsByRoute();
|
||||
expect(routeConns.get('api')).toEqual(2);
|
||||
expect(routeConns.get('web')).toEqual(1);
|
||||
console.log('✓ Connections by route calculated correctly');
|
||||
|
||||
const ipConns = metrics.getConnectionsByIP();
|
||||
expect(ipConns.get('192.168.1.1')).toEqual(2);
|
||||
expect(ipConns.get('192.168.1.2')).toEqual(1);
|
||||
console.log('✓ Connections by IP calculated correctly');
|
||||
|
||||
const throughput = metrics.getThroughput();
|
||||
expect(throughput.bytesIn).toEqual(3500);
|
||||
expect(throughput.bytesOut).toEqual(2250);
|
||||
console.log(`✓ Throughput: ${throughput.bytesIn} bytes in, ${throughput.bytesOut} bytes out`);
|
||||
|
||||
// Test RPS tracking
|
||||
metrics.recordRequest();
|
||||
metrics.recordRequest();
|
||||
metrics.recordRequest();
|
||||
|
||||
const rps = metrics.getRequestsPerSecond();
|
||||
expect(rps).toBeGreaterThan(0);
|
||||
console.log(`✓ RPS tracking works: ${rps.toFixed(2)} req/sec`);
|
||||
|
||||
console.log('\n✓ All unit tests passed');
|
||||
});
|
||||
|
||||
export default tap.start();
|
182
test/test.proxy-chain-cleanup.node.ts
Normal file
182
test/test.proxy-chain-cleanup.node.ts
Normal file
@ -0,0 +1,182 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as plugins from '../ts/plugins.js';
|
||||
import { SmartProxy } from '../ts/index.js';
|
||||
|
||||
let outerProxy: SmartProxy;
|
||||
let innerProxy: SmartProxy;
|
||||
|
||||
tap.test('setup two smartproxies in a chain configuration', async () => {
|
||||
// Setup inner proxy (backend proxy)
|
||||
innerProxy = new SmartProxy({
|
||||
routes: [
|
||||
{
|
||||
match: {
|
||||
ports: 8002
|
||||
},
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: {
|
||||
host: 'httpbin.org',
|
||||
port: 443
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
defaults: {
|
||||
target: {
|
||||
host: 'httpbin.org',
|
||||
port: 443
|
||||
}
|
||||
},
|
||||
acceptProxyProtocol: true,
|
||||
sendProxyProtocol: false,
|
||||
enableDetailedLogging: true,
|
||||
connectionCleanupInterval: 5000, // More frequent cleanup for testing
|
||||
inactivityTimeout: 10000 // Shorter timeout for testing
|
||||
});
|
||||
await innerProxy.start();
|
||||
|
||||
// Setup outer proxy (frontend proxy)
|
||||
outerProxy = new SmartProxy({
|
||||
routes: [
|
||||
{
|
||||
match: {
|
||||
ports: 8001
|
||||
},
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: {
|
||||
host: 'localhost',
|
||||
port: 8002
|
||||
},
|
||||
sendProxyProtocol: true
|
||||
}
|
||||
}
|
||||
],
|
||||
defaults: {
|
||||
target: {
|
||||
host: 'localhost',
|
||||
port: 8002
|
||||
}
|
||||
},
|
||||
sendProxyProtocol: true,
|
||||
enableDetailedLogging: true,
|
||||
connectionCleanupInterval: 5000, // More frequent cleanup for testing
|
||||
inactivityTimeout: 10000 // Shorter timeout for testing
|
||||
});
|
||||
await outerProxy.start();
|
||||
});
|
||||
|
||||
tap.test('should properly cleanup connections in proxy chain', async (tools) => {
|
||||
const testDuration = 30000; // 30 seconds
|
||||
const connectionInterval = 500; // Create new connection every 500ms
|
||||
const connectionDuration = 2000; // Each connection lasts 2 seconds
|
||||
|
||||
let connectionsCreated = 0;
|
||||
let connectionsCompleted = 0;
|
||||
|
||||
// Function to create a test connection
|
||||
const createTestConnection = async () => {
|
||||
connectionsCreated++;
|
||||
const connectionId = connectionsCreated;
|
||||
|
||||
try {
|
||||
const socket = plugins.net.connect({
|
||||
port: 8001,
|
||||
host: 'localhost'
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
socket.on('connect', () => {
|
||||
console.log(`Connection ${connectionId} established`);
|
||||
|
||||
// Send TLS Client Hello for httpbin.org
|
||||
const clientHello = Buffer.from([
|
||||
0x16, 0x03, 0x01, 0x00, 0xc8, // TLS handshake header
|
||||
0x01, 0x00, 0x00, 0xc4, // Client Hello
|
||||
0x03, 0x03, // TLS 1.2
|
||||
...Array(32).fill(0), // Random bytes
|
||||
0x00, // Session ID length
|
||||
0x00, 0x02, 0x13, 0x01, // Cipher suites
|
||||
0x01, 0x00, // Compression methods
|
||||
0x00, 0x97, // Extensions length
|
||||
0x00, 0x00, 0x00, 0x0f, 0x00, 0x0d, // SNI extension
|
||||
0x00, 0x00, 0x0a, 0x68, 0x74, 0x74, 0x70, 0x62, 0x69, 0x6e, 0x2e, 0x6f, 0x72, 0x67 // "httpbin.org"
|
||||
]);
|
||||
|
||||
socket.write(clientHello);
|
||||
|
||||
// Keep connection alive for specified duration
|
||||
setTimeout(() => {
|
||||
socket.destroy();
|
||||
connectionsCompleted++;
|
||||
console.log(`Connection ${connectionId} closed (completed: ${connectionsCompleted}/${connectionsCreated})`);
|
||||
resolve();
|
||||
}, connectionDuration);
|
||||
});
|
||||
|
||||
socket.on('error', (err) => {
|
||||
console.log(`Connection ${connectionId} error: ${err.message}`);
|
||||
connectionsCompleted++;
|
||||
reject(err);
|
||||
});
|
||||
});
|
||||
} catch (err) {
|
||||
console.log(`Failed to create connection ${connectionId}: ${err.message}`);
|
||||
connectionsCompleted++;
|
||||
}
|
||||
};
|
||||
|
||||
// Start creating connections
|
||||
const startTime = Date.now();
|
||||
const connectionTimer = setInterval(() => {
|
||||
if (Date.now() - startTime < testDuration) {
|
||||
createTestConnection().catch(() => {});
|
||||
} else {
|
||||
clearInterval(connectionTimer);
|
||||
}
|
||||
}, connectionInterval);
|
||||
|
||||
// Monitor connection counts
|
||||
const monitorInterval = setInterval(() => {
|
||||
const outerConnections = (outerProxy as any).connectionManager.getConnectionCount();
|
||||
const innerConnections = (innerProxy as any).connectionManager.getConnectionCount();
|
||||
|
||||
console.log(`Active connections - Outer: ${outerConnections}, Inner: ${innerConnections}, Created: ${connectionsCreated}, Completed: ${connectionsCompleted}`);
|
||||
}, 2000);
|
||||
|
||||
// Wait for test duration + cleanup time
|
||||
await tools.delayFor(testDuration + 10000);
|
||||
|
||||
clearInterval(connectionTimer);
|
||||
clearInterval(monitorInterval);
|
||||
|
||||
// Wait for all connections to complete
|
||||
while (connectionsCompleted < connectionsCreated) {
|
||||
await tools.delayFor(100);
|
||||
}
|
||||
|
||||
// Give some time for cleanup
|
||||
await tools.delayFor(5000);
|
||||
|
||||
// Check final connection counts
|
||||
const finalOuterConnections = (outerProxy as any).connectionManager.getConnectionCount();
|
||||
const finalInnerConnections = (innerProxy as any).connectionManager.getConnectionCount();
|
||||
|
||||
console.log(`\nFinal connection counts:`);
|
||||
console.log(`Outer proxy: ${finalOuterConnections}`);
|
||||
console.log(`Inner proxy: ${finalInnerConnections}`);
|
||||
console.log(`Total created: ${connectionsCreated}`);
|
||||
console.log(`Total completed: ${connectionsCompleted}`);
|
||||
|
||||
// Both proxies should have cleaned up all connections
|
||||
expect(finalOuterConnections).toEqual(0);
|
||||
expect(finalInnerConnections).toEqual(0);
|
||||
});
|
||||
|
||||
tap.test('cleanup proxies', async () => {
|
||||
await outerProxy.stop();
|
||||
await innerProxy.stop();
|
||||
});
|
||||
|
||||
export default tap.start();
|
144
test/test.stuck-connection-cleanup.node.ts
Normal file
144
test/test.stuck-connection-cleanup.node.ts
Normal file
@ -0,0 +1,144 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as net from 'net';
|
||||
import { SmartProxy } from '../ts/index.js';
|
||||
import * as plugins from '../ts/plugins.js';
|
||||
|
||||
tap.test('stuck connection cleanup - verify connections to hanging backends are cleaned up', async (tools) => {
|
||||
console.log('\n=== Stuck Connection Cleanup Test ===');
|
||||
console.log('Purpose: Verify that connections to backends that accept but never respond are cleaned up');
|
||||
|
||||
// Create a hanging backend that accepts connections but never responds
|
||||
let backendConnections = 0;
|
||||
const hangingBackend = net.createServer((socket) => {
|
||||
backendConnections++;
|
||||
console.log(`Hanging backend: Connection ${backendConnections} received`);
|
||||
// Accept the connection but never send any data back
|
||||
// This simulates a hung backend service
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
hangingBackend.listen(9997, () => {
|
||||
console.log('✓ Hanging backend started on port 9997');
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Create proxy that forwards to hanging backend
|
||||
const proxy = new SmartProxy({
|
||||
routes: [{
|
||||
name: 'to-hanging-backend',
|
||||
match: { ports: 8589 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: { host: 'localhost', port: 9997 }
|
||||
}
|
||||
}],
|
||||
keepAlive: true,
|
||||
enableDetailedLogging: false,
|
||||
inactivityTimeout: 5000, // 5 second inactivity check interval for faster testing
|
||||
});
|
||||
|
||||
await proxy.start();
|
||||
console.log('✓ Proxy started on port 8589');
|
||||
|
||||
// Create connections that will get stuck
|
||||
console.log('\n--- Creating connections to hanging backend ---');
|
||||
const clients: net.Socket[] = [];
|
||||
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const client = net.connect(8589, 'localhost');
|
||||
clients.push(client);
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
client.on('connect', () => {
|
||||
console.log(`Client ${i} connected`);
|
||||
// Send data that will never get a response
|
||||
client.write(`GET / HTTP/1.1\r\nHost: localhost\r\n\r\n`);
|
||||
resolve();
|
||||
});
|
||||
|
||||
client.on('error', (err) => {
|
||||
console.log(`Client ${i} error: ${err.message}`);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Wait a moment for connections to establish
|
||||
await plugins.smartdelay.delayFor(1000);
|
||||
|
||||
// Check initial connection count
|
||||
const initialCount = (proxy as any).connectionManager.getConnectionCount();
|
||||
console.log(`\nInitial connection count: ${initialCount}`);
|
||||
expect(initialCount).toEqual(5);
|
||||
|
||||
// Get connection details
|
||||
const connections = (proxy as any).connectionManager.getConnections();
|
||||
let stuckCount = 0;
|
||||
|
||||
for (const [id, record] of connections) {
|
||||
if (record.bytesReceived > 0 && record.bytesSent === 0) {
|
||||
stuckCount++;
|
||||
console.log(`Stuck connection ${id}: received=${record.bytesReceived}, sent=${record.bytesSent}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Stuck connections found: ${stuckCount}`);
|
||||
expect(stuckCount).toEqual(5);
|
||||
|
||||
// Wait for inactivity check to run (it checks every 30s by default, but we set it to 5s)
|
||||
console.log('\n--- Waiting for stuck connection detection (65 seconds) ---');
|
||||
console.log('Note: Stuck connections are cleaned up after 60 seconds with no response');
|
||||
|
||||
// Speed up time by manually triggering inactivity check after simulating time passage
|
||||
// First, age the connections by updating their timestamps
|
||||
const now = Date.now();
|
||||
for (const [id, record] of connections) {
|
||||
// Simulate that these connections are 61 seconds old
|
||||
record.incomingStartTime = now - 61000;
|
||||
record.lastActivity = now - 61000;
|
||||
}
|
||||
|
||||
// Manually trigger inactivity check
|
||||
console.log('Manually triggering inactivity check...');
|
||||
(proxy as any).connectionManager.performOptimizedInactivityCheck();
|
||||
|
||||
// Wait for cleanup to complete
|
||||
await plugins.smartdelay.delayFor(1000);
|
||||
|
||||
// Check connection count after cleanup
|
||||
const afterCleanupCount = (proxy as any).connectionManager.getConnectionCount();
|
||||
console.log(`\nConnection count after cleanup: ${afterCleanupCount}`);
|
||||
|
||||
// Verify termination stats
|
||||
const stats = (proxy as any).connectionManager.getTerminationStats();
|
||||
console.log('\nTermination stats:', stats);
|
||||
|
||||
// All connections should be cleaned up as "stuck_no_response"
|
||||
expect(afterCleanupCount).toEqual(0);
|
||||
|
||||
// The termination reason might be under incoming or general stats
|
||||
const stuckCleanups = (stats.incoming.stuck_no_response || 0) +
|
||||
(stats.outgoing?.stuck_no_response || 0);
|
||||
console.log(`Stuck cleanups detected: ${stuckCleanups}`);
|
||||
expect(stuckCleanups).toBeGreaterThan(0);
|
||||
|
||||
// Verify clients were disconnected
|
||||
let closedClients = 0;
|
||||
for (const client of clients) {
|
||||
if (client.destroyed) {
|
||||
closedClients++;
|
||||
}
|
||||
}
|
||||
console.log(`Closed clients: ${closedClients}/5`);
|
||||
expect(closedClients).toEqual(5);
|
||||
|
||||
// Cleanup
|
||||
console.log('\n--- Cleanup ---');
|
||||
await proxy.stop();
|
||||
hangingBackend.close();
|
||||
|
||||
console.log('✓ Test complete: Stuck connections are properly detected and cleaned up');
|
||||
});
|
||||
|
||||
tap.start();
|
306
test/test.zombie-connection-cleanup.node.ts
Normal file
306
test/test.zombie-connection-cleanup.node.ts
Normal file
@ -0,0 +1,306 @@
|
||||
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
||||
import * as net from 'net';
|
||||
import * as plugins from '../ts/plugins.js';
|
||||
|
||||
// Import SmartProxy
|
||||
import { SmartProxy } from '../ts/index.js';
|
||||
|
||||
// Import types through type-only imports
|
||||
import type { ConnectionManager } from '../ts/proxies/smart-proxy/connection-manager.js';
|
||||
import type { IConnectionRecord } from '../ts/proxies/smart-proxy/models/interfaces.js';
|
||||
|
||||
tap.test('zombie connection cleanup - verify inactivity check detects and cleans destroyed sockets', async () => {
|
||||
console.log('\n=== Zombie Connection Cleanup Test ===');
|
||||
console.log('Purpose: Verify that connections with destroyed sockets are detected and cleaned up');
|
||||
console.log('Setup: Client → OuterProxy (8590) → InnerProxy (8591) → Backend (9998)');
|
||||
|
||||
// Create backend server that can be controlled
|
||||
let acceptConnections = true;
|
||||
let destroyImmediately = false;
|
||||
const backendConnections: net.Socket[] = [];
|
||||
|
||||
const backend = net.createServer((socket) => {
|
||||
console.log('Backend: Connection received');
|
||||
backendConnections.push(socket);
|
||||
|
||||
if (destroyImmediately) {
|
||||
console.log('Backend: Destroying connection immediately');
|
||||
socket.destroy();
|
||||
} else {
|
||||
socket.on('data', (data) => {
|
||||
console.log('Backend: Received data, echoing back');
|
||||
socket.write(data);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => {
|
||||
backend.listen(9998, () => {
|
||||
console.log('✓ Backend server started on port 9998');
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
// Create InnerProxy with faster inactivity check for testing
|
||||
const innerProxy = new SmartProxy({
|
||||
ports: [8591],
|
||||
enableDetailedLogging: true,
|
||||
inactivityTimeout: 5000, // 5 seconds for faster testing
|
||||
inactivityCheckInterval: 1000, // Check every second
|
||||
routes: [{
|
||||
name: 'to-backend',
|
||||
match: { ports: 8591 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: {
|
||||
host: 'localhost',
|
||||
port: 9998
|
||||
}
|
||||
}
|
||||
}]
|
||||
});
|
||||
|
||||
// Create OuterProxy with faster inactivity check
|
||||
const outerProxy = new SmartProxy({
|
||||
ports: [8590],
|
||||
enableDetailedLogging: true,
|
||||
inactivityTimeout: 5000, // 5 seconds for faster testing
|
||||
inactivityCheckInterval: 1000, // Check every second
|
||||
routes: [{
|
||||
name: 'to-inner',
|
||||
match: { ports: 8590 },
|
||||
action: {
|
||||
type: 'forward',
|
||||
target: {
|
||||
host: 'localhost',
|
||||
port: 8591
|
||||
}
|
||||
}
|
||||
}]
|
||||
});
|
||||
|
||||
await innerProxy.start();
|
||||
console.log('✓ InnerProxy started on port 8591');
|
||||
|
||||
await outerProxy.start();
|
||||
console.log('✓ OuterProxy started on port 8590');
|
||||
|
||||
// Helper to get connection details
|
||||
const getConnectionDetails = () => {
|
||||
const outerConnMgr = (outerProxy as any).connectionManager as ConnectionManager;
|
||||
const innerConnMgr = (innerProxy as any).connectionManager as ConnectionManager;
|
||||
|
||||
const outerRecords = Array.from((outerConnMgr as any).connectionRecords.values()) as IConnectionRecord[];
|
||||
const innerRecords = Array.from((innerConnMgr as any).connectionRecords.values()) as IConnectionRecord[];
|
||||
|
||||
return {
|
||||
outer: {
|
||||
count: outerConnMgr.getConnectionCount(),
|
||||
records: outerRecords,
|
||||
zombies: outerRecords.filter(r =>
|
||||
!r.connectionClosed &&
|
||||
r.incoming?.destroyed &&
|
||||
(r.outgoing?.destroyed ?? true)
|
||||
),
|
||||
halfZombies: outerRecords.filter(r =>
|
||||
!r.connectionClosed &&
|
||||
(r.incoming?.destroyed || r.outgoing?.destroyed) &&
|
||||
!(r.incoming?.destroyed && (r.outgoing?.destroyed ?? true))
|
||||
)
|
||||
},
|
||||
inner: {
|
||||
count: innerConnMgr.getConnectionCount(),
|
||||
records: innerRecords,
|
||||
zombies: innerRecords.filter(r =>
|
||||
!r.connectionClosed &&
|
||||
r.incoming?.destroyed &&
|
||||
(r.outgoing?.destroyed ?? true)
|
||||
),
|
||||
halfZombies: innerRecords.filter(r =>
|
||||
!r.connectionClosed &&
|
||||
(r.incoming?.destroyed || r.outgoing?.destroyed) &&
|
||||
!(r.incoming?.destroyed && (r.outgoing?.destroyed ?? true))
|
||||
)
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
console.log('\n--- Test 1: Create zombie by destroying sockets without events ---');
|
||||
|
||||
// Create a connection and forcefully destroy sockets to create zombies
|
||||
const client1 = new net.Socket();
|
||||
await new Promise<void>((resolve) => {
|
||||
client1.connect(8590, 'localhost', () => {
|
||||
console.log('Client1 connected to OuterProxy');
|
||||
client1.write('GET / HTTP/1.1\r\nHost: test.com\r\n\r\n');
|
||||
|
||||
// Wait for connection to be established through the chain
|
||||
setTimeout(() => {
|
||||
console.log('Forcefully destroying backend connections to create zombies');
|
||||
|
||||
// Get connection details before destruction
|
||||
const beforeDetails = getConnectionDetails();
|
||||
console.log(`Before destruction: Outer=${beforeDetails.outer.count}, Inner=${beforeDetails.inner.count}`);
|
||||
|
||||
// Destroy all backend connections without proper close events
|
||||
backendConnections.forEach(conn => {
|
||||
if (!conn.destroyed) {
|
||||
// Remove all listeners to prevent proper cleanup
|
||||
conn.removeAllListeners();
|
||||
conn.destroy();
|
||||
}
|
||||
});
|
||||
|
||||
// Also destroy the client socket abruptly
|
||||
client1.removeAllListeners();
|
||||
client1.destroy();
|
||||
|
||||
resolve();
|
||||
}, 500);
|
||||
});
|
||||
});
|
||||
|
||||
// Check immediately after destruction
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
let details = getConnectionDetails();
|
||||
console.log(`\nAfter destruction:`);
|
||||
console.log(` Outer: ${details.outer.count} connections, ${details.outer.zombies.length} zombies, ${details.outer.halfZombies.length} half-zombies`);
|
||||
console.log(` Inner: ${details.inner.count} connections, ${details.inner.zombies.length} zombies, ${details.inner.halfZombies.length} half-zombies`);
|
||||
|
||||
// Wait for inactivity check to run (should detect zombies)
|
||||
console.log('\nWaiting for inactivity check to detect zombies...');
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
|
||||
details = getConnectionDetails();
|
||||
console.log(`\nAfter first inactivity check:`);
|
||||
console.log(` Outer: ${details.outer.count} connections, ${details.outer.zombies.length} zombies, ${details.outer.halfZombies.length} half-zombies`);
|
||||
console.log(` Inner: ${details.inner.count} connections, ${details.inner.zombies.length} zombies, ${details.inner.halfZombies.length} half-zombies`);
|
||||
|
||||
console.log('\n--- Test 2: Create half-zombie by destroying only one socket ---');
|
||||
|
||||
// Clear backend connections array
|
||||
backendConnections.length = 0;
|
||||
|
||||
const client2 = new net.Socket();
|
||||
await new Promise<void>((resolve) => {
|
||||
client2.connect(8590, 'localhost', () => {
|
||||
console.log('Client2 connected to OuterProxy');
|
||||
client2.write('GET / HTTP/1.1\r\nHost: test.com\r\n\r\n');
|
||||
|
||||
setTimeout(() => {
|
||||
console.log('Creating half-zombie by destroying only outgoing socket on outer proxy');
|
||||
|
||||
// Access the connection records directly
|
||||
const outerConnMgr = (outerProxy as any).connectionManager as ConnectionManager;
|
||||
const outerRecords = Array.from((outerConnMgr as any).connectionRecords.values()) as IConnectionRecord[];
|
||||
|
||||
// Find the active connection and destroy only its outgoing socket
|
||||
const activeRecord = outerRecords.find(r => !r.connectionClosed && r.outgoing && !r.outgoing.destroyed);
|
||||
if (activeRecord && activeRecord.outgoing) {
|
||||
console.log('Found active connection, destroying outgoing socket');
|
||||
activeRecord.outgoing.removeAllListeners();
|
||||
activeRecord.outgoing.destroy();
|
||||
}
|
||||
|
||||
resolve();
|
||||
}, 500);
|
||||
});
|
||||
});
|
||||
|
||||
// Check half-zombie state
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
details = getConnectionDetails();
|
||||
console.log(`\nAfter creating half-zombie:`);
|
||||
console.log(` Outer: ${details.outer.count} connections, ${details.outer.zombies.length} zombies, ${details.outer.halfZombies.length} half-zombies`);
|
||||
console.log(` Inner: ${details.inner.count} connections, ${details.inner.zombies.length} zombies, ${details.inner.halfZombies.length} half-zombies`);
|
||||
|
||||
// Wait for 30-second grace period (simulated by multiple checks)
|
||||
console.log('\nWaiting for half-zombie grace period (30 seconds simulated)...');
|
||||
|
||||
// Manually age the connection to trigger half-zombie cleanup
|
||||
const outerConnMgr = (outerProxy as any).connectionManager as ConnectionManager;
|
||||
const records = Array.from((outerConnMgr as any).connectionRecords.values()) as IConnectionRecord[];
|
||||
records.forEach(record => {
|
||||
if (!record.connectionClosed) {
|
||||
// Age the connection by 35 seconds
|
||||
record.incomingStartTime -= 35000;
|
||||
}
|
||||
});
|
||||
|
||||
// Trigger inactivity check
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
|
||||
details = getConnectionDetails();
|
||||
console.log(`\nAfter half-zombie cleanup:`);
|
||||
console.log(` Outer: ${details.outer.count} connections, ${details.outer.zombies.length} zombies, ${details.outer.halfZombies.length} half-zombies`);
|
||||
console.log(` Inner: ${details.inner.count} connections, ${details.inner.zombies.length} zombies, ${details.inner.halfZombies.length} half-zombies`);
|
||||
|
||||
// Clean up client2 properly
|
||||
if (!client2.destroyed) {
|
||||
client2.destroy();
|
||||
}
|
||||
|
||||
console.log('\n--- Test 3: Rapid zombie creation under load ---');
|
||||
|
||||
// Create multiple connections rapidly and destroy them
|
||||
const rapidClients: net.Socket[] = [];
|
||||
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const client = new net.Socket();
|
||||
rapidClients.push(client);
|
||||
|
||||
client.connect(8590, 'localhost', () => {
|
||||
console.log(`Rapid client ${i} connected`);
|
||||
client.write('GET / HTTP/1.1\r\nHost: test.com\r\n\r\n');
|
||||
|
||||
// Destroy after random delay
|
||||
setTimeout(() => {
|
||||
client.removeAllListeners();
|
||||
client.destroy();
|
||||
}, Math.random() * 500);
|
||||
});
|
||||
|
||||
// Small delay between connections
|
||||
await new Promise(resolve => setTimeout(resolve, 50));
|
||||
}
|
||||
|
||||
// Wait a bit
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
|
||||
details = getConnectionDetails();
|
||||
console.log(`\nAfter rapid connections:`);
|
||||
console.log(` Outer: ${details.outer.count} connections, ${details.outer.zombies.length} zombies, ${details.outer.halfZombies.length} half-zombies`);
|
||||
console.log(` Inner: ${details.inner.count} connections, ${details.inner.zombies.length} zombies, ${details.inner.halfZombies.length} half-zombies`);
|
||||
|
||||
// Wait for cleanup
|
||||
console.log('\nWaiting for final cleanup...');
|
||||
await new Promise(resolve => setTimeout(resolve, 3000));
|
||||
|
||||
details = getConnectionDetails();
|
||||
console.log(`\nFinal state:`);
|
||||
console.log(` Outer: ${details.outer.count} connections, ${details.outer.zombies.length} zombies, ${details.outer.halfZombies.length} half-zombies`);
|
||||
console.log(` Inner: ${details.inner.count} connections, ${details.inner.zombies.length} zombies, ${details.inner.halfZombies.length} half-zombies`);
|
||||
|
||||
// Cleanup
|
||||
await outerProxy.stop();
|
||||
await innerProxy.stop();
|
||||
backend.close();
|
||||
|
||||
// Verify all connections are cleaned up
|
||||
console.log('\n--- Verification ---');
|
||||
|
||||
if (details.outer.count === 0 && details.inner.count === 0) {
|
||||
console.log('✅ PASS: All zombie connections were cleaned up');
|
||||
} else {
|
||||
console.log('❌ FAIL: Some connections remain');
|
||||
}
|
||||
|
||||
expect(details.outer.count).toEqual(0);
|
||||
expect(details.inner.count).toEqual(0);
|
||||
expect(details.outer.zombies.length).toEqual(0);
|
||||
expect(details.inner.zombies.length).toEqual(0);
|
||||
expect(details.outer.halfZombies.length).toEqual(0);
|
||||
expect(details.inner.halfZombies.length).toEqual(0);
|
||||
});
|
||||
|
||||
tap.start();
|
@ -258,22 +258,61 @@ export function createSocketWithErrorHandler(options: SafeSocketOptions): plugin
|
||||
// Create socket with immediate error handler attachment
|
||||
const socket = new plugins.net.Socket();
|
||||
|
||||
// Track if connected
|
||||
let connected = false;
|
||||
let connectionTimeout: NodeJS.Timeout | null = null;
|
||||
|
||||
// Attach error handler BEFORE connecting to catch immediate errors
|
||||
socket.on('error', (error) => {
|
||||
console.error(`Socket connection error to ${host}:${port}: ${error.message}`);
|
||||
// Clear the connection timeout if it exists
|
||||
if (connectionTimeout) {
|
||||
clearTimeout(connectionTimeout);
|
||||
connectionTimeout = null;
|
||||
}
|
||||
if (onError) {
|
||||
onError(error);
|
||||
}
|
||||
});
|
||||
|
||||
// Attach connect handler if provided
|
||||
if (onConnect) {
|
||||
socket.on('connect', onConnect);
|
||||
}
|
||||
// Attach connect handler
|
||||
const handleConnect = () => {
|
||||
connected = true;
|
||||
// Clear the connection timeout
|
||||
if (connectionTimeout) {
|
||||
clearTimeout(connectionTimeout);
|
||||
connectionTimeout = null;
|
||||
}
|
||||
// Set inactivity timeout if provided (after connection is established)
|
||||
if (timeout) {
|
||||
socket.setTimeout(timeout);
|
||||
}
|
||||
if (onConnect) {
|
||||
onConnect();
|
||||
}
|
||||
};
|
||||
|
||||
// Set timeout if provided
|
||||
socket.on('connect', handleConnect);
|
||||
|
||||
// Implement connection establishment timeout
|
||||
if (timeout) {
|
||||
socket.setTimeout(timeout);
|
||||
connectionTimeout = setTimeout(() => {
|
||||
if (!connected && !socket.destroyed) {
|
||||
// Connection timed out - destroy the socket
|
||||
const error = new Error(`Connection timeout after ${timeout}ms to ${host}:${port}`);
|
||||
(error as any).code = 'ETIMEDOUT';
|
||||
|
||||
console.error(`Socket connection timeout to ${host}:${port} after ${timeout}ms`);
|
||||
|
||||
// Destroy the socket
|
||||
socket.destroy();
|
||||
|
||||
// Call error handler
|
||||
if (onError) {
|
||||
onError(error);
|
||||
}
|
||||
}
|
||||
}, timeout);
|
||||
}
|
||||
|
||||
// Now attempt to connect - any immediate errors will be caught
|
||||
|
@ -30,6 +30,7 @@ import * as smartacmeHandlers from '@push.rocks/smartacme/dist_ts/handlers/index
|
||||
import * as smartlog from '@push.rocks/smartlog';
|
||||
import * as smartlogDestinationLocal from '@push.rocks/smartlog/destination-local';
|
||||
import * as taskbuffer from '@push.rocks/taskbuffer';
|
||||
import * as smartrx from '@push.rocks/smartrx';
|
||||
|
||||
export {
|
||||
lik,
|
||||
@ -45,6 +46,7 @@ export {
|
||||
smartlog,
|
||||
smartlogDestinationLocal,
|
||||
taskbuffer,
|
||||
smartrx,
|
||||
};
|
||||
|
||||
// third party scope
|
||||
|
@ -140,10 +140,10 @@ export class ConnectionManager extends LifecycleComponent {
|
||||
* Start the inactivity check timer
|
||||
*/
|
||||
private startInactivityCheckTimer(): void {
|
||||
// Check every 30 seconds for connections that need inactivity check
|
||||
// Check more frequently (every 10 seconds) to catch zombies and stuck connections faster
|
||||
this.setInterval(() => {
|
||||
this.performOptimizedInactivityCheck();
|
||||
}, 30000);
|
||||
}, 10000);
|
||||
// Note: LifecycleComponent's setInterval already calls unref()
|
||||
}
|
||||
|
||||
@ -194,6 +194,13 @@ export class ConnectionManager extends LifecycleComponent {
|
||||
* Queue a connection for cleanup
|
||||
*/
|
||||
private queueCleanup(connectionId: string): void {
|
||||
// Check if connection is already being processed
|
||||
const record = this.connectionRecords.get(connectionId);
|
||||
if (!record || record.connectionClosed) {
|
||||
// Already cleaned up or doesn't exist, skip
|
||||
return;
|
||||
}
|
||||
|
||||
this.cleanupQueue.add(connectionId);
|
||||
|
||||
// Process immediately if queue is getting large
|
||||
@ -217,9 +224,10 @@ export class ConnectionManager extends LifecycleComponent {
|
||||
}
|
||||
|
||||
const toCleanup = Array.from(this.cleanupQueue).slice(0, this.cleanupBatchSize);
|
||||
this.cleanupQueue.clear();
|
||||
|
||||
// Remove only the items we're processing, not the entire queue!
|
||||
for (const connectionId of toCleanup) {
|
||||
this.cleanupQueue.delete(connectionId);
|
||||
const record = this.connectionRecords.get(connectionId);
|
||||
if (record) {
|
||||
this.cleanupConnection(record, record.incomingTerminationReason || 'normal');
|
||||
@ -456,6 +464,74 @@ export class ConnectionManager extends LifecycleComponent {
|
||||
}
|
||||
}
|
||||
|
||||
// Also check ALL connections for zombie state (destroyed sockets but not cleaned up)
|
||||
// This is critical for proxy chains where sockets can be destroyed without events
|
||||
for (const [connectionId, record] of this.connectionRecords) {
|
||||
if (!record.connectionClosed) {
|
||||
const incomingDestroyed = record.incoming?.destroyed || false;
|
||||
const outgoingDestroyed = record.outgoing?.destroyed || false;
|
||||
|
||||
// Check for zombie connections: both sockets destroyed but connection not cleaned up
|
||||
if (incomingDestroyed && outgoingDestroyed) {
|
||||
logger.log('warn', `Zombie connection detected: ${connectionId} - both sockets destroyed but not cleaned up`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
age: plugins.prettyMs(now - record.incomingStartTime),
|
||||
component: 'connection-manager'
|
||||
});
|
||||
|
||||
// Clean up immediately
|
||||
this.cleanupConnection(record, 'zombie_cleanup');
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for half-zombie: one socket destroyed
|
||||
if (incomingDestroyed || outgoingDestroyed) {
|
||||
const age = now - record.incomingStartTime;
|
||||
// Give it 30 seconds grace period for normal cleanup
|
||||
if (age > 30000) {
|
||||
logger.log('warn', `Half-zombie connection detected: ${connectionId} - ${incomingDestroyed ? 'incoming' : 'outgoing'} destroyed`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
age: plugins.prettyMs(age),
|
||||
incomingDestroyed,
|
||||
outgoingDestroyed,
|
||||
component: 'connection-manager'
|
||||
});
|
||||
|
||||
// Clean up
|
||||
this.cleanupConnection(record, 'half_zombie_cleanup');
|
||||
}
|
||||
}
|
||||
|
||||
// Check for stuck connections: no data sent back to client
|
||||
if (!record.connectionClosed && record.outgoing && record.bytesReceived > 0 && record.bytesSent === 0) {
|
||||
const age = now - record.incomingStartTime;
|
||||
// If connection is older than 60 seconds and no data sent back, likely stuck
|
||||
if (age > 60000) {
|
||||
logger.log('warn', `Stuck connection detected: ${connectionId} - received ${record.bytesReceived} bytes but sent 0 bytes`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
age: plugins.prettyMs(age),
|
||||
bytesReceived: record.bytesReceived,
|
||||
targetHost: record.targetHost,
|
||||
targetPort: record.targetPort,
|
||||
component: 'connection-manager'
|
||||
});
|
||||
|
||||
// Set termination reason and increment stats
|
||||
if (record.incomingTerminationReason == null) {
|
||||
record.incomingTerminationReason = 'stuck_no_response';
|
||||
this.incrementTerminationStat('incoming', 'stuck_no_response');
|
||||
}
|
||||
|
||||
// Clean up
|
||||
this.cleanupConnection(record, 'stuck_no_response');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process only connections that need checking
|
||||
for (const connectionId of connectionsToCheck) {
|
||||
const record = this.connectionRecords.get(connectionId);
|
||||
|
285
ts/proxies/smart-proxy/metrics-collector.ts
Normal file
285
ts/proxies/smart-proxy/metrics-collector.ts
Normal file
@ -0,0 +1,285 @@
|
||||
import * as plugins from '../../plugins.js';
|
||||
import type { SmartProxy } from './smart-proxy.js';
|
||||
import type { IProxyStats, IProxyStatsExtended } from './models/metrics-types.js';
|
||||
import { logger } from '../../core/utils/logger.js';
|
||||
|
||||
/**
|
||||
* Collects and computes metrics for SmartProxy on-demand
|
||||
*/
|
||||
export class MetricsCollector implements IProxyStatsExtended {
|
||||
// RPS tracking (the only state we need to maintain)
|
||||
private requestTimestamps: number[] = [];
|
||||
private readonly RPS_WINDOW_SIZE = 60000; // 1 minute window
|
||||
|
||||
// Optional caching for performance
|
||||
private cachedMetrics: {
|
||||
timestamp: number;
|
||||
connectionsByRoute?: Map<string, number>;
|
||||
connectionsByIP?: Map<string, number>;
|
||||
} = { timestamp: 0 };
|
||||
|
||||
private readonly CACHE_TTL = 1000; // 1 second cache
|
||||
|
||||
// RxJS subscription for connection events
|
||||
private connectionSubscription?: plugins.smartrx.rxjs.Subscription;
|
||||
|
||||
constructor(
|
||||
private smartProxy: SmartProxy
|
||||
) {
|
||||
// Subscription will be set up in start() method
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current number of active connections
|
||||
*/
|
||||
public getActiveConnections(): number {
|
||||
return this.smartProxy.connectionManager.getConnectionCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get connection counts grouped by route name
|
||||
*/
|
||||
public getConnectionsByRoute(): Map<string, number> {
|
||||
const now = Date.now();
|
||||
|
||||
// Return cached value if fresh
|
||||
if (this.cachedMetrics.connectionsByRoute &&
|
||||
now - this.cachedMetrics.timestamp < this.CACHE_TTL) {
|
||||
return new Map(this.cachedMetrics.connectionsByRoute);
|
||||
}
|
||||
|
||||
// Compute fresh value
|
||||
const routeCounts = new Map<string, number>();
|
||||
const connections = this.smartProxy.connectionManager.getConnections();
|
||||
|
||||
if (this.smartProxy.settings?.enableDetailedLogging) {
|
||||
logger.log('debug', `MetricsCollector: Computing route connections`, {
|
||||
totalConnections: connections.size,
|
||||
component: 'metrics'
|
||||
});
|
||||
}
|
||||
|
||||
for (const [_, record] of connections) {
|
||||
// Try different ways to get the route name
|
||||
const routeName = (record as any).routeName ||
|
||||
record.routeConfig?.name ||
|
||||
(record.routeConfig as any)?.routeName ||
|
||||
'unknown';
|
||||
|
||||
if (this.smartProxy.settings?.enableDetailedLogging) {
|
||||
logger.log('debug', `MetricsCollector: Connection route info`, {
|
||||
connectionId: record.id,
|
||||
routeName,
|
||||
hasRouteConfig: !!record.routeConfig,
|
||||
routeConfigName: record.routeConfig?.name,
|
||||
routeConfigKeys: record.routeConfig ? Object.keys(record.routeConfig) : [],
|
||||
component: 'metrics'
|
||||
});
|
||||
}
|
||||
|
||||
const current = routeCounts.get(routeName) || 0;
|
||||
routeCounts.set(routeName, current + 1);
|
||||
}
|
||||
|
||||
// Cache and return
|
||||
this.cachedMetrics.connectionsByRoute = routeCounts;
|
||||
this.cachedMetrics.timestamp = now;
|
||||
return new Map(routeCounts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get connection counts grouped by IP address
|
||||
*/
|
||||
public getConnectionsByIP(): Map<string, number> {
|
||||
const now = Date.now();
|
||||
|
||||
// Return cached value if fresh
|
||||
if (this.cachedMetrics.connectionsByIP &&
|
||||
now - this.cachedMetrics.timestamp < this.CACHE_TTL) {
|
||||
return new Map(this.cachedMetrics.connectionsByIP);
|
||||
}
|
||||
|
||||
// Compute fresh value
|
||||
const ipCounts = new Map<string, number>();
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
const ip = record.remoteIP;
|
||||
const current = ipCounts.get(ip) || 0;
|
||||
ipCounts.set(ip, current + 1);
|
||||
}
|
||||
|
||||
// Cache and return
|
||||
this.cachedMetrics.connectionsByIP = ipCounts;
|
||||
this.cachedMetrics.timestamp = now;
|
||||
return new Map(ipCounts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total number of connections since proxy start
|
||||
*/
|
||||
public getTotalConnections(): number {
|
||||
// Get from termination stats
|
||||
const stats = this.smartProxy.connectionManager.getTerminationStats();
|
||||
let total = this.smartProxy.connectionManager.getConnectionCount(); // Add active connections
|
||||
|
||||
// Add all terminated connections
|
||||
for (const reason in stats.incoming) {
|
||||
total += stats.incoming[reason];
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current requests per second rate
|
||||
*/
|
||||
public getRequestsPerSecond(): number {
|
||||
const now = Date.now();
|
||||
const windowStart = now - this.RPS_WINDOW_SIZE;
|
||||
|
||||
// Clean old timestamps
|
||||
this.requestTimestamps = this.requestTimestamps.filter(ts => ts > windowStart);
|
||||
|
||||
// Calculate RPS based on window
|
||||
const requestsInWindow = this.requestTimestamps.length;
|
||||
return requestsInWindow / (this.RPS_WINDOW_SIZE / 1000);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a new request for RPS tracking
|
||||
*/
|
||||
public recordRequest(): void {
|
||||
this.requestTimestamps.push(Date.now());
|
||||
|
||||
// Prevent unbounded growth
|
||||
if (this.requestTimestamps.length > 10000) {
|
||||
this.cleanupOldRequests();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total throughput (bytes transferred)
|
||||
*/
|
||||
public getThroughput(): { bytesIn: number; bytesOut: number } {
|
||||
let bytesIn = 0;
|
||||
let bytesOut = 0;
|
||||
|
||||
// Sum bytes from all active connections
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
bytesIn += record.bytesReceived;
|
||||
bytesOut += record.bytesSent;
|
||||
}
|
||||
|
||||
return { bytesIn, bytesOut };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get throughput rate (bytes per second) for last minute
|
||||
*/
|
||||
public getThroughputRate(): { bytesInPerSec: number; bytesOutPerSec: number } {
|
||||
const now = Date.now();
|
||||
let recentBytesIn = 0;
|
||||
let recentBytesOut = 0;
|
||||
|
||||
// Calculate bytes transferred in last minute from active connections
|
||||
for (const [_, record] of this.smartProxy.connectionManager.getConnections()) {
|
||||
const connectionAge = now - record.incomingStartTime;
|
||||
if (connectionAge < 60000) { // Connection started within last minute
|
||||
recentBytesIn += record.bytesReceived;
|
||||
recentBytesOut += record.bytesSent;
|
||||
} else {
|
||||
// For older connections, estimate rate based on average
|
||||
const rate = connectionAge / 60000;
|
||||
recentBytesIn += record.bytesReceived / rate;
|
||||
recentBytesOut += record.bytesSent / rate;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
bytesInPerSec: Math.round(recentBytesIn / 60),
|
||||
bytesOutPerSec: Math.round(recentBytesOut / 60)
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get top IPs by connection count
|
||||
*/
|
||||
public getTopIPs(limit: number = 10): Array<{ ip: string; connections: number }> {
|
||||
const ipCounts = this.getConnectionsByIP();
|
||||
const sorted = Array.from(ipCounts.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, limit)
|
||||
.map(([ip, connections]) => ({ ip, connections }));
|
||||
|
||||
return sorted;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an IP has reached the connection limit
|
||||
*/
|
||||
public isIPBlocked(ip: string, maxConnectionsPerIP: number): boolean {
|
||||
const ipCounts = this.getConnectionsByIP();
|
||||
const currentConnections = ipCounts.get(ip) || 0;
|
||||
return currentConnections >= maxConnectionsPerIP;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up old request timestamps
|
||||
*/
|
||||
private cleanupOldRequests(): void {
|
||||
const cutoff = Date.now() - this.RPS_WINDOW_SIZE;
|
||||
this.requestTimestamps = this.requestTimestamps.filter(ts => ts > cutoff);
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the metrics collector and set up subscriptions
|
||||
*/
|
||||
public start(): void {
|
||||
if (!this.smartProxy.routeConnectionHandler) {
|
||||
throw new Error('MetricsCollector: RouteConnectionHandler not available');
|
||||
}
|
||||
|
||||
// Subscribe to the newConnectionSubject from RouteConnectionHandler
|
||||
this.connectionSubscription = this.smartProxy.routeConnectionHandler.newConnectionSubject.subscribe({
|
||||
next: (record) => {
|
||||
this.recordRequest();
|
||||
|
||||
// Optional: Log connection details
|
||||
if (this.smartProxy.settings?.enableDetailedLogging) {
|
||||
logger.log('debug', `MetricsCollector: New connection recorded`, {
|
||||
connectionId: record.id,
|
||||
remoteIP: record.remoteIP,
|
||||
routeName: record.routeConfig?.name || 'unknown',
|
||||
component: 'metrics'
|
||||
});
|
||||
}
|
||||
},
|
||||
error: (err) => {
|
||||
logger.log('error', `MetricsCollector: Error in connection subscription`, {
|
||||
error: err.message,
|
||||
component: 'metrics'
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
logger.log('debug', 'MetricsCollector started', { component: 'metrics' });
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the metrics collector and clean up resources
|
||||
*/
|
||||
public stop(): void {
|
||||
if (this.connectionSubscription) {
|
||||
this.connectionSubscription.unsubscribe();
|
||||
this.connectionSubscription = undefined;
|
||||
}
|
||||
|
||||
logger.log('debug', 'MetricsCollector stopped', { component: 'metrics' });
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias for stop() for backward compatibility
|
||||
*/
|
||||
public destroy(): void {
|
||||
this.stop();
|
||||
}
|
||||
}
|
@ -4,3 +4,4 @@
|
||||
// Export everything except IAcmeOptions from interfaces
|
||||
export type { ISmartProxyOptions, IConnectionRecord, TSmartProxyCertProvisionObject } from './interfaces.js';
|
||||
export * from './route-types.js';
|
||||
export * from './metrics-types.js';
|
||||
|
@ -69,6 +69,7 @@ export interface ISmartProxyOptions {
|
||||
maxVersion?: string;
|
||||
|
||||
// Timeout settings
|
||||
connectionTimeout?: number; // Timeout for establishing connection to backend (ms), default: 30000 (30s)
|
||||
initialDataTimeout?: number; // Timeout for initial data/SNI (ms), default: 60000 (60s)
|
||||
socketTimeout?: number; // Socket inactivity timeout (ms), default: 3600000 (1h)
|
||||
inactivityCheckInterval?: number; // How often to check for inactive connections (ms), default: 60000 (60s)
|
||||
|
54
ts/proxies/smart-proxy/models/metrics-types.ts
Normal file
54
ts/proxies/smart-proxy/models/metrics-types.ts
Normal file
@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Interface for proxy statistics and metrics
|
||||
*/
|
||||
export interface IProxyStats {
|
||||
/**
|
||||
* Get the current number of active connections
|
||||
*/
|
||||
getActiveConnections(): number;
|
||||
|
||||
/**
|
||||
* Get connection counts grouped by route name
|
||||
*/
|
||||
getConnectionsByRoute(): Map<string, number>;
|
||||
|
||||
/**
|
||||
* Get connection counts grouped by IP address
|
||||
*/
|
||||
getConnectionsByIP(): Map<string, number>;
|
||||
|
||||
/**
|
||||
* Get the total number of connections since proxy start
|
||||
*/
|
||||
getTotalConnections(): number;
|
||||
|
||||
/**
|
||||
* Get the current requests per second rate
|
||||
*/
|
||||
getRequestsPerSecond(): number;
|
||||
|
||||
/**
|
||||
* Get total throughput (bytes transferred)
|
||||
*/
|
||||
getThroughput(): { bytesIn: number; bytesOut: number };
|
||||
}
|
||||
|
||||
/**
|
||||
* Extended interface for additional metrics helpers
|
||||
*/
|
||||
export interface IProxyStatsExtended extends IProxyStats {
|
||||
/**
|
||||
* Get throughput rate (bytes per second) for last minute
|
||||
*/
|
||||
getThroughputRate(): { bytesInPerSec: number; bytesOutPerSec: number };
|
||||
|
||||
/**
|
||||
* Get top IPs by connection count
|
||||
*/
|
||||
getTopIPs(limit?: number): Array<{ ip: string; connections: number }>;
|
||||
|
||||
/**
|
||||
* Check if an IP has reached the connection limit
|
||||
*/
|
||||
isIPBlocked(ip: string, maxConnectionsPerIP: number): boolean;
|
||||
}
|
@ -23,6 +23,9 @@ export class RouteConnectionHandler {
|
||||
|
||||
// Cache for route contexts to avoid recreation
|
||||
private routeContextCache: Map<string, IRouteContext> = new Map();
|
||||
|
||||
// RxJS Subject for new connections
|
||||
public newConnectionSubject = new plugins.smartrx.rxjs.Subject<IConnectionRecord>();
|
||||
|
||||
constructor(
|
||||
settings: ISmartProxyOptions,
|
||||
@ -35,6 +38,7 @@ export class RouteConnectionHandler {
|
||||
) {
|
||||
this.settings = settings;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a route context object for port and host mapping functions
|
||||
@ -110,6 +114,9 @@ export class RouteConnectionHandler {
|
||||
// Connection was rejected due to limit - socket already destroyed by connection manager
|
||||
return;
|
||||
}
|
||||
|
||||
// Emit new connection event
|
||||
this.newConnectionSubject.next(record);
|
||||
const connectionId = record.id;
|
||||
|
||||
// Apply socket optimizations (apply to underlying socket)
|
||||
@ -199,25 +206,29 @@ export class RouteConnectionHandler {
|
||||
setupSocketHandlers(
|
||||
underlyingSocket,
|
||||
(reason) => {
|
||||
// Only cleanup if connection hasn't been fully established
|
||||
// Check if outgoing connection exists and is connected
|
||||
if (!record.outgoing || record.outgoing.readyState !== 'open') {
|
||||
logger.log('debug', `Connection ${connectionId} closed during immediate routing: ${reason}`, {
|
||||
// Always cleanup when incoming socket closes
|
||||
// This prevents connection accumulation in proxy chains
|
||||
logger.log('debug', `Connection ${connectionId} closed during immediate routing: ${reason}`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
reason,
|
||||
hasOutgoing: !!record.outgoing,
|
||||
outgoingState: record.outgoing?.readyState,
|
||||
component: 'route-handler'
|
||||
});
|
||||
|
||||
// If there's a pending or established outgoing connection, destroy it
|
||||
if (record.outgoing && !record.outgoing.destroyed) {
|
||||
logger.log('debug', `Destroying outgoing connection for ${connectionId}`, {
|
||||
connectionId,
|
||||
remoteIP: record.remoteIP,
|
||||
reason,
|
||||
hasOutgoing: !!record.outgoing,
|
||||
outgoingState: record.outgoing?.readyState,
|
||||
outgoingState: record.outgoing.readyState,
|
||||
component: 'route-handler'
|
||||
});
|
||||
|
||||
// If there's a pending outgoing connection, destroy it
|
||||
if (record.outgoing && !record.outgoing.destroyed) {
|
||||
record.outgoing.destroy();
|
||||
}
|
||||
|
||||
this.connectionManager.cleanupConnection(record, reason);
|
||||
record.outgoing.destroy();
|
||||
}
|
||||
|
||||
// Always cleanup the connection record
|
||||
this.connectionManager.cleanupConnection(record, reason);
|
||||
},
|
||||
undefined, // Use default timeout handler
|
||||
'immediate-route-client'
|
||||
@ -636,6 +647,9 @@ export class RouteConnectionHandler {
|
||||
): void {
|
||||
const connectionId = record.id;
|
||||
const action = route.action as IRouteAction;
|
||||
|
||||
// Store the route config in the connection record for metrics and other uses
|
||||
record.routeConfig = route;
|
||||
|
||||
// Check if this route uses NFTables for forwarding
|
||||
if (action.forwardingEngine === 'nftables') {
|
||||
@ -953,6 +967,9 @@ export class RouteConnectionHandler {
|
||||
): Promise<void> {
|
||||
const connectionId = record.id;
|
||||
|
||||
// Store the route config in the connection record for metrics and other uses
|
||||
record.routeConfig = route;
|
||||
|
||||
if (!route.action.socketHandler) {
|
||||
logger.log('error', 'socket-handler action missing socketHandler function', {
|
||||
connectionId,
|
||||
@ -1121,6 +1138,7 @@ export class RouteConnectionHandler {
|
||||
const targetSocket = createSocketWithErrorHandler({
|
||||
port: finalTargetPort,
|
||||
host: finalTargetHost,
|
||||
timeout: this.settings.connectionTimeout || 30000, // Connection timeout (default: 30s)
|
||||
onError: (error) => {
|
||||
// Connection failed - clean up everything immediately
|
||||
// Check if connection record is still valid (client might have disconnected)
|
||||
|
@ -27,6 +27,10 @@ import { Mutex } from './utils/mutex.js';
|
||||
// Import ACME state manager
|
||||
import { AcmeStateManager } from './acme-state-manager.js';
|
||||
|
||||
// Import metrics collector
|
||||
import { MetricsCollector } from './metrics-collector.js';
|
||||
import type { IProxyStats } from './models/metrics-types.js';
|
||||
|
||||
/**
|
||||
* SmartProxy - Pure route-based API
|
||||
*
|
||||
@ -47,13 +51,13 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
private isShuttingDown: boolean = false;
|
||||
|
||||
// Component managers
|
||||
private connectionManager: ConnectionManager;
|
||||
public connectionManager: ConnectionManager;
|
||||
private securityManager: SecurityManager;
|
||||
private tlsManager: TlsManager;
|
||||
private httpProxyBridge: HttpProxyBridge;
|
||||
private timeoutManager: TimeoutManager;
|
||||
public routeManager: RouteManager; // Made public for route management
|
||||
private routeConnectionHandler: RouteConnectionHandler;
|
||||
public routeConnectionHandler: RouteConnectionHandler; // Made public for metrics
|
||||
private nftablesManager: NFTablesManager;
|
||||
|
||||
// Certificate manager for ACME and static certificates
|
||||
@ -64,6 +68,9 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
private routeUpdateLock: any = null; // Will be initialized as AsyncMutex
|
||||
private acmeStateManager: AcmeStateManager;
|
||||
|
||||
// Metrics collector
|
||||
private metricsCollector: MetricsCollector;
|
||||
|
||||
// Track port usage across route updates
|
||||
private portUsageMap: Map<number, Set<string>> = new Map();
|
||||
|
||||
@ -204,6 +211,9 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
|
||||
// Initialize ACME state manager
|
||||
this.acmeStateManager = new AcmeStateManager();
|
||||
|
||||
// Initialize metrics collector with reference to this SmartProxy instance
|
||||
this.metricsCollector = new MetricsCollector(this);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -383,6 +393,9 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
logger.log('info', 'Starting certificate provisioning now that ports are ready', { component: 'certificate-manager' });
|
||||
await this.certManager.provisionAllCertificates();
|
||||
}
|
||||
|
||||
// Start the metrics collector now that all components are initialized
|
||||
this.metricsCollector.start();
|
||||
|
||||
// Set up periodic connection logging and inactivity checks
|
||||
this.connectionLogger = setInterval(() => {
|
||||
@ -508,6 +521,9 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
|
||||
// Clear ACME state manager
|
||||
this.acmeStateManager.clear();
|
||||
|
||||
// Stop metrics collector
|
||||
this.metricsCollector.stop();
|
||||
|
||||
logger.log('info', 'SmartProxy shutdown complete.');
|
||||
}
|
||||
@ -905,6 +921,15 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
return this.certManager.getCertificateStatus(routeName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get proxy statistics and metrics
|
||||
*
|
||||
* @returns IProxyStats interface with various metrics methods
|
||||
*/
|
||||
public getStats(): IProxyStats {
|
||||
return this.metricsCollector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates if a domain name is valid for certificate issuance
|
||||
*/
|
||||
|
Reference in New Issue
Block a user