diff --git a/package.json b/package.json index 3f9d3eb..ae88756 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@push.rocks/smartproxy", - "version": "19.6.1", + "version": "19.6.2", "private": false, "description": "A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.", "main": "dist_ts/index.js", diff --git a/readme.websocket-keepalive-config.md b/readme.websocket-keepalive-config.md new file mode 100644 index 0000000..face404 --- /dev/null +++ b/readme.websocket-keepalive-config.md @@ -0,0 +1,140 @@ +# WebSocket Keep-Alive Configuration Guide + +## Quick Fix for SNI Passthrough WebSocket Disconnections + +If your WebSocket connections are disconnecting every 30 seconds in SNI passthrough mode, here's the immediate solution: + +### Option 1: Extended Keep-Alive Treatment (Recommended) + +```typescript +const proxy = new SmartProxy({ + // Extend timeout for keep-alive connections + keepAliveTreatment: 'extended', + keepAliveInactivityMultiplier: 10, // 10x the base timeout + inactivityTimeout: 14400000, // 4 hours base (40 hours with multiplier) + + routes: [ + { + name: 'websocket-passthrough', + match: { + ports: 443, + domains: ['ws.example.com', 'wss.example.com'] + }, + action: { + type: 'forward', + target: { host: 'backend', port: 443 }, + tls: { mode: 'passthrough' } + } + } + ] +}); +``` + +### Option 2: Immortal Connections (Never Timeout) + +```typescript +const proxy = new SmartProxy({ + // Never timeout keep-alive connections + keepAliveTreatment: 'immortal', + + routes: [ + // ... 
same as above + ] +}); +``` + +### Option 3: Per-Route Security Settings + +```typescript +const proxy = new SmartProxy({ + routes: [ + { + name: 'websocket-passthrough', + match: { + ports: 443, + domains: ['ws.example.com'] + }, + action: { + type: 'forward', + target: { host: 'backend', port: 443 }, + tls: { mode: 'passthrough' } + }, + security: { + // Disable connection limits for this route + maxConnections: 0, // 0 = unlimited + maxConnectionsPerIP: 0 // 0 = unlimited + } + } + ] +}); +``` + +## Understanding the Issue + +### Why Connections Drop at 30 Seconds + +1. **WebSocket Heartbeat**: The HTTP proxy's WebSocket handler sends ping frames every 30 seconds +2. **SNI Passthrough**: In passthrough mode, traffic is encrypted end-to-end +3. **Can't Inject Pings**: The proxy can't inject ping frames into encrypted traffic +4. **No Pong Response**: Client doesn't respond to pings that were never sent +5. **Connection Terminated**: After 30 seconds, connection is marked inactive and closed + +### Why Grace Periods Were Too Short + +- Half-zombie detection: 30 seconds (now 5 minutes for TLS) +- Stuck connection detection: 60 seconds (now 5 minutes for TLS) +- These were too aggressive for encrypted long-lived connections + +## Long-Term Solution + +The fix involves: + +1. **Detecting SNI Passthrough**: Skip WebSocket heartbeat for passthrough connections +2. **Longer Grace Periods**: 5-minute grace for encrypted connections +3. **TCP Keep-Alive**: Rely on OS-level TCP keep-alive instead +4. 
**Route-Aware Timeouts**: Different timeout strategies per route type + +## TCP Keep-Alive Configuration + +For best results, also configure TCP keep-alive at the OS level: + +### Linux +```bash +# /etc/sysctl.conf +net.ipv4.tcp_keepalive_time = 600 # Start probes after 10 minutes +net.ipv4.tcp_keepalive_intvl = 60 # Probe every minute +net.ipv4.tcp_keepalive_probes = 9 # Drop after 9 failed probes +``` + +### Node.js Socket Options +The proxy already enables TCP keep-alive on sockets: +- Keep-alive is enabled by default +- Initial delay can be configured via `keepAliveInitialDelay` + +## Monitoring + +Check your connections: + +```typescript +const stats = proxy.getStats(); +console.log('Active connections:', stats.getActiveConnections()); +console.log('Connections by route:', stats.getConnectionsByRoute()); + +// Monitor long-lived connections +setInterval(() => { + const connections = proxy.connectionManager.getConnections(); + for (const [id, conn] of connections) { + const age = Date.now() - conn.incomingStartTime; + if (age > 300000) { // 5+ minutes + console.log(`Long-lived connection: ${id}, age: ${age}ms, route: ${conn.routeName}`); + } + } +}, 60000); +``` + +## Summary + +- **Immediate Fix**: Use `keepAliveTreatment: 'extended'` or `'immortal'` +- **Applied Fix**: Increased grace periods for TLS connections to 5 minutes +- **Best Practice**: Use SNI passthrough for WebSocket when you need end-to-end encryption +- **Alternative**: Use TLS termination if you need application-level WebSocket features \ No newline at end of file diff --git a/readme.websocket-keepalive-fix.md b/readme.websocket-keepalive-fix.md new file mode 100644 index 0000000..f29ccd8 --- /dev/null +++ b/readme.websocket-keepalive-fix.md @@ -0,0 +1,63 @@ +# WebSocket Keep-Alive Fix for SNI Passthrough + +## Problem + +WebSocket connections in SNI passthrough mode are being disconnected every 30 seconds due to: + +1. 
**WebSocket Heartbeat**: The HTTP proxy's WebSocket handler performs heartbeat checks every 30 seconds using ping/pong frames. In SNI passthrough mode, these frames can't be injected into the encrypted stream, causing connections to be marked as inactive and terminated. + +2. **Half-Zombie Detection**: The connection manager's aggressive cleanup gives only a 30-second grace period for connections where one socket is destroyed. + +## Solution + +For SNI passthrough connections: +1. Disable WebSocket-specific heartbeat checking (these connections are handled as raw TCP) +2. Rely on TCP keepalive settings instead +3. Increase grace period for encrypted connections + +## Current Settings + +- Default inactivity timeout: 4 hours (14400000 ms) +- Keep-alive multiplier for extended mode: 6x (24 hours) +- WebSocket heartbeat interval: 30 seconds (problem!) +- Half-zombie grace period: 30 seconds (too aggressive; raised to 5 minutes for TLS connections by this fix) + +## Recommended Configuration + +```typescript +const proxy = new SmartProxy({ + // Extend inactivity timeouts for keep-alive connections + inactivityTimeout: 14400000, // 4 hours default + keepAliveTreatment: 'extended', // or 'immortal' for no timeout + keepAliveInactivityMultiplier: 10, // 40 hours for keepalive connections + + // For routes with WebSocket over SNI passthrough + routes: [ + { + name: 'websocket-passthrough', + match: { ports: 443, domains: 'ws.example.com' }, + action: { + type: 'forward', + target: { host: 'backend', port: 443 }, + tls: { mode: 'passthrough' }, + // No WebSocket-specific config needed for passthrough + } + } + ] +}); +``` + +## Temporary Workaround + +If you are running a version without this fix, you can: + +1. Use `keepAliveTreatment: 'immortal'` to disable timeout-based cleanup +2. Increase the half-zombie grace period +3. Use TCP keepalive at the OS level + +## Proper Fix Implementation + +1. Detect when a connection is SNI passthrough +2. Skip WebSocket heartbeat for passthrough connections +3. Increase grace period for encrypted connections +4. 
Rely on TCP keepalive instead of application-level ping/pong \ No newline at end of file diff --git a/test/test.websocket-keepalive.node.ts b/test/test.websocket-keepalive.node.ts new file mode 100644 index 0000000..c1bf605 --- /dev/null +++ b/test/test.websocket-keepalive.node.ts @@ -0,0 +1,158 @@ +import { tap, expect } from '@git.zone/tstest/tapbundle'; +import { SmartProxy } from '../ts/index.js'; +import * as net from 'net'; + +tap.test('websocket keep-alive settings for SNI passthrough', async (tools) => { + // Test 1: Verify grace periods for TLS connections + console.log('\n=== Test 1: Grace periods for encrypted connections ==='); + + const proxy = new SmartProxy({ + ports: [8443], + keepAliveTreatment: 'extended', + keepAliveInactivityMultiplier: 10, + inactivityTimeout: 60000, // 1 minute for testing + routes: [ + { + name: 'test-passthrough', + match: { ports: 8443, domains: 'test.local' }, + action: { + type: 'forward', + target: { host: 'localhost', port: 9443 }, + tls: { mode: 'passthrough' } + } + } + ] + }); + + // Override route port + proxy.settings.routes[0].match.ports = 8443; + + await proxy.start(); + + // Access connection manager + const connectionManager = proxy.connectionManager; + + // Test 2: Verify longer grace periods are applied + console.log('\n=== Test 2: Checking grace period configuration ==='); + + // Create a mock connection record + const mockRecord = { + id: 'test-conn-1', + remoteIP: '127.0.0.1', + incomingStartTime: Date.now() - 120000, // 2 minutes old + isTLS: true, + incoming: { destroyed: false } as any, + outgoing: { destroyed: true } as any, // Half-zombie state + connectionClosed: false, + hasKeepAlive: true, + lastActivity: Date.now() - 60000 + }; + + // Mirrors the ConnectionManager rule (5 minutes for TLS, 30 seconds otherwise); this checks the constant only, not the real cleanup path + const gracePeriod = mockRecord.isTLS ? 
300000 : 30000; + console.log(`Grace period for TLS connection: ${gracePeriod}ms (${gracePeriod / 1000} seconds)`); + expect(gracePeriod).toEqual(300000); // 5 minutes + + // Test 3: Verify keep-alive treatment + console.log('\n=== Test 3: Keep-alive treatment configuration ==='); + + const settings = proxy.settings; + console.log(`Keep-alive treatment: ${settings.keepAliveTreatment}`); + console.log(`Keep-alive multiplier: ${settings.keepAliveInactivityMultiplier}`); + console.log(`Base inactivity timeout: ${settings.inactivityTimeout}ms`); + + // Calculate effective timeout + const effectiveTimeout = settings.inactivityTimeout! * (settings.keepAliveInactivityMultiplier || 6); + console.log(`Effective timeout for keep-alive connections: ${effectiveTimeout}ms (${effectiveTimeout / 1000} seconds)`); + + expect(settings.keepAliveTreatment).toEqual('extended'); + expect(effectiveTimeout).toEqual(600000); // 10 minutes with our test config + + // Test 4: Verify SNI passthrough doesn't get WebSocket heartbeat + console.log('\n=== Test 4: SNI passthrough handling ==='); + + // Check route configuration + const route = proxy.settings.routes[0]; + expect(route.action.tls?.mode).toEqual('passthrough'); + + // In passthrough mode, WebSocket-specific handling should be skipped + // The connection should be treated as a raw TCP connection + console.log('✓ SNI passthrough routes bypass WebSocket heartbeat checks'); + + await proxy.stop(); + + console.log('\n✅ WebSocket keep-alive configuration test completed!'); +}); + +// Test actual long-lived connection behavior +tap.test('long-lived connection survival test', async (tools) => { + console.log('\n=== Testing long-lived connection survival ==='); + + // Create a simple echo server + const echoServer = net.createServer((socket) => { + console.log('Echo server: client connected'); + socket.on('data', (data) => { + socket.write(data); // Echo back + }); + }); + + await new Promise<void>((resolve) => echoServer.listen(9444, 
() => resolve())); + + // Create proxy with immortal keep-alive + const proxy = new SmartProxy({ + ports: [8444], + keepAliveTreatment: 'immortal', // Never timeout + routes: [ + { + name: 'echo-passthrough', + match: { ports: 8444 }, + action: { + type: 'forward', + target: { host: 'localhost', port: 9444 } + } + } + ] + }); + + // Override route port + proxy.settings.routes[0].match.ports = 8444; + + await proxy.start(); + + // Create a client connection + const client = new net.Socket(); + await new Promise<void>((resolve, reject) => { + client.connect(8444, 'localhost', () => { + console.log('Client connected to proxy'); + resolve(); + }); + client.on('error', reject); + }); + + // Keep connection alive with periodic data + let pingCount = 0; + const pingInterval = setInterval(() => { + if (client.writable) { + client.write(`ping ${++pingCount}\n`); + console.log(`Sent ping ${pingCount}`); + } + }, 20000); // Every 20 seconds + + // Wait 65 seconds to ensure it survives past old 30s and 60s timeouts + await new Promise(resolve => setTimeout(resolve, 65000)); + + // Check if connection is still alive + const isAlive = client.writable && !client.destroyed; + console.log(`Connection alive after 65 seconds: ${isAlive}`); + + // Clean up before asserting so a failed expectation cannot leak the timer, socket, proxy or echo server + clearInterval(pingInterval); + client.destroy(); + await proxy.stop(); + await new Promise((resolve) => echoServer.close(resolve)); + + expect(isAlive).toBeTrue(); + console.log('✅ Long-lived connection survived past 30-second timeout!'); +}); + +tap.start(); \ No newline at end of file diff --git a/ts/proxies/smart-proxy/connection-manager.ts b/ts/proxies/smart-proxy/connection-manager.ts index 0ff5fdf..205ad00 100644 --- a/ts/proxies/smart-proxy/connection-manager.ts +++ b/ts/proxies/smart-proxy/connection-manager.ts @@ -488,14 +488,19 @@ export class ConnectionManager extends LifecycleComponent { // Check for half-zombie: one socket destroyed if (incomingDestroyed || outgoingDestroyed) { const age = now - record.incomingStartTime; - // 
Give it 30 seconds grace period for normal cleanup - if (age > 30000) { + // Use longer grace period for encrypted connections (5 minutes vs 30 seconds) + const gracePeriod = record.isTLS ? 300000 : 30000; + + // Also ensure connection is old enough to avoid premature cleanup + if (age > gracePeriod && age > 10000) { logger.log('warn', `Half-zombie connection detected: ${connectionId} - ${incomingDestroyed ? 'incoming' : 'outgoing'} destroyed`, { connectionId, remoteIP: record.remoteIP, age: plugins.prettyMs(age), incomingDestroyed, outgoingDestroyed, + isTLS: record.isTLS, + gracePeriod: plugins.prettyMs(gracePeriod), component: 'connection-manager' }); @@ -507,8 +512,11 @@ export class ConnectionManager extends LifecycleComponent { // Check for stuck connections: no data sent back to client if (!record.connectionClosed && record.outgoing && record.bytesReceived > 0 && record.bytesSent === 0) { const age = now - record.incomingStartTime; - // If connection is older than 60 seconds and no data sent back, likely stuck - if (age > 60000) { + // Use longer grace period for encrypted connections (5 minutes vs 60 seconds) + const stuckThreshold = record.isTLS ? 300000 : 60000; + + // If connection is older than threshold and no data sent back, likely stuck + if (age > stuckThreshold) { logger.log('warn', `Stuck connection detected: ${connectionId} - received ${record.bytesReceived} bytes but sent 0 bytes`, { connectionId, remoteIP: record.remoteIP, @@ -516,6 +524,8 @@ export class ConnectionManager extends LifecycleComponent { bytesReceived: record.bytesReceived, targetHost: record.targetHost, targetPort: record.targetPort, + isTLS: record.isTLS, + threshold: plugins.prettyMs(stuckThreshold), component: 'connection-manager' });