diff --git a/rust/crates/rustproxy-http/src/proxy_service.rs b/rust/crates/rustproxy-http/src/proxy_service.rs index ddd214f..6611090 100644 --- a/rust/crates/rustproxy-http/src/proxy_service.rs +++ b/rust/crates/rustproxy-http/src/proxy_service.rs @@ -34,12 +34,35 @@ use crate::upstream_selector::UpstreamSelector; /// Default upstream connect timeout (30 seconds). const DEFAULT_CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); +/// Default HTTP keep-alive idle timeout (60 seconds). +/// If no new request arrives within this duration, the connection is closed. +const DEFAULT_HTTP_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); + /// Default WebSocket inactivity timeout (1 hour). const DEFAULT_WS_INACTIVITY_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(3600); /// Default WebSocket max lifetime (24 hours). const DEFAULT_WS_MAX_LIFETIME: std::time::Duration = std::time::Duration::from_secs(86400); +/// RAII guard that decrements the active request counter on drop. +/// Ensures the counter is correct even if the request handler panics. +struct ActiveRequestGuard { + counter: Arc, +} + +impl ActiveRequestGuard { + fn new(counter: Arc) -> Self { + counter.fetch_add(1, Ordering::Relaxed); + Self { counter } + } +} + +impl Drop for ActiveRequestGuard { + fn drop(&mut self) { + self.counter.fetch_sub(1, Ordering::Relaxed); + } +} + /// Backend stream that can be either plain TCP or TLS-wrapped. /// Used for `terminate-and-reencrypt` mode where the backend requires TLS. pub(crate) enum BackendStream { @@ -125,6 +148,12 @@ pub struct HttpProxyService { backend_tls_config: Arc, /// Backend connection pool for reusing keep-alive connections. connection_pool: Arc, + /// HTTP keep-alive idle timeout: close connection if no new request arrives within this duration. + http_idle_timeout: std::time::Duration, + /// WebSocket inactivity timeout (no data in either direction). + ws_inactivity_timeout: std::time::Duration, + /// WebSocket maximum connection lifetime. + ws_max_lifetime: std::time::Duration, } impl HttpProxyService { @@ -139,6 +168,9 @@ impl HttpProxyService { regex_cache: DashMap::new(), backend_tls_config: Self::default_backend_tls_config(), connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()), + http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT, + ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT, + ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME, } } @@ -158,9 +190,25 @@ impl HttpProxyService { regex_cache: DashMap::new(), backend_tls_config: Self::default_backend_tls_config(), connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()), + http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT, + ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT, + ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME, } } + /// Set the HTTP keep-alive idle timeout, WebSocket inactivity timeout, and + /// WebSocket max lifetime from connection config values. + pub fn set_connection_timeouts( + &mut self, + http_idle_timeout: std::time::Duration, + ws_inactivity_timeout: std::time::Duration, + ws_max_lifetime: std::time::Duration, + ) { + self.http_idle_timeout = http_idle_timeout; + self.ws_inactivity_timeout = ws_inactivity_timeout; + self.ws_max_lifetime = ws_max_lifetime; + } + /// Set the shared backend TLS config (enables session resumption). /// Call this after construction to inject the shared config from tls_handler. pub fn set_backend_tls_config(&mut self, config: Arc) { @@ -192,6 +240,10 @@ impl HttpProxyService { /// based on ALPN negotiation (TLS) or connection preface (h2c). /// Supports HTTP/1.1 upgrades (WebSocket) and HTTP/2 CONNECT. /// Responds to graceful shutdown via the cancel token. + /// + /// An idle watchdog closes the connection if no new HTTP request arrives + /// within `http_idle_timeout` (default 60s). This prevents keep-alive + /// connections from accumulating indefinitely. pub async fn handle_io( self: Arc, stream: I, @@ -204,13 +256,34 @@ impl HttpProxyService { { let io = TokioIo::new(stream); + // Capture timeouts before `self` is moved into the service closure. + let idle_timeout = self.http_idle_timeout; + + // Activity tracker: updated at the START and END of each request. + // The idle watchdog checks this to determine if the connection is idle + // (no request in progress and none started recently). + let last_activity = Arc::new(AtomicU64::new(0)); + let active_requests = Arc::new(AtomicU64::new(0)); + let start = std::time::Instant::now(); + + let la_inner = Arc::clone(&last_activity); + let ar_inner = Arc::clone(&active_requests); let cancel_inner = cancel.clone(); let service = hyper::service::service_fn(move |req: Request| { + // Mark request start — RAII guard decrements on drop (panic-safe) + la_inner.store(start.elapsed().as_millis() as u64, Ordering::Relaxed); + let req_guard = ActiveRequestGuard::new(Arc::clone(&ar_inner)); let svc = Arc::clone(&self); let peer = peer_addr; let cn = cancel_inner.clone(); + let la = Arc::clone(&la_inner); + let st = start; async move { - svc.handle_request(req, peer, port, cn).await + let result = svc.handle_request(req, peer, port, cn).await; + // Mark request end — update activity timestamp before guard drops + la.store(st.elapsed().as_millis() as u64, Ordering::Relaxed); + drop(req_guard); // Explicitly drop to decrement active_requests + result } }); @@ -221,7 +294,7 @@ impl HttpProxyService { // Pin on the heap — auto::UpgradeableConnection is !Unpin let mut conn = Box::pin(conn); - // Use select to support graceful shutdown via cancellation token + // Use select to support graceful shutdown, cancellation, and idle timeout tokio::select! { result = conn.as_mut() => { if let Err(e) = result { @@ -235,6 +308,37 @@ impl HttpProxyService { debug!("HTTP connection error during shutdown from {}: {}", peer_addr, e); } } + _ = async { + // Idle watchdog: check every 5s whether the connection has been idle + // (no active requests AND no activity for idle_timeout). + // This avoids killing long-running requests or upgraded connections. + let check_interval = std::time::Duration::from_secs(5); + let mut last_seen = 0u64; + loop { + tokio::time::sleep(check_interval).await; + + // Never close while a request is in progress + if active_requests.load(Ordering::Relaxed) > 0 { + last_seen = last_activity.load(Ordering::Relaxed); + continue; + } + + let current = last_activity.load(Ordering::Relaxed); + if current == last_seen { + // No new activity since last check + let elapsed_since_activity = start.elapsed().as_millis() as u64 - current; + if elapsed_since_activity >= idle_timeout.as_millis() as u64 { + return; + } + } + last_seen = current; + } + } => { + debug!("HTTP connection idle timeout ({}s) from {}", idle_timeout.as_secs(), peer_addr); + conn.as_mut().graceful_shutdown(); + // Give any in-flight work 5s to drain after graceful shutdown + let _ = tokio::time::timeout(std::time::Duration::from_secs(5), conn).await; + } } } @@ -1022,6 +1126,8 @@ impl HttpProxyService { let source_ip_owned = source_ip.to_string(); let upstream_selector = self.upstream_selector.clone(); let upstream_key_owned = upstream_key.to_string(); + let ws_inactivity_timeout = self.ws_inactivity_timeout; + let ws_max_lifetime = self.ws_max_lifetime; tokio::spawn(async move { let client_upgraded = match on_client_upgrade.await { @@ -1084,8 +1190,8 @@ impl HttpProxyService { let la_watch = Arc::clone(&last_activity); let c2u_handle = c2u.abort_handle(); let u2c_handle = u2c.abort_handle(); - let inactivity_timeout = DEFAULT_WS_INACTIVITY_TIMEOUT; - let max_lifetime = DEFAULT_WS_MAX_LIFETIME; + let inactivity_timeout = ws_inactivity_timeout; + let max_lifetime = ws_max_lifetime; let watchdog = tokio::spawn(async move { let check_interval = std::time::Duration::from_secs(5); @@ -1391,6 +1497,9 @@ impl Default for HttpProxyService { regex_cache: DashMap::new(), backend_tls_config: Self::default_backend_tls_config(), connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()), + http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT, + ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT, + ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME, } } } diff --git a/rust/crates/rustproxy-passthrough/src/tcp_listener.rs b/rust/crates/rustproxy-passthrough/src/tcp_listener.rs index fc916ae..3c792e4 100644 --- a/rust/crates/rustproxy-passthrough/src/tcp_listener.rs +++ b/rust/crates/rustproxy-passthrough/src/tcp_listener.rs @@ -174,6 +174,11 @@ impl TcpListenerManager { std::time::Duration::from_millis(conn_config.connection_timeout_ms), ); http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config()); + http_proxy_svc.set_connection_timeouts( + std::time::Duration::from_millis(conn_config.socket_timeout_ms), + std::time::Duration::from_millis(conn_config.socket_timeout_ms), + std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms), + ); let http_proxy = Arc::new(http_proxy_svc); let conn_tracker = Arc::new(ConnectionTracker::new( conn_config.max_connections_per_ip, @@ -204,6 +209,11 @@ impl TcpListenerManager { std::time::Duration::from_millis(conn_config.connection_timeout_ms), ); http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config()); + http_proxy_svc.set_connection_timeouts( + std::time::Duration::from_millis(conn_config.socket_timeout_ms), + std::time::Duration::from_millis(conn_config.socket_timeout_ms), + std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms), + ); let http_proxy = Arc::new(http_proxy_svc); let conn_tracker = Arc::new(ConnectionTracker::new( conn_config.max_connections_per_ip, @@ -232,6 +242,22 @@ impl TcpListenerManager { config.connection_rate_limit_per_minute, )); self.conn_semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_connections as usize)); + + // Rebuild http_proxy with updated timeouts + let rm = self.route_manager.load_full(); + let mut http_proxy_svc = HttpProxyService::with_connect_timeout( + rm, + Arc::clone(&self.metrics), + std::time::Duration::from_millis(config.connection_timeout_ms), + ); + http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config()); + http_proxy_svc.set_connection_timeouts( + std::time::Duration::from_millis(config.socket_timeout_ms), + std::time::Duration::from_millis(config.socket_timeout_ms), + std::time::Duration::from_millis(config.max_connection_lifetime_ms), + ); + self.http_proxy = Arc::new(http_proxy_svc); + self.conn_config = Arc::new(config); } @@ -336,13 +362,15 @@ impl TcpListenerManager { for (port, handle) in self.listeners.drain() { let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); + let abort_handle = handle.abort_handle(); if remaining.is_zero() { - handle.abort(); + abort_handle.abort(); warn!("Force-stopped listener on port {} (timeout exceeded)", port); } else { match tokio::time::timeout(remaining, handle).await { Ok(_) => info!("Listener on port {} stopped gracefully", port), Err(_) => { + abort_handle.abort(); warn!("Listener on port {} did not stop in time, aborting", port); } } @@ -791,7 +819,8 @@ impl TcpListenerManager { stream, n, port, peer_addr, &route_match, domain.as_deref(), is_tls, &relay_socket_path, - &metrics, route_id, + Arc::clone(&metrics), route_id, + &conn_config, cancel.clone(), ).await; } else { debug!("Socket-handler route matched but no relay path configured"); @@ -964,7 +993,7 @@ impl TcpListenerManager { let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts( tls_read, tls_write, backend_read, backend_write, - inactivity_timeout, max_lifetime, + inactivity_timeout, max_lifetime, cancel.clone(), Some(forwarder::ForwardMetricsCtx { collector: Arc::clone(&metrics), route_id: route_id.map(|s| s.to_string()), @@ -1023,7 +1052,7 @@ impl TcpListenerManager { Self::handle_tls_reencrypt_tunnel( buf_stream, &target_host, target_port, peer_addr, Arc::clone(&metrics), route_id, - &conn_config, + &conn_config, cancel.clone(), ).await?; } Ok(()) @@ -1100,8 +1129,10 @@ impl TcpListenerManager { domain: Option<&str>, is_tls: bool, relay_path: &str, - metrics: &MetricsCollector, + metrics: Arc, route_id: Option<&str>, + conn_config: &ConnectionConfig, + cancel: CancellationToken, ) -> Result<(), Box> { use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::UnixStream; @@ -1141,27 +1172,34 @@ impl TcpListenerManager { // Forward initial data to the Unix socket unix_stream.write_all(&initial_buf).await?; - // Bidirectional relay between TCP client and Unix socket handler + // Bidirectional relay with inactivity timeout, max lifetime, and cancellation. + // Split both streams and use the same watchdog pattern as other forwarding paths. let initial_len = initial_buf.len() as u64; - match tokio::io::copy_bidirectional(&mut stream, &mut unix_stream).await { - Ok((c2s, s2c)) => { - // Include initial data bytes that were forwarded before copy_bidirectional - let total_in = c2s + initial_len; - debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out", - route_key, total_in, s2c); - let ip = peer_addr.ip().to_string(); - metrics.record_bytes(total_in, s2c, route_id, Some(&ip)); - } - Err(e) => { - // Still record the initial data even on error - if initial_len > 0 { - let ip = peer_addr.ip().to_string(); - metrics.record_bytes(initial_len, 0, route_id, Some(&ip)); - } - debug!("Socket handler relay ended for {}: {}", route_key, e); - } + let inactivity_timeout = std::time::Duration::from_millis(conn_config.socket_timeout_ms); + let max_lifetime = std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms); + + let (tcp_read, tcp_write) = stream.into_split(); + let (unix_read, unix_write) = unix_stream.into_split(); + + let ip_str = peer_addr.ip().to_string(); + let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts( + tcp_read, tcp_write, unix_read, unix_write, + inactivity_timeout, max_lifetime, cancel, + Some(forwarder::ForwardMetricsCtx { + collector: Arc::clone(&metrics), + route_id: route_id.map(|s| s.to_string()), + source_ip: Some(ip_str.clone()), + }), + ).await; + + // Include the initial data that was forwarded before the bidirectional relay + if initial_len > 0 { + metrics.record_bytes(initial_len, 0, route_id, Some(&ip_str)); } + debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out", + route_key, _bytes_in + initial_len, _bytes_out); + Ok(()) } @@ -1176,6 +1214,7 @@ impl TcpListenerManager { metrics: Arc, route_id: Option<&str>, conn_config: &ConnectionConfig, + cancel: CancellationToken, ) -> Result<(), Box> { // Connect to backend over TLS with timeout let backend_tls = match tokio::time::timeout( @@ -1220,7 +1259,7 @@ impl TcpListenerManager { let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts( client_read, client_write, backend_read, backend_write, - inactivity_timeout, max_lifetime, + inactivity_timeout, max_lifetime, cancel, Some(forwarder::ForwardMetricsCtx { collector: metrics, route_id: route_id.map(|s| s.to_string()), @@ -1295,6 +1334,7 @@ impl TcpListenerManager { mut backend_write: W2, inactivity_timeout: std::time::Duration, max_lifetime: std::time::Duration, + cancel: CancellationToken, metrics: Option, ) -> (u64, u64) where @@ -1362,7 +1402,7 @@ impl TcpListenerManager { total }); - // Watchdog task: check for inactivity and max lifetime + // Watchdog task: check for inactivity, max lifetime, and cancellation let la_watch = Arc::clone(&last_activity); let c2b_handle = c2b.abort_handle(); let b2c_handle = b2c.abort_handle(); @@ -1370,29 +1410,37 @@ impl TcpListenerManager { let check_interval = std::time::Duration::from_secs(5); let mut last_seen = 0u64; loop { - tokio::time::sleep(check_interval).await; - - // Check max lifetime - if start.elapsed() >= max_lifetime { - debug!("Connection exceeded max lifetime, closing"); - c2b_handle.abort(); - b2c_handle.abort(); - break; - } - - // Check inactivity - let current = la_watch.load(Ordering::Relaxed); - if current == last_seen { - // No activity since last check - let elapsed_since_activity = start.elapsed().as_millis() as u64 - current; - if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 { - debug!("Connection inactive for {}ms, closing", elapsed_since_activity); + tokio::select! { + _ = cancel.cancelled() => { + debug!("Split-stream connection cancelled by shutdown"); c2b_handle.abort(); b2c_handle.abort(); break; } + _ = tokio::time::sleep(check_interval) => { + // Check max lifetime + if start.elapsed() >= max_lifetime { + debug!("Connection exceeded max lifetime, closing"); + c2b_handle.abort(); + b2c_handle.abort(); + break; + } + + // Check inactivity + let current = la_watch.load(Ordering::Relaxed); + if current == last_seen { + // No activity since last check + let elapsed_since_activity = start.elapsed().as_millis() as u64 - current; + if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 { + debug!("Connection inactive for {}ms, closing", elapsed_since_activity); + c2b_handle.abort(); + b2c_handle.abort(); + break; + } + } + last_seen = current; + } } - last_seen = current; } }); diff --git a/ts/proxies/smart-proxy/socket-handler-server.ts b/ts/proxies/smart-proxy/socket-handler-server.ts index be0bd9e..dc3378a 100644 --- a/ts/proxies/smart-proxy/socket-handler-server.ts +++ b/ts/proxies/smart-proxy/socket-handler-server.ts @@ -92,6 +92,16 @@ export class SocketHandlerServer { let metadataBuffer = ''; let metadataParsed = false; + // 10s timeout for metadata parsing phase — if Rust connects but never + // sends the JSON metadata line, don't hold the socket open indefinitely. + socket.setTimeout(10_000); + socket.on('timeout', () => { + if (!metadataParsed) { + logger.log('warn', 'Socket handler metadata timeout, closing', { component: 'socket-handler-server' }); + socket.destroy(); + } + }); + const onData = (chunk: Buffer) => { if (metadataParsed) return; @@ -108,6 +118,7 @@ export class SocketHandlerServer { } metadataParsed = true; + socket.setTimeout(0); // Clear metadata timeout socket.removeListener('data', onData); socket.pause(); // Prevent data loss between handler removal and pipe setup