fix(proxy): close connection buildup vectors in HTTP idle, WebSocket, socket relay, and TLS forwarding paths

- Add HTTP keep-alive idle timeout (60s default) with periodic watchdog that
  skips active requests (panic-safe via RAII ActiveRequestGuard)
- Make WebSocket inactivity/max-lifetime timeouts configurable from ConnectionConfig
  instead of hardcoded 1h/24h
- Replace bare copy_bidirectional in socket handler relay with timeout+cancel-aware
  split forwarding (inactivity, max lifetime, graceful shutdown)
- Add CancellationToken to forward_bidirectional_split_with_timeouts so TLS-terminated
  TCP connections respond to graceful shutdown
- Fix graceful_stop to actually abort listener tasks that exceed the shutdown deadline
  (previously they detached and ran forever)
- Add 10s metadata parsing timeout on TS socket-handler-server to prevent stuck sockets
This commit is contained in:
2026-02-26 21:29:19 +00:00
parent ef060d5e79
commit 8db621657f
3 changed files with 215 additions and 47 deletions

View File

@@ -34,12 +34,35 @@ use crate::upstream_selector::UpstreamSelector;
/// Upstream connect timeout used when none is configured: 30 seconds.
const DEFAULT_CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
/// HTTP keep-alive idle timeout used when none is configured: 60 seconds.
/// A connection on which no new request arrives within this window is closed.
const DEFAULT_HTTP_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
/// WebSocket inactivity timeout used when none is configured: 1 hour.
const DEFAULT_WS_INACTIVITY_TIMEOUT: std::time::Duration =
    std::time::Duration::from_secs(60 * 60);
/// WebSocket maximum lifetime used when none is configured: 24 hours.
const DEFAULT_WS_MAX_LIFETIME: std::time::Duration =
    std::time::Duration::from_secs(24 * 60 * 60);
/// RAII guard that counts one in-flight request on a shared counter.
///
/// Creating the guard increments the counter; dropping it decrements the
/// counter again. Because the decrement lives in `Drop`, it also runs while
/// the request handler unwinds from a panic, so the count stays correct.
///
/// The guard must be bound to a variable that lives as long as the request:
/// a discarded guard is dropped immediately and the request is never counted.
#[must_use = "the request only counts as active while the guard is alive"]
struct ActiveRequestGuard {
    /// Shared count of requests currently in progress.
    counter: Arc<AtomicU64>,
}

impl ActiveRequestGuard {
    /// Registers one active request on `counter` and returns the guard that
    /// unregisters it when dropped.
    fn new(counter: Arc<AtomicU64>) -> Self {
        counter.fetch_add(1, Ordering::Relaxed);
        Self { counter }
    }
}

impl Drop for ActiveRequestGuard {
    /// Unregisters the request that `new` registered.
    fn drop(&mut self) {
        self.counter.fetch_sub(1, Ordering::Relaxed);
    }
}
/// Backend stream that can be either plain TCP or TLS-wrapped.
/// Used for `terminate-and-reencrypt` mode where the backend requires TLS.
pub(crate) enum BackendStream {
@@ -125,6 +148,12 @@ pub struct HttpProxyService {
backend_tls_config: Arc<rustls::ClientConfig>,
/// Backend connection pool for reusing keep-alive connections.
connection_pool: Arc<crate::connection_pool::ConnectionPool>,
/// HTTP keep-alive idle timeout: close connection if no new request arrives within this duration.
http_idle_timeout: std::time::Duration,
/// WebSocket inactivity timeout (no data in either direction).
ws_inactivity_timeout: std::time::Duration,
/// WebSocket maximum connection lifetime.
ws_max_lifetime: std::time::Duration,
}
impl HttpProxyService {
@@ -139,6 +168,9 @@ impl HttpProxyService {
regex_cache: DashMap::new(),
backend_tls_config: Self::default_backend_tls_config(),
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
}
}
@@ -158,9 +190,25 @@ impl HttpProxyService {
regex_cache: DashMap::new(),
backend_tls_config: Self::default_backend_tls_config(),
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
}
}
/// Replace the three connection timeouts with values from connection config.
///
/// * `http_idle_timeout` - new HTTP keep-alive idle timeout.
/// * `ws_inactivity_timeout` - new WebSocket inactivity timeout.
/// * `ws_max_lifetime` - new WebSocket maximum connection lifetime.
pub fn set_connection_timeouts(
    &mut self,
    http_idle_timeout: std::time::Duration,
    ws_inactivity_timeout: std::time::Duration,
    ws_max_lifetime: std::time::Duration,
) {
    // All three fields are overwritten together in one destructuring assignment.
    (
        self.http_idle_timeout,
        self.ws_inactivity_timeout,
        self.ws_max_lifetime,
    ) = (http_idle_timeout, ws_inactivity_timeout, ws_max_lifetime);
}
/// Set the shared backend TLS config (enables session resumption).
/// Call this after construction to inject the shared config from tls_handler.
pub fn set_backend_tls_config(&mut self, config: Arc<rustls::ClientConfig>) {
@@ -192,6 +240,10 @@ impl HttpProxyService {
/// based on ALPN negotiation (TLS) or connection preface (h2c).
/// Supports HTTP/1.1 upgrades (WebSocket) and HTTP/2 CONNECT.
/// Responds to graceful shutdown via the cancel token.
///
/// An idle watchdog closes the connection if no new HTTP request arrives
/// within `http_idle_timeout` (default 60s). This prevents keep-alive
/// connections from accumulating indefinitely.
pub async fn handle_io<I>(
self: Arc<Self>,
stream: I,
@@ -204,13 +256,34 @@ impl HttpProxyService {
{
let io = TokioIo::new(stream);
// Capture timeouts before `self` is moved into the service closure.
let idle_timeout = self.http_idle_timeout;
// Activity tracker: updated at the START and END of each request.
// The idle watchdog checks this to determine if the connection is idle
// (no request in progress and none started recently).
let last_activity = Arc::new(AtomicU64::new(0));
let active_requests = Arc::new(AtomicU64::new(0));
let start = std::time::Instant::now();
let la_inner = Arc::clone(&last_activity);
let ar_inner = Arc::clone(&active_requests);
let cancel_inner = cancel.clone();
let service = hyper::service::service_fn(move |req: Request<Incoming>| {
// Mark request start — RAII guard decrements on drop (panic-safe)
la_inner.store(start.elapsed().as_millis() as u64, Ordering::Relaxed);
let req_guard = ActiveRequestGuard::new(Arc::clone(&ar_inner));
let svc = Arc::clone(&self);
let peer = peer_addr;
let cn = cancel_inner.clone();
let la = Arc::clone(&la_inner);
let st = start;
async move {
svc.handle_request(req, peer, port, cn).await
let result = svc.handle_request(req, peer, port, cn).await;
// Mark request end — update activity timestamp before guard drops
la.store(st.elapsed().as_millis() as u64, Ordering::Relaxed);
drop(req_guard); // Explicitly drop to decrement active_requests
result
}
});
@@ -221,7 +294,7 @@ impl HttpProxyService {
// Pin on the heap — auto::UpgradeableConnection is !Unpin
let mut conn = Box::pin(conn);
// Use select to support graceful shutdown via cancellation token
// Use select to support graceful shutdown, cancellation, and idle timeout
tokio::select! {
result = conn.as_mut() => {
if let Err(e) = result {
@@ -235,6 +308,37 @@ impl HttpProxyService {
debug!("HTTP connection error during shutdown from {}: {}", peer_addr, e);
}
}
_ = async {
// Idle watchdog: check every 5s whether the connection has been idle
// (no active requests AND no activity for idle_timeout).
// This avoids killing long-running requests or upgraded connections.
let check_interval = std::time::Duration::from_secs(5);
let mut last_seen = 0u64;
loop {
tokio::time::sleep(check_interval).await;
// Never close while a request is in progress
if active_requests.load(Ordering::Relaxed) > 0 {
last_seen = last_activity.load(Ordering::Relaxed);
continue;
}
let current = last_activity.load(Ordering::Relaxed);
if current == last_seen {
// No new activity since last check
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
if elapsed_since_activity >= idle_timeout.as_millis() as u64 {
return;
}
}
last_seen = current;
}
} => {
debug!("HTTP connection idle timeout ({}s) from {}", idle_timeout.as_secs(), peer_addr);
conn.as_mut().graceful_shutdown();
// Give any in-flight work 5s to drain after graceful shutdown
let _ = tokio::time::timeout(std::time::Duration::from_secs(5), conn).await;
}
}
}
@@ -1022,6 +1126,8 @@ impl HttpProxyService {
let source_ip_owned = source_ip.to_string();
let upstream_selector = self.upstream_selector.clone();
let upstream_key_owned = upstream_key.to_string();
let ws_inactivity_timeout = self.ws_inactivity_timeout;
let ws_max_lifetime = self.ws_max_lifetime;
tokio::spawn(async move {
let client_upgraded = match on_client_upgrade.await {
@@ -1084,8 +1190,8 @@ impl HttpProxyService {
let la_watch = Arc::clone(&last_activity);
let c2u_handle = c2u.abort_handle();
let u2c_handle = u2c.abort_handle();
let inactivity_timeout = DEFAULT_WS_INACTIVITY_TIMEOUT;
let max_lifetime = DEFAULT_WS_MAX_LIFETIME;
let inactivity_timeout = ws_inactivity_timeout;
let max_lifetime = ws_max_lifetime;
let watchdog = tokio::spawn(async move {
let check_interval = std::time::Duration::from_secs(5);
@@ -1391,6 +1497,9 @@ impl Default for HttpProxyService {
regex_cache: DashMap::new(),
backend_tls_config: Self::default_backend_tls_config(),
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
}
}
}