fix(proxy): close connection buildup vectors in HTTP idle, WebSocket, socket relay, and TLS forwarding paths
- Add HTTP keep-alive idle timeout (60s default) with a periodic watchdog that skips active requests (panic-safe via RAII ActiveRequestGuard)
- Make WebSocket inactivity/max-lifetime timeouts configurable from ConnectionConfig instead of hardcoded 1h/24h
- Replace bare copy_bidirectional in the socket handler relay with timeout+cancel-aware split forwarding (inactivity, max lifetime, graceful shutdown)
- Add CancellationToken to forward_bidirectional_split_with_timeouts so TLS-terminated TCP connections respond to graceful shutdown
- Fix graceful_stop to actually abort listener tasks that exceed the shutdown deadline (previously they detached and ran forever)
- Add 10s metadata parsing timeout on the TS socket-handler-server to prevent stuck sockets
This commit is contained in:
@@ -34,12 +34,35 @@ use crate::upstream_selector::UpstreamSelector;
|
||||
/// Upstream connect timeout used by default: 30 seconds.
const DEFAULT_CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);

/// HTTP keep-alive idle timeout used by default: 60 seconds.
///
/// A connection is closed when no new request arrives within this duration.
const DEFAULT_HTTP_IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);

/// WebSocket inactivity timeout used by default: 1 hour (60 min x 60 s).
const DEFAULT_WS_INACTIVITY_TIMEOUT: std::time::Duration =
    std::time::Duration::from_secs(60 * 60);

/// WebSocket maximum connection lifetime used by default: 24 hours.
const DEFAULT_WS_MAX_LIFETIME: std::time::Duration =
    std::time::Duration::from_secs(24 * 60 * 60);
|
||||
|
||||
/// RAII guard that counts one in-flight request on a shared counter.
///
/// [`ActiveRequestGuard::new`] increments the counter and dropping the guard
/// decrements it again. Because the decrement lives in `Drop`, the counter is
/// restored on every exit path of the request handler, including unwinding
/// from a panic.
///
/// The guard must be bound to a named variable for as long as the request is
/// in progress; an unbound guard is dropped immediately and the request would
/// never be counted.
#[must_use = "an unbound guard is dropped at once, undoing the increment"]
struct ActiveRequestGuard {
    /// Shared tally of requests currently in progress.
    counter: Arc<AtomicU64>,
}

impl ActiveRequestGuard {
    /// Increments `counter` by one and returns the guard that will decrement
    /// it by one when dropped.
    fn new(counter: Arc<AtomicU64>) -> Self {
        // Relaxed ordering: only the numeric value of the tally is used.
        // NOTE(review): confirm no reader relies on this counter to
        // synchronize access to other memory.
        counter.fetch_add(1, Ordering::Relaxed);
        Self { counter }
    }
}

impl Drop for ActiveRequestGuard {
    /// Undoes the increment performed by [`ActiveRequestGuard::new`].
    fn drop(&mut self) {
        self.counter.fetch_sub(1, Ordering::Relaxed);
    }
}
|
||||
|
||||
/// Backend stream that can be either plain TCP or TLS-wrapped.
|
||||
/// Used for `terminate-and-reencrypt` mode where the backend requires TLS.
|
||||
pub(crate) enum BackendStream {
|
||||
@@ -125,6 +148,12 @@ pub struct HttpProxyService {
|
||||
backend_tls_config: Arc<rustls::ClientConfig>,
|
||||
/// Backend connection pool for reusing keep-alive connections.
|
||||
connection_pool: Arc<crate::connection_pool::ConnectionPool>,
|
||||
/// HTTP keep-alive idle timeout: close connection if no new request arrives within this duration.
|
||||
http_idle_timeout: std::time::Duration,
|
||||
/// WebSocket inactivity timeout (no data in either direction).
|
||||
ws_inactivity_timeout: std::time::Duration,
|
||||
/// WebSocket maximum connection lifetime.
|
||||
ws_max_lifetime: std::time::Duration,
|
||||
}
|
||||
|
||||
impl HttpProxyService {
|
||||
@@ -139,6 +168,9 @@ impl HttpProxyService {
|
||||
regex_cache: DashMap::new(),
|
||||
backend_tls_config: Self::default_backend_tls_config(),
|
||||
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
||||
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
||||
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
||||
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,9 +190,25 @@ impl HttpProxyService {
|
||||
regex_cache: DashMap::new(),
|
||||
backend_tls_config: Self::default_backend_tls_config(),
|
||||
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
||||
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
||||
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
||||
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the HTTP keep-alive idle timeout, WebSocket inactivity timeout, and
|
||||
/// WebSocket max lifetime from connection config values.
|
||||
pub fn set_connection_timeouts(
|
||||
&mut self,
|
||||
http_idle_timeout: std::time::Duration,
|
||||
ws_inactivity_timeout: std::time::Duration,
|
||||
ws_max_lifetime: std::time::Duration,
|
||||
) {
|
||||
self.http_idle_timeout = http_idle_timeout;
|
||||
self.ws_inactivity_timeout = ws_inactivity_timeout;
|
||||
self.ws_max_lifetime = ws_max_lifetime;
|
||||
}
|
||||
|
||||
/// Set the shared backend TLS config (enables session resumption).
|
||||
/// Call this after construction to inject the shared config from tls_handler.
|
||||
pub fn set_backend_tls_config(&mut self, config: Arc<rustls::ClientConfig>) {
|
||||
@@ -192,6 +240,10 @@ impl HttpProxyService {
|
||||
/// based on ALPN negotiation (TLS) or connection preface (h2c).
|
||||
/// Supports HTTP/1.1 upgrades (WebSocket) and HTTP/2 CONNECT.
|
||||
/// Responds to graceful shutdown via the cancel token.
|
||||
///
|
||||
/// An idle watchdog closes the connection if no new HTTP request arrives
|
||||
/// within `http_idle_timeout` (default 60s). This prevents keep-alive
|
||||
/// connections from accumulating indefinitely.
|
||||
pub async fn handle_io<I>(
|
||||
self: Arc<Self>,
|
||||
stream: I,
|
||||
@@ -204,13 +256,34 @@ impl HttpProxyService {
|
||||
{
|
||||
let io = TokioIo::new(stream);
|
||||
|
||||
// Capture timeouts before `self` is moved into the service closure.
|
||||
let idle_timeout = self.http_idle_timeout;
|
||||
|
||||
// Activity tracker: updated at the START and END of each request.
|
||||
// The idle watchdog checks this to determine if the connection is idle
|
||||
// (no request in progress and none started recently).
|
||||
let last_activity = Arc::new(AtomicU64::new(0));
|
||||
let active_requests = Arc::new(AtomicU64::new(0));
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let la_inner = Arc::clone(&last_activity);
|
||||
let ar_inner = Arc::clone(&active_requests);
|
||||
let cancel_inner = cancel.clone();
|
||||
let service = hyper::service::service_fn(move |req: Request<Incoming>| {
|
||||
// Mark request start — RAII guard decrements on drop (panic-safe)
|
||||
la_inner.store(start.elapsed().as_millis() as u64, Ordering::Relaxed);
|
||||
let req_guard = ActiveRequestGuard::new(Arc::clone(&ar_inner));
|
||||
let svc = Arc::clone(&self);
|
||||
let peer = peer_addr;
|
||||
let cn = cancel_inner.clone();
|
||||
let la = Arc::clone(&la_inner);
|
||||
let st = start;
|
||||
async move {
|
||||
svc.handle_request(req, peer, port, cn).await
|
||||
let result = svc.handle_request(req, peer, port, cn).await;
|
||||
// Mark request end — update activity timestamp before guard drops
|
||||
la.store(st.elapsed().as_millis() as u64, Ordering::Relaxed);
|
||||
drop(req_guard); // Explicitly drop to decrement active_requests
|
||||
result
|
||||
}
|
||||
});
|
||||
|
||||
@@ -221,7 +294,7 @@ impl HttpProxyService {
|
||||
// Pin on the heap — auto::UpgradeableConnection is !Unpin
|
||||
let mut conn = Box::pin(conn);
|
||||
|
||||
// Use select to support graceful shutdown via cancellation token
|
||||
// Use select to support graceful shutdown, cancellation, and idle timeout
|
||||
tokio::select! {
|
||||
result = conn.as_mut() => {
|
||||
if let Err(e) = result {
|
||||
@@ -235,6 +308,37 @@ impl HttpProxyService {
|
||||
debug!("HTTP connection error during shutdown from {}: {}", peer_addr, e);
|
||||
}
|
||||
}
|
||||
_ = async {
|
||||
// Idle watchdog: check every 5s whether the connection has been idle
|
||||
// (no active requests AND no activity for idle_timeout).
|
||||
// This avoids killing long-running requests or upgraded connections.
|
||||
let check_interval = std::time::Duration::from_secs(5);
|
||||
let mut last_seen = 0u64;
|
||||
loop {
|
||||
tokio::time::sleep(check_interval).await;
|
||||
|
||||
// Never close while a request is in progress
|
||||
if active_requests.load(Ordering::Relaxed) > 0 {
|
||||
last_seen = last_activity.load(Ordering::Relaxed);
|
||||
continue;
|
||||
}
|
||||
|
||||
let current = last_activity.load(Ordering::Relaxed);
|
||||
if current == last_seen {
|
||||
// No new activity since last check
|
||||
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
|
||||
if elapsed_since_activity >= idle_timeout.as_millis() as u64 {
|
||||
return;
|
||||
}
|
||||
}
|
||||
last_seen = current;
|
||||
}
|
||||
} => {
|
||||
debug!("HTTP connection idle timeout ({}s) from {}", idle_timeout.as_secs(), peer_addr);
|
||||
conn.as_mut().graceful_shutdown();
|
||||
// Give any in-flight work 5s to drain after graceful shutdown
|
||||
let _ = tokio::time::timeout(std::time::Duration::from_secs(5), conn).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1022,6 +1126,8 @@ impl HttpProxyService {
|
||||
let source_ip_owned = source_ip.to_string();
|
||||
let upstream_selector = self.upstream_selector.clone();
|
||||
let upstream_key_owned = upstream_key.to_string();
|
||||
let ws_inactivity_timeout = self.ws_inactivity_timeout;
|
||||
let ws_max_lifetime = self.ws_max_lifetime;
|
||||
|
||||
tokio::spawn(async move {
|
||||
let client_upgraded = match on_client_upgrade.await {
|
||||
@@ -1084,8 +1190,8 @@ impl HttpProxyService {
|
||||
let la_watch = Arc::clone(&last_activity);
|
||||
let c2u_handle = c2u.abort_handle();
|
||||
let u2c_handle = u2c.abort_handle();
|
||||
let inactivity_timeout = DEFAULT_WS_INACTIVITY_TIMEOUT;
|
||||
let max_lifetime = DEFAULT_WS_MAX_LIFETIME;
|
||||
let inactivity_timeout = ws_inactivity_timeout;
|
||||
let max_lifetime = ws_max_lifetime;
|
||||
|
||||
let watchdog = tokio::spawn(async move {
|
||||
let check_interval = std::time::Duration::from_secs(5);
|
||||
@@ -1391,6 +1497,9 @@ impl Default for HttpProxyService {
|
||||
regex_cache: DashMap::new(),
|
||||
backend_tls_config: Self::default_backend_tls_config(),
|
||||
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
||||
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
||||
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
||||
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user