fix(proxy): close connection buildup vectors in HTTP idle, WebSocket, socket relay, and TLS forwarding paths

- Add HTTP keep-alive idle timeout (60s default) with periodic watchdog that
  skips active requests (panic-safe via RAII ActiveRequestGuard)
- Make WebSocket inactivity/max-lifetime timeouts configurable from ConnectionConfig
  instead of hardcoded 1h/24h
- Replace bare copy_bidirectional in socket handler relay with timeout+cancel-aware
  split forwarding (inactivity, max lifetime, graceful shutdown)
- Add CancellationToken to forward_bidirectional_split_with_timeouts so TLS-terminated
  TCP connections respond to graceful shutdown
- Fix graceful_stop to actually abort listener tasks that exceed the shutdown deadline
  (previously they detached and ran forever)
- Add 10s metadata parsing timeout on TS socket-handler-server to prevent stuck sockets
This commit is contained in:
2026-02-26 21:29:19 +00:00
parent ef060d5e79
commit 8db621657f
3 changed files with 215 additions and 47 deletions

View File

@@ -174,6 +174,11 @@ impl TcpListenerManager {
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
);
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
http_proxy_svc.set_connection_timeouts(
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms),
);
let http_proxy = Arc::new(http_proxy_svc);
let conn_tracker = Arc::new(ConnectionTracker::new(
conn_config.max_connections_per_ip,
@@ -204,6 +209,11 @@ impl TcpListenerManager {
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
);
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
http_proxy_svc.set_connection_timeouts(
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms),
);
let http_proxy = Arc::new(http_proxy_svc);
let conn_tracker = Arc::new(ConnectionTracker::new(
conn_config.max_connections_per_ip,
@@ -232,6 +242,22 @@ impl TcpListenerManager {
config.connection_rate_limit_per_minute,
));
self.conn_semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_connections as usize));
// Rebuild http_proxy with updated timeouts
let rm = self.route_manager.load_full();
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
rm,
Arc::clone(&self.metrics),
std::time::Duration::from_millis(config.connection_timeout_ms),
);
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
http_proxy_svc.set_connection_timeouts(
std::time::Duration::from_millis(config.socket_timeout_ms),
std::time::Duration::from_millis(config.socket_timeout_ms),
std::time::Duration::from_millis(config.max_connection_lifetime_ms),
);
self.http_proxy = Arc::new(http_proxy_svc);
self.conn_config = Arc::new(config);
}
@@ -336,13 +362,15 @@ impl TcpListenerManager {
for (port, handle) in self.listeners.drain() {
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
let abort_handle = handle.abort_handle();
if remaining.is_zero() {
handle.abort();
abort_handle.abort();
warn!("Force-stopped listener on port {} (timeout exceeded)", port);
} else {
match tokio::time::timeout(remaining, handle).await {
Ok(_) => info!("Listener on port {} stopped gracefully", port),
Err(_) => {
abort_handle.abort();
warn!("Listener on port {} did not stop in time, aborting", port);
}
}
@@ -791,7 +819,8 @@ impl TcpListenerManager {
stream, n, port, peer_addr,
&route_match, domain.as_deref(), is_tls,
&relay_socket_path,
&metrics, route_id,
Arc::clone(&metrics), route_id,
&conn_config, cancel.clone(),
).await;
} else {
debug!("Socket-handler route matched but no relay path configured");
@@ -964,7 +993,7 @@ impl TcpListenerManager {
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
tls_read, tls_write, backend_read, backend_write,
inactivity_timeout, max_lifetime,
inactivity_timeout, max_lifetime, cancel.clone(),
Some(forwarder::ForwardMetricsCtx {
collector: Arc::clone(&metrics),
route_id: route_id.map(|s| s.to_string()),
@@ -1023,7 +1052,7 @@ impl TcpListenerManager {
Self::handle_tls_reencrypt_tunnel(
buf_stream, &target_host, target_port,
peer_addr, Arc::clone(&metrics), route_id,
&conn_config,
&conn_config, cancel.clone(),
).await?;
}
Ok(())
@@ -1100,8 +1129,10 @@ impl TcpListenerManager {
domain: Option<&str>,
is_tls: bool,
relay_path: &str,
metrics: &MetricsCollector,
metrics: Arc<MetricsCollector>,
route_id: Option<&str>,
conn_config: &ConnectionConfig,
cancel: CancellationToken,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::UnixStream;
@@ -1141,27 +1172,34 @@ impl TcpListenerManager {
// Forward initial data to the Unix socket
unix_stream.write_all(&initial_buf).await?;
// Bidirectional relay between TCP client and Unix socket handler
// Bidirectional relay with inactivity timeout, max lifetime, and cancellation.
// Split both streams and use the same watchdog pattern as other forwarding paths.
let initial_len = initial_buf.len() as u64;
match tokio::io::copy_bidirectional(&mut stream, &mut unix_stream).await {
Ok((c2s, s2c)) => {
// Include initial data bytes that were forwarded before copy_bidirectional
let total_in = c2s + initial_len;
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
route_key, total_in, s2c);
let ip = peer_addr.ip().to_string();
metrics.record_bytes(total_in, s2c, route_id, Some(&ip));
}
Err(e) => {
// Still record the initial data even on error
if initial_len > 0 {
let ip = peer_addr.ip().to_string();
metrics.record_bytes(initial_len, 0, route_id, Some(&ip));
}
debug!("Socket handler relay ended for {}: {}", route_key, e);
}
let inactivity_timeout = std::time::Duration::from_millis(conn_config.socket_timeout_ms);
let max_lifetime = std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms);
let (tcp_read, tcp_write) = stream.into_split();
let (unix_read, unix_write) = unix_stream.into_split();
let ip_str = peer_addr.ip().to_string();
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
tcp_read, tcp_write, unix_read, unix_write,
inactivity_timeout, max_lifetime, cancel,
Some(forwarder::ForwardMetricsCtx {
collector: Arc::clone(&metrics),
route_id: route_id.map(|s| s.to_string()),
source_ip: Some(ip_str.clone()),
}),
).await;
// Include the initial data that was forwarded before the bidirectional relay
if initial_len > 0 {
metrics.record_bytes(initial_len, 0, route_id, Some(&ip_str));
}
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
route_key, _bytes_in + initial_len, _bytes_out);
Ok(())
}
@@ -1176,6 +1214,7 @@ impl TcpListenerManager {
metrics: Arc<MetricsCollector>,
route_id: Option<&str>,
conn_config: &ConnectionConfig,
cancel: CancellationToken,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
// Connect to backend over TLS with timeout
let backend_tls = match tokio::time::timeout(
@@ -1220,7 +1259,7 @@ impl TcpListenerManager {
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
client_read, client_write, backend_read, backend_write,
inactivity_timeout, max_lifetime,
inactivity_timeout, max_lifetime, cancel,
Some(forwarder::ForwardMetricsCtx {
collector: metrics,
route_id: route_id.map(|s| s.to_string()),
@@ -1295,6 +1334,7 @@ impl TcpListenerManager {
mut backend_write: W2,
inactivity_timeout: std::time::Duration,
max_lifetime: std::time::Duration,
cancel: CancellationToken,
metrics: Option<forwarder::ForwardMetricsCtx>,
) -> (u64, u64)
where
@@ -1362,7 +1402,7 @@ impl TcpListenerManager {
total
});
// Watchdog task: check for inactivity and max lifetime
// Watchdog task: check for inactivity, max lifetime, and cancellation
let la_watch = Arc::clone(&last_activity);
let c2b_handle = c2b.abort_handle();
let b2c_handle = b2c.abort_handle();
@@ -1370,29 +1410,37 @@ impl TcpListenerManager {
let check_interval = std::time::Duration::from_secs(5);
let mut last_seen = 0u64;
loop {
tokio::time::sleep(check_interval).await;
// Check max lifetime
if start.elapsed() >= max_lifetime {
debug!("Connection exceeded max lifetime, closing");
c2b_handle.abort();
b2c_handle.abort();
break;
}
// Check inactivity
let current = la_watch.load(Ordering::Relaxed);
if current == last_seen {
// No activity since last check
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
tokio::select! {
_ = cancel.cancelled() => {
debug!("Split-stream connection cancelled by shutdown");
c2b_handle.abort();
b2c_handle.abort();
break;
}
_ = tokio::time::sleep(check_interval) => {
// Check max lifetime
if start.elapsed() >= max_lifetime {
debug!("Connection exceeded max lifetime, closing");
c2b_handle.abort();
b2c_handle.abort();
break;
}
// Check inactivity
let current = la_watch.load(Ordering::Relaxed);
if current == last_seen {
// No activity since last check
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
c2b_handle.abort();
b2c_handle.abort();
break;
}
}
last_seen = current;
}
}
last_seen = current;
}
});