fix(proxy): close connection buildup vectors in HTTP idle, WebSocket, socket relay, and TLS forwarding paths
- Add HTTP keep-alive idle timeout (60s default) with periodic watchdog that skips active requests (panic-safe via RAII ActiveRequestGuard)
- Make WebSocket inactivity/max-lifetime timeouts configurable from ConnectionConfig instead of hardcoded 1h/24h
- Replace bare copy_bidirectional in socket handler relay with timeout+cancel-aware split forwarding (inactivity, max lifetime, graceful shutdown)
- Add CancellationToken to forward_bidirectional_split_with_timeouts so TLS-terminated TCP connections respond to graceful shutdown
- Fix graceful_stop to actually abort listener tasks that exceed the shutdown deadline (previously they detached and ran forever)
- Add 10s metadata parsing timeout on TS socket-handler-server to prevent stuck sockets
This commit is contained in:
@@ -174,6 +174,11 @@ impl TcpListenerManager {
|
||||
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
||||
);
|
||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||
http_proxy_svc.set_connection_timeouts(
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms),
|
||||
);
|
||||
let http_proxy = Arc::new(http_proxy_svc);
|
||||
let conn_tracker = Arc::new(ConnectionTracker::new(
|
||||
conn_config.max_connections_per_ip,
|
||||
@@ -204,6 +209,11 @@ impl TcpListenerManager {
|
||||
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
||||
);
|
||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||
http_proxy_svc.set_connection_timeouts(
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms),
|
||||
);
|
||||
let http_proxy = Arc::new(http_proxy_svc);
|
||||
let conn_tracker = Arc::new(ConnectionTracker::new(
|
||||
conn_config.max_connections_per_ip,
|
||||
@@ -232,6 +242,22 @@ impl TcpListenerManager {
|
||||
config.connection_rate_limit_per_minute,
|
||||
));
|
||||
self.conn_semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_connections as usize));
|
||||
|
||||
// Rebuild http_proxy with updated timeouts
|
||||
let rm = self.route_manager.load_full();
|
||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||
rm,
|
||||
Arc::clone(&self.metrics),
|
||||
std::time::Duration::from_millis(config.connection_timeout_ms),
|
||||
);
|
||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||
http_proxy_svc.set_connection_timeouts(
|
||||
std::time::Duration::from_millis(config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(config.max_connection_lifetime_ms),
|
||||
);
|
||||
self.http_proxy = Arc::new(http_proxy_svc);
|
||||
|
||||
self.conn_config = Arc::new(config);
|
||||
}
|
||||
|
||||
@@ -336,13 +362,15 @@ impl TcpListenerManager {
|
||||
|
||||
for (port, handle) in self.listeners.drain() {
|
||||
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
|
||||
let abort_handle = handle.abort_handle();
|
||||
if remaining.is_zero() {
|
||||
handle.abort();
|
||||
abort_handle.abort();
|
||||
warn!("Force-stopped listener on port {} (timeout exceeded)", port);
|
||||
} else {
|
||||
match tokio::time::timeout(remaining, handle).await {
|
||||
Ok(_) => info!("Listener on port {} stopped gracefully", port),
|
||||
Err(_) => {
|
||||
abort_handle.abort();
|
||||
warn!("Listener on port {} did not stop in time, aborting", port);
|
||||
}
|
||||
}
|
||||
@@ -791,7 +819,8 @@ impl TcpListenerManager {
|
||||
stream, n, port, peer_addr,
|
||||
&route_match, domain.as_deref(), is_tls,
|
||||
&relay_socket_path,
|
||||
&metrics, route_id,
|
||||
Arc::clone(&metrics), route_id,
|
||||
&conn_config, cancel.clone(),
|
||||
).await;
|
||||
} else {
|
||||
debug!("Socket-handler route matched but no relay path configured");
|
||||
@@ -964,7 +993,7 @@ impl TcpListenerManager {
|
||||
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
tls_read, tls_write, backend_read, backend_write,
|
||||
inactivity_timeout, max_lifetime,
|
||||
inactivity_timeout, max_lifetime, cancel.clone(),
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: Arc::clone(&metrics),
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
@@ -1023,7 +1052,7 @@ impl TcpListenerManager {
|
||||
Self::handle_tls_reencrypt_tunnel(
|
||||
buf_stream, &target_host, target_port,
|
||||
peer_addr, Arc::clone(&metrics), route_id,
|
||||
&conn_config,
|
||||
&conn_config, cancel.clone(),
|
||||
).await?;
|
||||
}
|
||||
Ok(())
|
||||
@@ -1100,8 +1129,10 @@ impl TcpListenerManager {
|
||||
domain: Option<&str>,
|
||||
is_tls: bool,
|
||||
relay_path: &str,
|
||||
metrics: &MetricsCollector,
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<&str>,
|
||||
conn_config: &ConnectionConfig,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::UnixStream;
|
||||
@@ -1141,27 +1172,34 @@ impl TcpListenerManager {
|
||||
// Forward initial data to the Unix socket
|
||||
unix_stream.write_all(&initial_buf).await?;
|
||||
|
||||
// Bidirectional relay between TCP client and Unix socket handler
|
||||
// Bidirectional relay with inactivity timeout, max lifetime, and cancellation.
|
||||
// Split both streams and use the same watchdog pattern as other forwarding paths.
|
||||
let initial_len = initial_buf.len() as u64;
|
||||
match tokio::io::copy_bidirectional(&mut stream, &mut unix_stream).await {
|
||||
Ok((c2s, s2c)) => {
|
||||
// Include initial data bytes that were forwarded before copy_bidirectional
|
||||
let total_in = c2s + initial_len;
|
||||
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
|
||||
route_key, total_in, s2c);
|
||||
let ip = peer_addr.ip().to_string();
|
||||
metrics.record_bytes(total_in, s2c, route_id, Some(&ip));
|
||||
}
|
||||
Err(e) => {
|
||||
// Still record the initial data even on error
|
||||
if initial_len > 0 {
|
||||
let ip = peer_addr.ip().to_string();
|
||||
metrics.record_bytes(initial_len, 0, route_id, Some(&ip));
|
||||
}
|
||||
debug!("Socket handler relay ended for {}: {}", route_key, e);
|
||||
}
|
||||
let inactivity_timeout = std::time::Duration::from_millis(conn_config.socket_timeout_ms);
|
||||
let max_lifetime = std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms);
|
||||
|
||||
let (tcp_read, tcp_write) = stream.into_split();
|
||||
let (unix_read, unix_write) = unix_stream.into_split();
|
||||
|
||||
let ip_str = peer_addr.ip().to_string();
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
tcp_read, tcp_write, unix_read, unix_write,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: Arc::clone(&metrics),
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
source_ip: Some(ip_str.clone()),
|
||||
}),
|
||||
).await;
|
||||
|
||||
// Include the initial data that was forwarded before the bidirectional relay
|
||||
if initial_len > 0 {
|
||||
metrics.record_bytes(initial_len, 0, route_id, Some(&ip_str));
|
||||
}
|
||||
|
||||
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
|
||||
route_key, _bytes_in + initial_len, _bytes_out);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1176,6 +1214,7 @@ impl TcpListenerManager {
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<&str>,
|
||||
conn_config: &ConnectionConfig,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Connect to backend over TLS with timeout
|
||||
let backend_tls = match tokio::time::timeout(
|
||||
@@ -1220,7 +1259,7 @@ impl TcpListenerManager {
|
||||
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
client_read, client_write, backend_read, backend_write,
|
||||
inactivity_timeout, max_lifetime,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: metrics,
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
@@ -1295,6 +1334,7 @@ impl TcpListenerManager {
|
||||
mut backend_write: W2,
|
||||
inactivity_timeout: std::time::Duration,
|
||||
max_lifetime: std::time::Duration,
|
||||
cancel: CancellationToken,
|
||||
metrics: Option<forwarder::ForwardMetricsCtx>,
|
||||
) -> (u64, u64)
|
||||
where
|
||||
@@ -1362,7 +1402,7 @@ impl TcpListenerManager {
|
||||
total
|
||||
});
|
||||
|
||||
// Watchdog task: check for inactivity and max lifetime
|
||||
// Watchdog task: check for inactivity, max lifetime, and cancellation
|
||||
let la_watch = Arc::clone(&last_activity);
|
||||
let c2b_handle = c2b.abort_handle();
|
||||
let b2c_handle = b2c.abort_handle();
|
||||
@@ -1370,29 +1410,37 @@ impl TcpListenerManager {
|
||||
let check_interval = std::time::Duration::from_secs(5);
|
||||
let mut last_seen = 0u64;
|
||||
loop {
|
||||
tokio::time::sleep(check_interval).await;
|
||||
|
||||
// Check max lifetime
|
||||
if start.elapsed() >= max_lifetime {
|
||||
debug!("Connection exceeded max lifetime, closing");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
|
||||
// Check inactivity
|
||||
let current = la_watch.load(Ordering::Relaxed);
|
||||
if current == last_seen {
|
||||
// No activity since last check
|
||||
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
|
||||
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
|
||||
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
debug!("Split-stream connection cancelled by shutdown");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
_ = tokio::time::sleep(check_interval) => {
|
||||
// Check max lifetime
|
||||
if start.elapsed() >= max_lifetime {
|
||||
debug!("Connection exceeded max lifetime, closing");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
|
||||
// Check inactivity
|
||||
let current = la_watch.load(Ordering::Relaxed);
|
||||
if current == last_seen {
|
||||
// No activity since last check
|
||||
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
|
||||
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
|
||||
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
last_seen = current;
|
||||
}
|
||||
}
|
||||
last_seen = current;
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user