fix(proxy): improve proxy robustness: add connect timeouts, graceful shutdown, WebSocket watchdog, and metrics guard

This commit is contained in:
2026-02-13 16:57:46 +00:00
parent 07e464fdac
commit a8f8946a4d
6 changed files with 199 additions and 39 deletions

View File

@@ -15,6 +15,38 @@ use crate::forwarder;
use crate::tls_handler;
use crate::connection_tracker::ConnectionTracker;
/// RAII guard that decrements the active connection metric on drop.
/// Ensures connection_closed is called on ALL exit paths — normal, error, or panic.
struct ConnectionGuard {
metrics: Arc<MetricsCollector>,
route_id: Option<String>,
disarmed: bool,
}
impl ConnectionGuard {
fn new(metrics: Arc<MetricsCollector>, route_id: Option<&str>) -> Self {
Self {
metrics,
route_id: route_id.map(|s| s.to_string()),
disarmed: false,
}
}
/// Disarm the guard — prevents the Drop from running.
/// Use when handing off to a path that manages its own cleanup (e.g., HTTP proxy).
fn disarm(mut self) {
self.disarmed = true;
}
}
impl Drop for ConnectionGuard {
fn drop(&mut self) {
if !self.disarmed {
self.metrics.connection_closed(self.route_id.as_deref());
}
}
}
#[derive(Debug, Error)]
pub enum ListenerError {
#[error("Failed to bind port {port}: {source}")]
@@ -105,11 +137,12 @@ pub struct TcpListenerManager {
impl TcpListenerManager {
pub fn new(route_manager: Arc<RouteManager>) -> Self {
let metrics = Arc::new(MetricsCollector::new());
let http_proxy = Arc::new(HttpProxyService::new(
let conn_config = ConnectionConfig::default();
let http_proxy = Arc::new(HttpProxyService::with_connect_timeout(
Arc::clone(&route_manager),
Arc::clone(&metrics),
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
));
let conn_config = ConnectionConfig::default();
let conn_tracker = Arc::new(ConnectionTracker::new(
conn_config.max_connections_per_ip,
conn_config.connection_rate_limit_per_minute,
@@ -129,11 +162,12 @@ impl TcpListenerManager {
/// Create with a metrics collector.
pub fn with_metrics(route_manager: Arc<RouteManager>, metrics: Arc<MetricsCollector>) -> Self {
let http_proxy = Arc::new(HttpProxyService::new(
let conn_config = ConnectionConfig::default();
let http_proxy = Arc::new(HttpProxyService::with_connect_timeout(
Arc::clone(&route_manager),
Arc::clone(&metrics),
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
));
let conn_config = ConnectionConfig::default();
let conn_tracker = Arc::new(ConnectionTracker::new(
conn_config.max_connections_per_ip,
conn_config.connection_rate_limit_per_minute,
@@ -427,6 +461,7 @@ impl TcpListenerManager {
}
metrics.connection_opened(route_id);
let _fast_guard = ConnectionGuard::new(Arc::clone(&metrics), route_id);
let connect_timeout = std::time::Duration::from_millis(conn_config.connection_timeout_ms);
let inactivity_timeout = std::time::Duration::from_millis(conn_config.socket_timeout_ms);
@@ -442,14 +477,8 @@ impl TcpListenerManager {
tokio::net::TcpStream::connect(format!("{}:{}", target_host, target_port)),
).await {
Ok(Ok(s)) => s,
Ok(Err(e)) => {
metrics.connection_closed(route_id);
return Err(e.into());
}
Err(_) => {
metrics.connection_closed(route_id);
return Err("Backend connection timeout".into());
}
Ok(Err(e)) => return Err(e.into()),
Err(_) => return Err("Backend connection timeout".into()),
};
backend.set_nodelay(true)?;
@@ -480,7 +509,6 @@ impl TcpListenerManager {
metrics.record_bytes(bytes_in, bytes_out, route_id);
}
metrics.connection_closed(route_id);
return Ok(());
}
}
@@ -617,8 +645,9 @@ impl TcpListenerManager {
}
}
// Track connection in metrics
// Track connection in metrics — guard ensures connection_closed on all exit paths
metrics.connection_opened(route_id);
let _conn_guard = ConnectionGuard::new(Arc::clone(&metrics), route_id);
// Check if this is a socket-handler route that should be relayed to TypeScript
if route_match.route.action.action_type == RouteActionType::SocketHandler {
@@ -628,16 +657,13 @@ impl TcpListenerManager {
};
if let Some(relay_socket_path) = relay_path {
let result = Self::relay_to_socket_handler(
return Self::relay_to_socket_handler(
stream, n, port, peer_addr,
&route_match, domain.as_deref(), is_tls,
&relay_socket_path,
).await;
metrics.connection_closed(route_id);
return result;
} else {
debug!("Socket-handler route matched but no relay path configured");
metrics.connection_closed(route_id);
return Ok(());
}
}
@@ -646,7 +672,6 @@ impl TcpListenerManager {
Some(t) => t,
None => {
debug!("Route matched but no target available");
metrics.connection_closed(route_id);
return Ok(());
}
};
@@ -765,7 +790,9 @@ impl TcpListenerManager {
"TLS Terminate + HTTP: {} -> {}:{} (domain: {:?})",
peer_addr, target_host, target_port, domain
);
http_proxy.handle_io(buf_stream, peer_addr, port).await;
// HTTP proxy manages its own per-request metrics — disarm TCP-level guard
_conn_guard.disarm();
http_proxy.handle_io(buf_stream, peer_addr, port, cancel.clone()).await;
} else {
debug!(
"TLS Terminate + TCP: {} -> {}:{} (domain: {:?})",
@@ -805,7 +832,9 @@ impl TcpListenerManager {
if is_http {
// Plain HTTP - use HTTP proxy for request-level routing
debug!("HTTP proxy: {} on port {}", peer_addr, port);
http_proxy.handle_connection(stream, peer_addr, port).await;
// HTTP proxy manages its own per-request metrics — disarm TCP-level guard
_conn_guard.disarm();
http_proxy.handle_connection(stream, peer_addr, port, cancel.clone()).await;
Ok(())
} else {
// Plain TCP forwarding (non-HTTP)
@@ -843,7 +872,7 @@ impl TcpListenerManager {
}
};
metrics.connection_closed(route_id);
// ConnectionGuard handles metrics.connection_closed() on drop
result
}