fix(metrics): correct frontend and backend protocol connection tracking across h1, h2, h3, and websocket traffic
This commit is contained in:
@@ -18,7 +18,7 @@ use tracing::{debug, warn};
|
||||
use rustproxy_config::RouteConfig;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::proxy_service::{ConnActivity, HttpProxyService};
|
||||
use crate::proxy_service::{ConnActivity, HttpProxyService, ProtocolGuard};
|
||||
|
||||
/// HTTP/3 proxy service.
|
||||
///
|
||||
@@ -48,6 +48,9 @@ impl H3ProxyService {
|
||||
let remote_addr = real_client_addr.unwrap_or_else(|| connection.remote_address());
|
||||
debug!("HTTP/3 connection from {} on port {}", remote_addr, port);
|
||||
|
||||
// Track frontend H3 connection for the QUIC connection's lifetime.
|
||||
let _frontend_h3_guard = ProtocolGuard::frontend(Arc::clone(self.http_proxy.metrics()), "h3");
|
||||
|
||||
let mut h3_conn: h3::server::Connection<h3_quinn::Connection, Bytes> =
|
||||
h3::server::builder()
|
||||
.send_grease(false)
|
||||
|
||||
@@ -140,6 +140,38 @@ impl Drop for ProtocolGuard {
|
||||
}
|
||||
}
|
||||
|
||||
/// Connection-level frontend protocol tracker.
|
||||
///
|
||||
/// In `handle_io`, the HTTP protocol (h1 vs h2) is unknown until the first request
|
||||
/// arrives. This struct uses `OnceLock` so the first request detects the protocol
|
||||
/// and opens the counter; subsequent requests on the same connection are no-ops.
|
||||
/// On Drop (when the connection ends), the counter is closed.
|
||||
pub(crate) struct FrontendProtocolTracker {
|
||||
metrics: Arc<MetricsCollector>,
|
||||
proto: std::sync::OnceLock<&'static str>,
|
||||
}
|
||||
|
||||
impl FrontendProtocolTracker {
|
||||
fn new(metrics: Arc<MetricsCollector>) -> Self {
|
||||
Self { metrics, proto: std::sync::OnceLock::new() }
|
||||
}
|
||||
|
||||
/// Set the frontend protocol. Only the first call opens the counter.
|
||||
fn set(&self, proto: &'static str) {
|
||||
if self.proto.set(proto).is_ok() {
|
||||
self.metrics.frontend_protocol_opened(proto);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for FrontendProtocolTracker {
|
||||
fn drop(&mut self) {
|
||||
if let Some(proto) = self.proto.get() {
|
||||
self.metrics.frontend_protocol_closed(proto);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Backend stream that can be either plain TCP or TLS-wrapped.
|
||||
/// Used for `terminate-and-reencrypt` mode where the backend requires TLS.
|
||||
pub(crate) enum BackendStream {
|
||||
@@ -365,6 +397,11 @@ impl HttpProxyService {
|
||||
self.protocol_cache.snapshot()
|
||||
}
|
||||
|
||||
/// Access the shared metrics collector (used by H3ProxyService for protocol tracking).
|
||||
pub fn metrics(&self) -> &Arc<MetricsCollector> {
|
||||
&self.metrics
|
||||
}
|
||||
|
||||
/// Handle an incoming HTTP connection on a plain TCP stream.
|
||||
pub async fn handle_connection(
|
||||
self: Arc<Self>,
|
||||
@@ -409,10 +446,24 @@ impl HttpProxyService {
|
||||
let active_requests = Arc::new(AtomicU64::new(0));
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
// Connection-level frontend protocol tracker: the first request detects
|
||||
// h1 vs h2 from req.version() and opens the counter. On connection close
|
||||
// (when handle_io returns), Drop closes the counter.
|
||||
let frontend_tracker = Arc::new(FrontendProtocolTracker::new(Arc::clone(&self.metrics)));
|
||||
let ft_inner = Arc::clone(&frontend_tracker);
|
||||
|
||||
let la_inner = Arc::clone(&last_activity);
|
||||
let ar_inner = Arc::clone(&active_requests);
|
||||
let cancel_inner = cancel.clone();
|
||||
let service = hyper::service::service_fn(move |req: Request<Incoming>| {
|
||||
// Detect frontend protocol from the first request on this connection.
|
||||
// OnceLock ensures only the first call opens the counter.
|
||||
let proto: &'static str = match req.version() {
|
||||
hyper::Version::HTTP_2 => "h2",
|
||||
_ => "h1",
|
||||
};
|
||||
ft_inner.set(proto);
|
||||
|
||||
// Mark request start — RAII guard decrements on drop (panic-safe)
|
||||
la_inner.store(start.elapsed().as_millis() as u64, Ordering::Relaxed);
|
||||
let req_guard = ActiveRequestGuard::new(Arc::clone(&ar_inner));
|
||||
@@ -655,17 +706,8 @@ impl HttpProxyService {
|
||||
.map(|p| p.as_str().eq_ignore_ascii_case("websocket"))
|
||||
.unwrap_or(false);
|
||||
|
||||
// Track frontend protocol for distribution metrics (h1/h2/h3/ws)
|
||||
let frontend_proto: &'static str = if is_h1_websocket || is_h2_websocket {
|
||||
"ws"
|
||||
} else {
|
||||
match req.version() {
|
||||
hyper::Version::HTTP_2 => "h2",
|
||||
hyper::Version::HTTP_3 => "h3",
|
||||
_ => "h1", // HTTP/1.0, HTTP/1.1
|
||||
}
|
||||
};
|
||||
let _frontend_proto_guard = ProtocolGuard::frontend(Arc::clone(&self.metrics), frontend_proto);
|
||||
// Frontend protocol is tracked at the connection level (handle_io / h3_service).
|
||||
// WebSocket tunnels additionally get their own "ws" guards in the spawned task.
|
||||
|
||||
if is_h1_websocket || is_h2_websocket {
|
||||
let result = self.handle_websocket_upgrade(
|
||||
@@ -1275,13 +1317,18 @@ impl HttpProxyService {
|
||||
}
|
||||
};
|
||||
|
||||
tokio::spawn(async move {
|
||||
match tokio::time::timeout(std::time::Duration::from_secs(300), conn).await {
|
||||
Ok(Err(e)) => debug!("Upstream connection error: {}", e),
|
||||
Err(_) => debug!("H1 connection driver timed out after 300s"),
|
||||
_ => {}
|
||||
}
|
||||
});
|
||||
{
|
||||
let driver_metrics = Arc::clone(&self.metrics);
|
||||
tokio::spawn(async move {
|
||||
// Track backend H1 connection for the driver's lifetime
|
||||
let _proto_guard = ProtocolGuard::backend(driver_metrics, "h1");
|
||||
match tokio::time::timeout(std::time::Duration::from_secs(300), conn).await {
|
||||
Ok(Err(e)) => debug!("Upstream connection error: {}", e),
|
||||
Err(_) => debug!("H1 connection driver timed out after 300s"),
|
||||
_ => {}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
self.forward_h1_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip, domain, conn_activity, backend_key).await
|
||||
}
|
||||
@@ -1402,7 +1449,10 @@ impl HttpProxyService {
|
||||
let pool = Arc::clone(&self.connection_pool);
|
||||
let key = pool_key.clone();
|
||||
let gen = Arc::clone(&gen_holder);
|
||||
let driver_metrics = Arc::clone(&self.metrics);
|
||||
tokio::spawn(async move {
|
||||
// Track backend H2 connection for the driver's lifetime
|
||||
let _proto_guard = ProtocolGuard::backend(driver_metrics, "h2");
|
||||
if let Err(e) = conn.await {
|
||||
warn!("HTTP/2 upstream connection error: {} ({:?})", e, e);
|
||||
}
|
||||
@@ -1701,7 +1751,10 @@ impl HttpProxyService {
|
||||
let pool = Arc::clone(&self.connection_pool);
|
||||
let key = pool_key.clone();
|
||||
let gen = Arc::clone(&gen_holder);
|
||||
let driver_metrics = Arc::clone(&self.metrics);
|
||||
tokio::spawn(async move {
|
||||
// Track backend H2 connection for the driver's lifetime
|
||||
let _proto_guard = ProtocolGuard::backend(driver_metrics, "h2");
|
||||
if let Err(e) = conn.await {
|
||||
warn!("HTTP/2 upstream connection error: {} ({:?})", e, e);
|
||||
}
|
||||
@@ -1871,13 +1924,18 @@ impl HttpProxyService {
|
||||
}
|
||||
};
|
||||
|
||||
tokio::spawn(async move {
|
||||
match tokio::time::timeout(std::time::Duration::from_secs(300), conn).await {
|
||||
Ok(Err(e)) => debug!("H1 fallback: upstream connection error: {}", e),
|
||||
Err(_) => debug!("H1 fallback: connection driver timed out after 300s"),
|
||||
_ => {}
|
||||
}
|
||||
});
|
||||
{
|
||||
let driver_metrics = Arc::clone(&self.metrics);
|
||||
tokio::spawn(async move {
|
||||
// Track backend H1 connection for the driver's lifetime
|
||||
let _proto_guard = ProtocolGuard::backend(driver_metrics, "h1");
|
||||
match tokio::time::timeout(std::time::Duration::from_secs(300), conn).await {
|
||||
Ok(Err(e)) => debug!("H1 fallback: upstream connection error: {}", e),
|
||||
Err(_) => debug!("H1 fallback: connection driver timed out after 300s"),
|
||||
_ => {}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let mut upstream_req = Request::builder()
|
||||
.method(method)
|
||||
@@ -2425,7 +2483,10 @@ impl HttpProxyService {
|
||||
selector: upstream_selector,
|
||||
key: upstream_key_owned.clone(),
|
||||
};
|
||||
// Track backend WebSocket connection — guard decrements on tunnel close
|
||||
// Track WebSocket tunnel as "ws" on both frontend and backend.
|
||||
// Frontend h1/h2 is tracked at the connection level (handle_io); this
|
||||
// additional "ws" guard captures the tunnel's lifetime independently.
|
||||
let _frontend_ws_guard = ProtocolGuard::frontend(Arc::clone(&metrics), "ws");
|
||||
let _backend_ws_guard = ProtocolGuard::backend(Arc::clone(&metrics), "ws");
|
||||
|
||||
let client_upgraded = match on_client_upgrade.await {
|
||||
@@ -2889,7 +2950,10 @@ impl HttpProxyService {
|
||||
let driver_pool = Arc::clone(&self.connection_pool);
|
||||
let driver_pool_key = pool_key.clone();
|
||||
let driver_gen = Arc::clone(&gen_holder);
|
||||
let driver_metrics = Arc::clone(&self.metrics);
|
||||
tokio::spawn(async move {
|
||||
// Track backend H3 connection for the driver's lifetime
|
||||
let _proto_guard = ProtocolGuard::backend(driver_metrics, "h3");
|
||||
let close_err = std::future::poll_fn(|cx| driver.poll_close(cx)).await;
|
||||
debug!("H3 connection driver closed: {:?}", close_err);
|
||||
let g = driver_gen.load(std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
@@ -506,13 +506,14 @@ impl MetricsCollector {
|
||||
total.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record a frontend request/connection closed with a given protocol.
|
||||
/// Record a frontend connection closed with a given protocol.
|
||||
pub fn frontend_protocol_closed(&self, proto: &str) {
|
||||
let (active, _) = self.frontend_proto_counters(proto);
|
||||
let val = active.load(Ordering::Relaxed);
|
||||
if val > 0 {
|
||||
active.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
// Atomic saturating decrement — avoids TOCTOU race where concurrent
|
||||
// closes could both read val=1, both subtract, wrapping to u64::MAX.
|
||||
active.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |v| {
|
||||
if v > 0 { Some(v - 1) } else { None }
|
||||
}).ok();
|
||||
}
|
||||
|
||||
/// Record a backend connection opened with a given protocol.
|
||||
@@ -525,10 +526,10 @@ impl MetricsCollector {
|
||||
/// Record a backend connection closed with a given protocol.
|
||||
pub fn backend_protocol_closed(&self, proto: &str) {
|
||||
let (active, _) = self.backend_proto_counters(proto);
|
||||
let val = active.load(Ordering::Relaxed);
|
||||
if val > 0 {
|
||||
active.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
// Atomic saturating decrement — see frontend_protocol_closed for rationale.
|
||||
active.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |v| {
|
||||
if v > 0 { Some(v - 1) } else { None }
|
||||
}).ok();
|
||||
}
|
||||
|
||||
// ── Per-backend recording methods ──
|
||||
|
||||
Reference in New Issue
Block a user