From 5271447264d7abda89e1a58534b122cd6d17ebd1 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Wed, 11 Mar 2026 11:28:57 +0000 Subject: [PATCH] fix(rustproxy-http): Evict stale HTTP/2 pooled senders and retry bodyless requests with fresh backend connections to avoid 502s --- changelog.md | 10 + .../rustproxy-http/src/connection_pool.rs | 18 +- .../rustproxy-http/src/proxy_service.rs | 232 +++++++++++++++++- ts/00_commitinfo_data.ts | 2 +- 4 files changed, 247 insertions(+), 15 deletions(-) diff --git a/changelog.md b/changelog.md index 8888416..a8252f3 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,15 @@ # Changelog +## 2026-03-11 - 25.9.3 - fix(rustproxy-http) +Evict stale HTTP/2 pooled senders and retry bodyless requests with fresh backend connections to avoid 502s + +- Introduce MAX_H2_AGE (120s) and evict HTTP/2 senders older than this or closed +- Check MAX_H2_AGE on checkout and during background eviction to prevent reuse of stale h2 connections +- Add connection_pool.remove_h2() to explicitly remove dead H2 senders from the pool +- When a pooled H2 request returns a 502 and the original request had an empty body, retry using a fresh H2 connection (retry_h2_with_fresh_connection) +- On H2 auto-detect failures, retry as HTTP/1.1 for bodyless requests via forward_h1_empty_body; return 502 for requests with bodies +- Evict dead H2 senders on backend request failures in forward_h2_with_sender so subsequent attempts create fresh connections + ## 2026-03-08 - 25.9.2 - fix(protocol-cache) Include requested_host in protocol detection cache key to avoid cache oscillation when multiple frontend domains share the same backend diff --git a/rust/crates/rustproxy-http/src/connection_pool.rs b/rust/crates/rustproxy-http/src/connection_pool.rs index 98980e6..0f466ba 100644 --- a/rust/crates/rustproxy-http/src/connection_pool.rs +++ b/rust/crates/rustproxy-http/src/connection_pool.rs @@ -18,6 +18,9 @@ const MAX_IDLE_PER_KEY: usize = 16; const IDLE_TIMEOUT: Duration = 
Duration::from_secs(90); /// Background eviction interval. const EVICTION_INTERVAL: Duration = Duration::from_secs(30); +/// Maximum age for pooled HTTP/2 connections before proactive eviction. +/// Prevents staleness from backends that close idle connections (e.g. nginx GOAWAY). +const MAX_H2_AGE: Duration = Duration::from_secs(120); /// Identifies a unique backend endpoint. #[derive(Clone, Debug, Hash, Eq, PartialEq)] @@ -37,7 +40,6 @@ struct IdleH1 { /// A pooled HTTP/2 sender (multiplexed, Clone-able). struct PooledH2 { sender: http2::SendRequest>, - #[allow(dead_code)] // Reserved for future age-based eviction created_at: Instant, } @@ -116,8 +118,8 @@ impl ConnectionPool { let entry = self.h2_pool.get(key)?; let pooled = entry.value(); - // Check if the h2 connection is still alive - if pooled.sender.is_closed() { + // Check if the h2 connection is still alive and not too old + if pooled.sender.is_closed() || pooled.created_at.elapsed() >= MAX_H2_AGE { drop(entry); self.h2_pool.remove(key); return None; @@ -130,6 +132,12 @@ impl ConnectionPool { None } + /// Remove a dead HTTP/2 sender from the pool. + /// Called when `send_request` fails to prevent subsequent requests from reusing the stale sender. + pub fn remove_h2(&self, key: &PoolKey) { + self.h2_pool.remove(key); + } + /// Register an HTTP/2 sender in the pool. Since h2 is multiplexed, /// only one sender per key is stored (it's Clone-able). 
pub fn register_h2(&self, key: PoolKey, sender: http2::SendRequest>) { @@ -165,10 +173,10 @@ impl ConnectionPool { h1_pool.remove(&key); } - // Evict dead H2 connections + // Evict dead or aged-out H2 connections let mut dead_h2 = Vec::new(); for entry in h2_pool.iter() { - if entry.value().sender.is_closed() { + if entry.value().sender.is_closed() || entry.value().created_at.elapsed() >= MAX_H2_AGE { dead_h2.push(entry.key().clone()); } } diff --git a/rust/crates/rustproxy-http/src/proxy_service.rs b/rust/crates/rustproxy-http/src/proxy_service.rs index e633cc3..2cc1035 100644 --- a/rust/crates/rustproxy-http/src/proxy_service.rs +++ b/rust/crates/rustproxy-http/src/proxy_service.rs @@ -11,6 +11,7 @@ use std::sync::atomic::{AtomicU64, Ordering}; use arc_swap::ArcSwap; use bytes::Bytes; use dashmap::DashMap; +use http_body::Body as HttpBody; use http_body_util::{BodyExt, Full, combinators::BoxBody}; use hyper::body::Incoming; use hyper::{Request, Response, StatusCode}; @@ -872,10 +873,12 @@ impl HttpProxyService { // Register for multiplexed reuse self.connection_pool.register_h2(pool_key.clone(), sender.clone()); - self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await + self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip, Some(pool_key)).await } /// Forward request using an existing (pooled) HTTP/2 sender. + /// If the pooled sender is stale (GOAWAY, connection closed), evicts it and retries + /// with a fresh connection for bodyless requests (GET/HEAD/DELETE). 
async fn forward_h2_pooled( &self, sender: hyper::client::conn::http2::SendRequest>, @@ -886,9 +889,129 @@ impl HttpProxyService { route: &rustproxy_config::RouteConfig, route_id: Option<&str>, source_ip: &str, - _pool_key: &crate::connection_pool::PoolKey, + pool_key: &crate::connection_pool::PoolKey, ) -> Result>, hyper::Error> { - self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await + // Save retry state for bodyless requests (cheap: Method is an enum, HeaderMap clones Arc-backed Bytes) + let retry_state = if body.is_end_stream() { + Some((parts.method.clone(), upstream_headers.clone())) + } else { + None + }; + + let result = self.forward_h2_with_sender( + sender, parts, body, upstream_headers, upstream_path, + route, route_id, source_ip, Some(pool_key), + ).await; + + // If the request failed (502) and we can retry with an empty body, do so + let is_502 = matches!(&result, Ok(resp) if resp.status() == StatusCode::BAD_GATEWAY); + if is_502 { + if let Some((method, headers)) = retry_state { + warn!("Stale pooled H2 sender for {}:{}, retrying with fresh connection", + pool_key.host, pool_key.port); + return self.retry_h2_with_fresh_connection( + method, headers, upstream_path, + pool_key, route, route_id, source_ip, + ).await; + } + } + result + } + + /// Retry an H2 request with a fresh backend connection and empty body. + /// Used when a pooled sender was stale (GOAWAY/closed) and the original body was empty. 
+ async fn retry_h2_with_fresh_connection( + &self, + method: hyper::Method, + upstream_headers: hyper::HeaderMap, + upstream_path: &str, + pool_key: &crate::connection_pool::PoolKey, + route: &rustproxy_config::RouteConfig, + route_id: Option<&str>, + source_ip: &str, + ) -> Result>, hyper::Error> { + // Establish fresh backend connection + let backend = if pool_key.use_tls { + match tokio::time::timeout( + self.connect_timeout, + connect_tls_backend(&self.backend_tls_config, &pool_key.host, pool_key.port), + ).await { + Ok(Ok(tls)) => BackendStream::Tls(tls), + Ok(Err(e)) => { + error!("H2 retry: TLS connect failed for {}:{}: {}", pool_key.host, pool_key.port, e); + return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable on H2 retry")); + } + Err(_) => { + error!("H2 retry: TLS connect timeout for {}:{}", pool_key.host, pool_key.port); + return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend timeout on H2 retry")); + } + } + } else { + match tokio::time::timeout( + self.connect_timeout, + TcpStream::connect(format!("{}:{}", pool_key.host, pool_key.port)), + ).await { + Ok(Ok(s)) => { + s.set_nodelay(true).ok(); + BackendStream::Plain(s) + } + Ok(Err(e)) => { + error!("H2 retry: connect failed for {}:{}: {}", pool_key.host, pool_key.port, e); + return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable on H2 retry")); + } + Err(_) => { + error!("H2 retry: connect timeout for {}:{}", pool_key.host, pool_key.port); + return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend timeout on H2 retry")); + } + } + }; + + let io = TokioIo::new(backend); + let exec = hyper_util::rt::TokioExecutor::new(); + let (mut sender, conn): ( + hyper::client::conn::http2::SendRequest>, + hyper::client::conn::http2::Connection, BoxBody, hyper_util::rt::TokioExecutor>, + ) = match hyper::client::conn::http2::handshake(exec, io).await { + Ok(h) => h, + Err(e) => { + error!("H2 retry: handshake failed for {}:{}: {}", pool_key.host, pool_key.port, e); 
+ return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 retry handshake failed")); + } + }; + + tokio::spawn(async move { + if let Err(e) = conn.await { + debug!("H2 retry: upstream connection error: {}", e); + } + }); + + // Register fresh sender in pool for future requests + self.connection_pool.register_h2(pool_key.clone(), sender.clone()); + + // Build request with empty body + let mut upstream_req = Request::builder() + .method(method) + .uri(upstream_path); + + if let Some(headers) = upstream_req.headers_mut() { + *headers = upstream_headers; + } + + let empty_body: BoxBody = BoxBody::new( + http_body_util::Empty::new().map_err(|never| match never {}) + ); + let upstream_req = upstream_req.body(empty_body).unwrap(); + + match sender.send_request(upstream_req).await { + Ok(resp) => { + self.build_streaming_response(resp, route, route_id, source_ip).await + } + Err(e) => { + error!("H2 retry: request failed for {}:{}: {}", pool_key.host, pool_key.port, e); + self.connection_pool.remove_h2(pool_key); + Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed on retry")) + } + } } /// Forward via HTTP/2 with fallback to HTTP/1.1 (auto-detect mode). @@ -896,8 +1019,8 @@ impl HttpProxyService { /// Handles two failure scenarios: /// 1. H2 handshake fails → reconnects and falls back to H1 (body not consumed yet). /// 2. H2 handshake "succeeds" but request fails (backend advertises h2 via ALPN but - /// doesn't actually speak h2) → updates cache to H1. The request body is consumed - /// so this request fails, but all subsequent requests will correctly use H1. + /// doesn't actually speak h2) → updates cache to H1, retries as H1 for bodyless + /// requests, or returns 502 for requests with bodies. 
async fn forward_h2_with_fallback( &self, io: TokioIo, @@ -926,6 +1049,13 @@ impl HttpProxyService { } }); + // Save retry state before consuming parts/body (for bodyless requests like GET) + let retry_state = if body.is_end_stream() { + Some((parts.method.clone(), upstream_headers.clone())) + } else { + None + }; + // Build and send the h2 request inline (don't register in pool yet — // we need to verify the request actually succeeds first, because some // backends advertise h2 via ALPN but don't speak the h2 binary protocol). @@ -956,10 +1086,8 @@ impl HttpProxyService { Err(e) => { // H2 request failed — backend advertises h2 via ALPN but doesn't // actually speak it. Update cache so future requests use H1. - // The request body is consumed so this request can't be retried, - // but all subsequent requests will correctly use H1. warn!( - "Auto-detect: H2 request failed for {}:{}, updating cache to H1: {}", + "Auto-detect: H2 request failed for {}:{}, falling back to H1: {}", upstream.host, upstream.port, e ); let cache_key = crate::protocol_cache::ProtocolCacheKey { @@ -968,7 +1096,30 @@ impl HttpProxyService { requested_host: requested_host.clone(), }; self.protocol_cache.insert(cache_key, crate::protocol_cache::DetectedProtocol::H1); - Ok(error_response(StatusCode::BAD_GATEWAY, "Backend protocol mismatch, retrying with H1")) + + // Retry as H1 for bodyless requests; return 502 for requests with bodies + if let Some((method, headers)) = retry_state { + match self.reconnect_backend(upstream).await { + Some(fallback_backend) => { + let h1_pool_key = crate::connection_pool::PoolKey { + host: upstream.host.clone(), + port: upstream.port, + use_tls: upstream.use_tls, + h2: false, + }; + let fallback_io = TokioIo::new(fallback_backend); + self.forward_h1_empty_body( + fallback_io, method, headers, upstream_path, + route, route_id, source_ip, &h1_pool_key, + ).await + } + None => { + Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable after H2 fallback")) 
+ } + } + } else { + Ok(error_response(StatusCode::BAD_GATEWAY, "Backend protocol mismatch")) + } } } } @@ -1011,6 +1162,64 @@ impl HttpProxyService { } } + /// Forward a request with an empty body via HTTP/1.1. + /// Used when retrying after a failed H2 attempt where the original body was consumed. + async fn forward_h1_empty_body( + &self, + io: TokioIo, + method: hyper::Method, + upstream_headers: hyper::HeaderMap, + upstream_path: &str, + route: &rustproxy_config::RouteConfig, + route_id: Option<&str>, + source_ip: &str, + pool_key: &crate::connection_pool::PoolKey, + ) -> Result>, hyper::Error> { + let (mut sender, conn): ( + hyper::client::conn::http1::SendRequest>, + hyper::client::conn::http1::Connection, BoxBody>, + ) = match hyper::client::conn::http1::handshake(io).await { + Ok(h) => h, + Err(e) => { + error!("H1 fallback: handshake failed: {}", e); + return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H1 fallback handshake failed")); + } + }; + + tokio::spawn(async move { + if let Err(e) = conn.await { + debug!("H1 fallback: upstream connection error: {}", e); + } + }); + + let mut upstream_req = Request::builder() + .method(method) + .uri(upstream_path) + .version(hyper::Version::HTTP_11); + + if let Some(headers) = upstream_req.headers_mut() { + *headers = upstream_headers; + } + + let empty_body: BoxBody = BoxBody::new( + http_body_util::Empty::new().map_err(|never| match never {}) + ); + let upstream_req = upstream_req.body(empty_body).unwrap(); + + let upstream_response = match sender.send_request(upstream_req).await { + Ok(resp) => resp, + Err(e) => { + error!("H1 fallback: request failed: {}", e); + return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H1 fallback request failed")); + } + }; + + // Return sender to pool for keep-alive reuse + self.connection_pool.checkin_h1(pool_key.clone(), sender); + + self.build_streaming_response(upstream_response, route, route_id, source_ip).await + } + /// Reconnect to a backend (used for H2→H1 
fallback). async fn reconnect_backend( &self, @@ -1066,6 +1275,7 @@ impl HttpProxyService { route: &rustproxy_config::RouteConfig, route_id: Option<&str>, source_ip: &str, + pool_key: Option<&crate::connection_pool::PoolKey>, ) -> Result>, hyper::Error> { let mut upstream_req = Request::builder() .method(parts.method) @@ -1091,6 +1301,10 @@ impl HttpProxyService { Ok(resp) => resp, Err(e) => { error!("HTTP/2 upstream request failed: {}", e); + // Evict the dead sender so subsequent requests get fresh connections + if let Some(key) = pool_key { + self.connection_pool.remove_h2(key); + } return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed")); } }; diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 4bb252c..f77a18d 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartproxy', - version: '25.9.2', + version: '25.9.3', description: 'A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.' }