fix(rustproxy-http): Evict stale HTTP/2 pooled senders and retry bodyless requests with fresh backend connections to avoid 502s

This commit is contained in:
2026-03-11 11:28:57 +00:00
parent be9898805f
commit 5271447264
4 changed files with 247 additions and 15 deletions

View File

@@ -1,5 +1,15 @@
# Changelog
## 2026-03-11 - 25.9.3 - fix(rustproxy-http)
Evict stale HTTP/2 pooled senders and retry bodyless requests with fresh backend connections to avoid 502s
- Introduce MAX_H2_AGE (120s) and evict pooled HTTP/2 senders that are closed or older than this limit
- Check MAX_H2_AGE on checkout and during background eviction to prevent reuse of stale h2 connections
- Add connection_pool.remove_h2() to explicitly remove dead H2 senders from the pool
- When a pooled H2 request returns a 502 and the original request had an empty body, retry using a fresh H2 connection (retry_h2_with_fresh_connection)
- On H2 auto-detect failures, retry as HTTP/1.1 for bodyless requests via forward_h1_empty_body; return 502 for requests with bodies
- Evict dead H2 senders from the pool when a backend request fails in forward_h2_with_sender so subsequent attempts create fresh connections
## 2026-03-08 - 25.9.2 - fix(protocol-cache)
Include requested_host in protocol detection cache key to avoid cache oscillation when multiple frontend domains share the same backend

View File

@@ -18,6 +18,9 @@ const MAX_IDLE_PER_KEY: usize = 16;
const IDLE_TIMEOUT: Duration = Duration::from_secs(90);
/// Background eviction interval.
const EVICTION_INTERVAL: Duration = Duration::from_secs(30);
/// Maximum age for pooled HTTP/2 connections before proactive eviction.
/// Prevents staleness from backends that close idle connections (e.g. nginx GOAWAY).
const MAX_H2_AGE: Duration = Duration::from_secs(120);
/// Identifies a unique backend endpoint.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
@@ -37,7 +40,6 @@ struct IdleH1 {
/// A pooled HTTP/2 sender (multiplexed, Clone-able).
struct PooledH2 {
sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
#[allow(dead_code)] // Reserved for future age-based eviction
created_at: Instant,
}
@@ -116,8 +118,8 @@ impl ConnectionPool {
let entry = self.h2_pool.get(key)?;
let pooled = entry.value();
// Check if the h2 connection is still alive
if pooled.sender.is_closed() {
// Check if the h2 connection is still alive and not too old
if pooled.sender.is_closed() || pooled.created_at.elapsed() >= MAX_H2_AGE {
drop(entry);
self.h2_pool.remove(key);
return None;
@@ -130,6 +132,12 @@ impl ConnectionPool {
None
}
/// Evict whatever HTTP/2 sender is currently pooled under `key`.
///
/// Meant to be called right after `send_request` fails on a pooled sender, so that
/// later requests for the same backend do not pick the stale sender up again.
pub fn remove_h2(&self, key: &PoolKey) {
    // The removed entry (if there was one) is dropped immediately; nothing is handed back.
    let _ = self.h2_pool.remove(key);
}
/// Register an HTTP/2 sender in the pool. Since h2 is multiplexed,
/// only one sender per key is stored (it's Clone-able).
pub fn register_h2(&self, key: PoolKey, sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>) {
@@ -165,10 +173,10 @@ impl ConnectionPool {
h1_pool.remove(&key);
}
// Evict dead H2 connections
// Evict dead or aged-out H2 connections
let mut dead_h2 = Vec::new();
for entry in h2_pool.iter() {
if entry.value().sender.is_closed() {
if entry.value().sender.is_closed() || entry.value().created_at.elapsed() >= MAX_H2_AGE {
dead_h2.push(entry.key().clone());
}
}

View File

@@ -11,6 +11,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
use arc_swap::ArcSwap;
use bytes::Bytes;
use dashmap::DashMap;
use http_body::Body as HttpBody;
use http_body_util::{BodyExt, Full, combinators::BoxBody};
use hyper::body::Incoming;
use hyper::{Request, Response, StatusCode};
@@ -872,10 +873,12 @@ impl HttpProxyService {
// Register for multiplexed reuse
self.connection_pool.register_h2(pool_key.clone(), sender.clone());
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip, Some(pool_key)).await
}
/// Forward request using an existing (pooled) HTTP/2 sender.
/// If the attempt ends in a 502 (e.g. the pooled sender is stale after a GOAWAY or a closed
/// connection) and the request body is already at end-of-stream (typically GET/HEAD/DELETE),
/// the request is retried once on a fresh backend connection.
async fn forward_h2_pooled(
&self,
sender: hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
@@ -886,9 +889,129 @@ impl HttpProxyService {
route: &rustproxy_config::RouteConfig,
route_id: Option<&str>,
source_ip: &str,
_pool_key: &crate::connection_pool::PoolKey,
pool_key: &crate::connection_pool::PoolKey,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await
// Save retry state for bodyless requests (cheap: Method is an enum, HeaderMap clones Arc-backed Bytes)
let retry_state = if body.is_end_stream() {
Some((parts.method.clone(), upstream_headers.clone()))
} else {
None
};
let result = self.forward_h2_with_sender(
sender, parts, body, upstream_headers, upstream_path,
route, route_id, source_ip, Some(pool_key),
).await;
// If the request failed (502) and we can retry with an empty body, do so
let is_502 = matches!(&result, Ok(resp) if resp.status() == StatusCode::BAD_GATEWAY);
if is_502 {
if let Some((method, headers)) = retry_state {
warn!("Stale pooled H2 sender for {}:{}, retrying with fresh connection",
pool_key.host, pool_key.port);
return self.retry_h2_with_fresh_connection(
method, headers, upstream_path,
pool_key, route, route_id, source_ip,
).await;
}
}
result
}
/// Retry an H2 request with a fresh backend connection and empty body.
/// Used when a pooled sender was stale (GOAWAY/closed) and the original body was empty.
async fn retry_h2_with_fresh_connection(
&self,
method: hyper::Method,
upstream_headers: hyper::HeaderMap,
upstream_path: &str,
pool_key: &crate::connection_pool::PoolKey,
route: &rustproxy_config::RouteConfig,
route_id: Option<&str>,
source_ip: &str,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
// Establish fresh backend connection
let backend = if pool_key.use_tls {
match tokio::time::timeout(
self.connect_timeout,
connect_tls_backend(&self.backend_tls_config, &pool_key.host, pool_key.port),
).await {
Ok(Ok(tls)) => BackendStream::Tls(tls),
Ok(Err(e)) => {
error!("H2 retry: TLS connect failed for {}:{}: {}", pool_key.host, pool_key.port, e);
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable on H2 retry"));
}
Err(_) => {
error!("H2 retry: TLS connect timeout for {}:{}", pool_key.host, pool_key.port);
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend timeout on H2 retry"));
}
}
} else {
match tokio::time::timeout(
self.connect_timeout,
TcpStream::connect(format!("{}:{}", pool_key.host, pool_key.port)),
).await {
Ok(Ok(s)) => {
s.set_nodelay(true).ok();
BackendStream::Plain(s)
}
Ok(Err(e)) => {
error!("H2 retry: connect failed for {}:{}: {}", pool_key.host, pool_key.port, e);
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable on H2 retry"));
}
Err(_) => {
error!("H2 retry: connect timeout for {}:{}", pool_key.host, pool_key.port);
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend timeout on H2 retry"));
}
}
};
let io = TokioIo::new(backend);
let exec = hyper_util::rt::TokioExecutor::new();
let (mut sender, conn): (
hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
hyper::client::conn::http2::Connection<TokioIo<BackendStream>, BoxBody<Bytes, hyper::Error>, hyper_util::rt::TokioExecutor>,
) = match hyper::client::conn::http2::handshake(exec, io).await {
Ok(h) => h,
Err(e) => {
error!("H2 retry: handshake failed for {}:{}: {}", pool_key.host, pool_key.port, e);
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 retry handshake failed"));
}
};
tokio::spawn(async move {
if let Err(e) = conn.await {
debug!("H2 retry: upstream connection error: {}", e);
}
});
// Register fresh sender in pool for future requests
self.connection_pool.register_h2(pool_key.clone(), sender.clone());
// Build request with empty body
let mut upstream_req = Request::builder()
.method(method)
.uri(upstream_path);
if let Some(headers) = upstream_req.headers_mut() {
*headers = upstream_headers;
}
let empty_body: BoxBody<Bytes, hyper::Error> = BoxBody::new(
http_body_util::Empty::new().map_err(|never| match never {})
);
let upstream_req = upstream_req.body(empty_body).unwrap();
match sender.send_request(upstream_req).await {
Ok(resp) => {
self.build_streaming_response(resp, route, route_id, source_ip).await
}
Err(e) => {
error!("H2 retry: request failed for {}:{}: {}", pool_key.host, pool_key.port, e);
self.connection_pool.remove_h2(pool_key);
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed on retry"))
}
}
}
/// Forward via HTTP/2 with fallback to HTTP/1.1 (auto-detect mode).
@@ -896,8 +1019,8 @@ impl HttpProxyService {
/// Handles two failure scenarios:
/// 1. H2 handshake fails → reconnects and falls back to H1 (body not consumed yet).
/// 2. H2 handshake "succeeds" but request fails (backend advertises h2 via ALPN but
/// doesn't actually speak h2) → updates cache to H1. The request body is consumed
/// so this request fails, but all subsequent requests will correctly use H1.
/// doesn't actually speak h2) → updates cache to H1, retries as H1 for bodyless
/// requests, or returns 502 for requests with bodies.
async fn forward_h2_with_fallback(
&self,
io: TokioIo<BackendStream>,
@@ -926,6 +1049,13 @@ impl HttpProxyService {
}
});
// Save retry state before consuming parts/body (for bodyless requests like GET)
let retry_state = if body.is_end_stream() {
Some((parts.method.clone(), upstream_headers.clone()))
} else {
None
};
// Build and send the h2 request inline (don't register in pool yet —
// we need to verify the request actually succeeds first, because some
// backends advertise h2 via ALPN but don't speak the h2 binary protocol).
@@ -956,10 +1086,8 @@ impl HttpProxyService {
Err(e) => {
// H2 request failed — backend advertises h2 via ALPN but doesn't
// actually speak it. Update cache so future requests use H1.
// The request body is consumed so this request can't be retried,
// but all subsequent requests will correctly use H1.
warn!(
"Auto-detect: H2 request failed for {}:{}, updating cache to H1: {}",
"Auto-detect: H2 request failed for {}:{}, falling back to H1: {}",
upstream.host, upstream.port, e
);
let cache_key = crate::protocol_cache::ProtocolCacheKey {
@@ -968,7 +1096,30 @@ impl HttpProxyService {
requested_host: requested_host.clone(),
};
self.protocol_cache.insert(cache_key, crate::protocol_cache::DetectedProtocol::H1);
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend protocol mismatch, retrying with H1"))
// Retry as H1 for bodyless requests; return 502 for requests with bodies
if let Some((method, headers)) = retry_state {
match self.reconnect_backend(upstream).await {
Some(fallback_backend) => {
let h1_pool_key = crate::connection_pool::PoolKey {
host: upstream.host.clone(),
port: upstream.port,
use_tls: upstream.use_tls,
h2: false,
};
let fallback_io = TokioIo::new(fallback_backend);
self.forward_h1_empty_body(
fallback_io, method, headers, upstream_path,
route, route_id, source_ip, &h1_pool_key,
).await
}
None => {
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable after H2 fallback"))
}
}
} else {
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend protocol mismatch"))
}
}
}
}
@@ -1011,6 +1162,64 @@ impl HttpProxyService {
}
}
/// Forward a request with an empty body via HTTP/1.1.
/// Used when retrying after a failed H2 attempt where the original body was consumed.
async fn forward_h1_empty_body(
&self,
io: TokioIo<BackendStream>,
method: hyper::Method,
upstream_headers: hyper::HeaderMap,
upstream_path: &str,
route: &rustproxy_config::RouteConfig,
route_id: Option<&str>,
source_ip: &str,
pool_key: &crate::connection_pool::PoolKey,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let (mut sender, conn): (
hyper::client::conn::http1::SendRequest<BoxBody<Bytes, hyper::Error>>,
hyper::client::conn::http1::Connection<TokioIo<BackendStream>, BoxBody<Bytes, hyper::Error>>,
) = match hyper::client::conn::http1::handshake(io).await {
Ok(h) => h,
Err(e) => {
error!("H1 fallback: handshake failed: {}", e);
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H1 fallback handshake failed"));
}
};
tokio::spawn(async move {
if let Err(e) = conn.await {
debug!("H1 fallback: upstream connection error: {}", e);
}
});
let mut upstream_req = Request::builder()
.method(method)
.uri(upstream_path)
.version(hyper::Version::HTTP_11);
if let Some(headers) = upstream_req.headers_mut() {
*headers = upstream_headers;
}
let empty_body: BoxBody<Bytes, hyper::Error> = BoxBody::new(
http_body_util::Empty::new().map_err(|never| match never {})
);
let upstream_req = upstream_req.body(empty_body).unwrap();
let upstream_response = match sender.send_request(upstream_req).await {
Ok(resp) => resp,
Err(e) => {
error!("H1 fallback: request failed: {}", e);
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H1 fallback request failed"));
}
};
// Return sender to pool for keep-alive reuse
self.connection_pool.checkin_h1(pool_key.clone(), sender);
self.build_streaming_response(upstream_response, route, route_id, source_ip).await
}
/// Reconnect to a backend (used for H2→H1 fallback).
async fn reconnect_backend(
&self,
@@ -1066,6 +1275,7 @@ impl HttpProxyService {
route: &rustproxy_config::RouteConfig,
route_id: Option<&str>,
source_ip: &str,
pool_key: Option<&crate::connection_pool::PoolKey>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let mut upstream_req = Request::builder()
.method(parts.method)
@@ -1091,6 +1301,10 @@ impl HttpProxyService {
Ok(resp) => resp,
Err(e) => {
error!("HTTP/2 upstream request failed: {}", e);
// Evict the dead sender so subsequent requests get fresh connections
if let Some(key) = pool_key {
self.connection_pool.remove_h2(key);
}
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed"));
}
};

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartproxy',
version: '25.9.2',
version: '25.9.3',
description: 'A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.'
}