feat(rustproxy-http): add HTTP/2 auto-detection via ALPN with TTL-backed protocol cache and h1-only/h2 ALPN client configs

This commit is contained in:
2026-03-03 11:04:01 +00:00
parent e69de246e9
commit c52128f12d
10 changed files with 492 additions and 31 deletions

View File

@@ -5,6 +5,7 @@
pub mod connection_pool;
pub mod counting_body;
pub mod protocol_cache;
pub mod proxy_service;
pub mod request_filter;
pub mod response_filter;

View File

@@ -0,0 +1,136 @@
//! Bounded, TTL-based protocol detection cache for HTTP/2 auto-detection.
//!
//! Caches the ALPN-negotiated protocol (H1 or H2) per backend endpoint (host:port).
//! Prevents repeated ALPN probes for backends whose protocol is already known.
use std::sync::Arc;
use std::time::{Duration, Instant};
use dashmap::DashMap;
use tracing::debug;
/// TTL for cached protocol detection results.
/// After this duration, the next request will re-probe the backend.
const PROTOCOL_CACHE_TTL: Duration = Duration::from_secs(300); // 5 minutes
/// Maximum number of entries in the protocol cache.
/// Prevents unbounded growth when backends come and go.
const PROTOCOL_CACHE_MAX_ENTRIES: usize = 4096;
/// Background cleanup interval for the protocol cache.
const PROTOCOL_CACHE_CLEANUP_INTERVAL: Duration = Duration::from_secs(60);
/// Detected backend protocol.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DetectedProtocol {
H1,
H2,
}
/// Key for the protocol cache: (host, port).
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
pub struct ProtocolCacheKey {
pub host: String,
pub port: u16,
}
/// A cached protocol detection result with a timestamp.
struct CachedEntry {
protocol: DetectedProtocol,
detected_at: Instant,
}
/// Bounded, TTL-based protocol detection cache.
///
/// Memory safety guarantees:
/// - Hard cap at `PROTOCOL_CACHE_MAX_ENTRIES` — cannot grow unboundedly.
/// - TTL expiry — stale entries naturally age out on lookup.
/// - Background cleanup task — proactively removes expired entries every 60s.
/// - `clear()` — called on route updates to discard stale detections.
/// - `Drop` — aborts the background task to prevent dangling tokio tasks.
pub struct ProtocolCache {
cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>>,
cleanup_handle: Option<tokio::task::JoinHandle<()>>,
}
impl ProtocolCache {
/// Create a new protocol cache and start the background cleanup task.
pub fn new() -> Self {
let cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>> = Arc::new(DashMap::new());
let cache_clone = Arc::clone(&cache);
let cleanup_handle = tokio::spawn(async move {
Self::cleanup_loop(cache_clone).await;
});
Self {
cache,
cleanup_handle: Some(cleanup_handle),
}
}
/// Look up the cached protocol for a backend endpoint.
/// Returns `None` if not cached or expired (caller should probe via ALPN).
pub fn get(&self, key: &ProtocolCacheKey) -> Option<DetectedProtocol> {
let entry = self.cache.get(key)?;
if entry.detected_at.elapsed() < PROTOCOL_CACHE_TTL {
debug!("Protocol cache hit: {:?} for {}:{}", entry.protocol, key.host, key.port);
Some(entry.protocol)
} else {
// Expired — remove and return None to trigger re-probe
drop(entry); // release DashMap ref before remove
self.cache.remove(key);
None
}
}
/// Insert a detected protocol into the cache.
/// If the cache is at capacity, evict the oldest entry first.
pub fn insert(&self, key: ProtocolCacheKey, protocol: DetectedProtocol) {
if self.cache.len() >= PROTOCOL_CACHE_MAX_ENTRIES && !self.cache.contains_key(&key) {
// Evict the oldest entry to stay within bounds
let oldest = self.cache.iter()
.min_by_key(|entry| entry.value().detected_at)
.map(|entry| entry.key().clone());
if let Some(oldest_key) = oldest {
self.cache.remove(&oldest_key);
}
}
self.cache.insert(key, CachedEntry {
protocol,
detected_at: Instant::now(),
});
}
/// Clear all entries. Called on route updates to discard stale detections.
pub fn clear(&self) {
self.cache.clear();
}
/// Background cleanup loop — removes expired entries every `PROTOCOL_CACHE_CLEANUP_INTERVAL`.
async fn cleanup_loop(cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>>) {
let mut interval = tokio::time::interval(PROTOCOL_CACHE_CLEANUP_INTERVAL);
loop {
interval.tick().await;
let expired: Vec<ProtocolCacheKey> = cache.iter()
.filter(|entry| entry.value().detected_at.elapsed() >= PROTOCOL_CACHE_TTL)
.map(|entry| entry.key().clone())
.collect();
if !expired.is_empty() {
debug!("Protocol cache cleanup: removing {} expired entries", expired.len());
for key in expired {
cache.remove(&key);
}
}
}
}
}
impl Drop for ProtocolCache {
fn drop(&mut self) {
if let Some(handle) = self.cleanup_handle.take() {
handle.abort();
}
}
}

View File

@@ -146,8 +146,12 @@ pub struct HttpProxyService {
regex_cache: DashMap<String, Regex>,
/// Shared backend TLS config for session resumption across connections.
backend_tls_config: Arc<rustls::ClientConfig>,
/// Backend TLS config with ALPN h2+http/1.1 for auto-detection mode.
backend_tls_config_alpn: Arc<rustls::ClientConfig>,
/// Backend connection pool for reusing keep-alive connections.
connection_pool: Arc<crate::connection_pool::ConnectionPool>,
/// Protocol detection cache for auto mode (caches ALPN-detected protocol per backend).
protocol_cache: Arc<crate::protocol_cache::ProtocolCache>,
/// HTTP keep-alive idle timeout: close connection if no new request arrives within this duration.
http_idle_timeout: std::time::Duration,
/// WebSocket inactivity timeout (no data in either direction).
@@ -167,7 +171,9 @@ impl HttpProxyService {
request_counter: AtomicU64::new(0),
regex_cache: DashMap::new(),
backend_tls_config: Self::default_backend_tls_config(),
backend_tls_config_alpn: Self::default_backend_tls_config_with_alpn(),
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
protocol_cache: Arc::new(crate::protocol_cache::ProtocolCache::new()),
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
@@ -189,7 +195,9 @@ impl HttpProxyService {
request_counter: AtomicU64::new(0),
regex_cache: DashMap::new(),
backend_tls_config: Self::default_backend_tls_config(),
backend_tls_config_alpn: Self::default_backend_tls_config_with_alpn(),
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
protocol_cache: Arc::new(crate::protocol_cache::ProtocolCache::new()),
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
@@ -215,12 +223,18 @@ impl HttpProxyService {
self.backend_tls_config = config;
}
/// Set the shared backend TLS config with ALPN h2+http/1.1 (for auto-detection mode).
pub fn set_backend_tls_config_alpn(&mut self, config: Arc<rustls::ClientConfig>) {
self.backend_tls_config_alpn = config;
}
/// Prune caches for route IDs that are no longer active.
/// Call after route updates to prevent unbounded growth.
pub fn prune_stale_routes(&self, active_route_ids: &std::collections::HashSet<String>) {
self.route_rate_limiters.retain(|k, _| active_route_ids.contains(k));
self.regex_cache.clear();
self.upstream_selector.reset_round_robin();
self.protocol_cache.clear();
}
/// Handle an incoming HTTP connection on a plain TCP stream.
@@ -480,11 +494,11 @@ impl HttpProxyService {
return result;
}
// Determine backend protocol
let use_h2 = route_match.route.action.options.as_ref()
// Determine backend protocol mode
let backend_protocol_mode = route_match.route.action.options.as_ref()
.and_then(|o| o.backend_protocol.as_ref())
.map(|p| *p == rustproxy_config::BackendProtocol::Http2)
.unwrap_or(false);
.cloned()
.unwrap_or(rustproxy_config::BackendProtocol::Auto);
// Build the upstream path (path + query), applying URL rewriting if configured
let upstream_path = {
@@ -564,34 +578,96 @@ impl HttpProxyService {
}
}
// --- Connection pooling: try reusing an existing connection first ---
let pool_key = crate::connection_pool::PoolKey {
host: upstream.host.clone(),
port: upstream.port,
use_tls: upstream.use_tls,
h2: use_h2,
// --- Resolve protocol decision based on backend protocol mode ---
let is_auto_detect_mode = matches!(backend_protocol_mode, rustproxy_config::BackendProtocol::Auto);
let (use_h2, needs_alpn_probe) = match backend_protocol_mode {
rustproxy_config::BackendProtocol::Http1 => (false, false),
rustproxy_config::BackendProtocol::Http2 => (true, false),
rustproxy_config::BackendProtocol::Auto => {
if !upstream.use_tls {
// No ALPN without TLS — default to H1
(false, false)
} else {
let cache_key = crate::protocol_cache::ProtocolCacheKey {
host: upstream.host.clone(),
port: upstream.port,
};
match self.protocol_cache.get(&cache_key) {
Some(crate::protocol_cache::DetectedProtocol::H2) => (true, false),
Some(crate::protocol_cache::DetectedProtocol::H1) => (false, false),
None => (false, true), // needs ALPN probe
}
}
}
};
// Try pooled connection first (H2 only — H2 senders are Clone and multiplexed,
// so checkout doesn't consume request parts. For H1, we try pool inside forward_h1.)
if use_h2 {
if let Some(sender) = self.connection_pool.checkout_h2(&pool_key) {
let result = self.forward_h2_pooled(
sender, parts, body, upstream_headers, &upstream_path,
route_match.route, route_id, &ip_str, &pool_key,
).await;
self.upstream_selector.connection_ended(&upstream_key);
return result;
// --- Connection pooling: try reusing an existing connection first ---
// For ALPN probe mode, skip pool checkout (we don't know the protocol yet)
if !needs_alpn_probe {
let pool_key = crate::connection_pool::PoolKey {
host: upstream.host.clone(),
port: upstream.port,
use_tls: upstream.use_tls,
h2: use_h2,
};
// H2 pool checkout (H2 senders are Clone and multiplexed)
if use_h2 {
if let Some(sender) = self.connection_pool.checkout_h2(&pool_key) {
let result = self.forward_h2_pooled(
sender, parts, body, upstream_headers, &upstream_path,
route_match.route, route_id, &ip_str, &pool_key,
).await;
self.upstream_selector.connection_ended(&upstream_key);
return result;
}
}
}
// Fresh connection path
let backend = if upstream.use_tls {
// --- Fresh connection path ---
// Choose TLS config: use ALPN config for auto-detect probe, plain config otherwise
let tls_config = if needs_alpn_probe {
&self.backend_tls_config_alpn
} else {
&self.backend_tls_config
};
// Establish backend connection
let (backend, detected_h2) = if upstream.use_tls {
match tokio::time::timeout(
self.connect_timeout,
connect_tls_backend(&self.backend_tls_config, &upstream.host, upstream.port),
connect_tls_backend(tls_config, &upstream.host, upstream.port),
).await {
Ok(Ok(tls)) => BackendStream::Tls(tls),
Ok(Ok(tls)) => {
let final_h2 = if needs_alpn_probe {
// Read the ALPN-negotiated protocol from the TLS connection
let alpn = tls.get_ref().1.alpn_protocol();
let is_h2 = alpn.map(|p| p == b"h2").unwrap_or(false);
// Cache the result
let cache_key = crate::protocol_cache::ProtocolCacheKey {
host: upstream.host.clone(),
port: upstream.port,
};
let detected = if is_h2 {
crate::protocol_cache::DetectedProtocol::H2
} else {
crate::protocol_cache::DetectedProtocol::H1
};
self.protocol_cache.insert(cache_key, detected);
debug!(
"Auto-detected {} for backend {}:{}",
if is_h2 { "HTTP/2" } else { "HTTP/1.1" },
upstream.host, upstream.port
);
is_h2
} else {
use_h2
};
(BackendStream::Tls(tls), final_h2)
}
Ok(Err(e)) => {
error!("Failed TLS connect to upstream {}:{}: {}", upstream.host, upstream.port, e);
self.upstream_selector.connection_ended(&upstream_key);
@@ -613,7 +689,7 @@ impl HttpProxyService {
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(
&socket2::TcpKeepalive::new().with_time(std::time::Duration::from_secs(60))
);
BackendStream::Plain(s)
(BackendStream::Plain(s), use_h2)
}
Ok(Err(e)) => {
error!("Failed to connect to upstream {}:{}: {}", upstream.host, upstream.port, e);
@@ -628,12 +704,34 @@ impl HttpProxyService {
}
};
let final_pool_key = crate::connection_pool::PoolKey {
host: upstream.host.clone(),
port: upstream.port,
use_tls: upstream.use_tls,
h2: detected_h2,
};
let io = TokioIo::new(backend);
let result = if use_h2 {
self.forward_h2(io, parts, body, upstream_headers, &upstream_path, &upstream, route_match.route, route_id, &ip_str, &pool_key).await
let result = if detected_h2 {
if is_auto_detect_mode {
// Auto-detect mode: use fallback-capable H2 forwarding
self.forward_h2_with_fallback(
io, parts, body, upstream_headers, &upstream_path,
&upstream, route_match.route, route_id, &ip_str, &final_pool_key,
).await
} else {
// Explicit H2 mode: hard-fail on handshake error (preserved behavior)
self.forward_h2(
io, parts, body, upstream_headers, &upstream_path,
&upstream, route_match.route, route_id, &ip_str, &final_pool_key,
).await
}
} else {
self.forward_h1(io, parts, body, upstream_headers, &upstream_path, &upstream, route_match.route, route_id, &ip_str, &pool_key).await
self.forward_h1(
io, parts, body, upstream_headers, &upstream_path,
&upstream, route_match.route, route_id, &ip_str, &final_pool_key,
).await
};
self.upstream_selector.connection_ended(&upstream_key);
result
@@ -788,6 +886,167 @@ impl HttpProxyService {
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await
}
/// Forward via HTTP/2 with fallback to HTTP/1.1 (auto-detect mode).
///
/// Handles two failure scenarios:
/// 1. H2 handshake fails → reconnects and falls back to H1 (body not consumed yet).
/// 2. H2 handshake "succeeds" but request fails (backend advertises h2 via ALPN but
/// doesn't actually speak h2) → updates cache to H1. The request body is consumed
/// so this request fails, but all subsequent requests will correctly use H1.
async fn forward_h2_with_fallback(
&self,
io: TokioIo<BackendStream>,
parts: hyper::http::request::Parts,
body: Incoming,
upstream_headers: hyper::HeaderMap,
upstream_path: &str,
upstream: &crate::upstream_selector::UpstreamSelection,
route: &rustproxy_config::RouteConfig,
route_id: Option<&str>,
source_ip: &str,
pool_key: &crate::connection_pool::PoolKey,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let exec = hyper_util::rt::TokioExecutor::new();
let handshake_result: Result<(
hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
hyper::client::conn::http2::Connection<TokioIo<BackendStream>, BoxBody<Bytes, hyper::Error>, hyper_util::rt::TokioExecutor>,
), hyper::Error> = hyper::client::conn::http2::handshake(exec, io).await;
match handshake_result {
Ok((mut sender, conn)) => {
tokio::spawn(async move {
if let Err(e) = conn.await {
debug!("HTTP/2 upstream connection error: {}", e);
}
});
// Build and send the h2 request inline (don't register in pool yet —
// we need to verify the request actually succeeds first, because some
// backends advertise h2 via ALPN but don't speak the h2 binary protocol).
let mut upstream_req = Request::builder()
.method(parts.method)
.uri(upstream_path);
if let Some(headers) = upstream_req.headers_mut() {
*headers = upstream_headers;
}
let counting_req_body = CountingBody::new(
body,
Arc::clone(&self.metrics),
route_id.map(|s| s.to_string()),
Some(source_ip.to_string()),
Direction::In,
);
let boxed_body: BoxBody<Bytes, hyper::Error> = BoxBody::new(counting_req_body);
let upstream_req = upstream_req.body(boxed_body).unwrap();
match sender.send_request(upstream_req).await {
Ok(upstream_response) => {
// H2 works! Register sender in pool for multiplexed reuse
self.connection_pool.register_h2(pool_key.clone(), sender);
self.build_streaming_response(upstream_response, route, route_id, source_ip).await
}
Err(e) => {
// H2 request failed — backend advertises h2 via ALPN but doesn't
// actually speak it. Update cache so future requests use H1.
// The request body is consumed so this request can't be retried,
// but all subsequent requests will correctly use H1.
warn!(
"Auto-detect: H2 request failed for {}:{}, updating cache to H1: {}",
upstream.host, upstream.port, e
);
let cache_key = crate::protocol_cache::ProtocolCacheKey {
host: upstream.host.clone(),
port: upstream.port,
};
self.protocol_cache.insert(cache_key, crate::protocol_cache::DetectedProtocol::H1);
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend protocol mismatch, retrying with H1"))
}
}
}
Err(e) => {
// H2 handshake truly failed — fall back to H1
// Body is NOT consumed yet, so we can retry the full request.
warn!(
"H2 handshake failed for {}:{}, falling back to H1: {}",
upstream.host, upstream.port, e
);
// Update cache to H1 so subsequent requests skip H2
let cache_key = crate::protocol_cache::ProtocolCacheKey {
host: upstream.host.clone(),
port: upstream.port,
};
self.protocol_cache.insert(cache_key, crate::protocol_cache::DetectedProtocol::H1);
// Reconnect for H1 (the original io was consumed by the failed h2 handshake)
match self.reconnect_backend(upstream).await {
Some(fallback_backend) => {
let h1_pool_key = crate::connection_pool::PoolKey {
host: upstream.host.clone(),
port: upstream.port,
use_tls: upstream.use_tls,
h2: false,
};
let fallback_io = TokioIo::new(fallback_backend);
self.forward_h1(
fallback_io, parts, body, upstream_headers, upstream_path,
upstream, route, route_id, source_ip, &h1_pool_key,
).await
}
None => {
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable after H2 fallback"))
}
}
}
}
}
/// Reconnect to a backend (used for H2→H1 fallback).
async fn reconnect_backend(
&self,
upstream: &crate::upstream_selector::UpstreamSelection,
) -> Option<BackendStream> {
if upstream.use_tls {
match tokio::time::timeout(
self.connect_timeout,
connect_tls_backend(&self.backend_tls_config, &upstream.host, upstream.port),
).await {
Ok(Ok(tls)) => Some(BackendStream::Tls(tls)),
Ok(Err(e)) => {
error!("H1 fallback: TLS reconnect failed for {}:{}: {}", upstream.host, upstream.port, e);
None
}
Err(_) => {
error!("H1 fallback: TLS reconnect timeout for {}:{}", upstream.host, upstream.port);
None
}
}
} else {
match tokio::time::timeout(
self.connect_timeout,
TcpStream::connect(format!("{}:{}", upstream.host, upstream.port)),
).await {
Ok(Ok(s)) => {
s.set_nodelay(true).ok();
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(
&socket2::TcpKeepalive::new().with_time(std::time::Duration::from_secs(60))
);
Some(BackendStream::Plain(s))
}
Ok(Err(e)) => {
error!("H1 fallback: reconnect failed for {}:{}: {}", upstream.host, upstream.port, e);
None
}
Err(_) => {
error!("H1 fallback: reconnect timeout for {}:{}", upstream.host, upstream.port);
None
}
}
}
}
/// Common H2 forwarding logic used by both fresh and pooled paths.
async fn forward_h2_with_sender(
&self,
@@ -1432,6 +1691,18 @@ impl HttpProxyService {
.with_no_client_auth();
Arc::new(config)
}
/// Build a default backend TLS config with ALPN h2+http/1.1 for auto-detection.
/// Used as fallback when no shared ALPN config is injected from tls_handler.
fn default_backend_tls_config_with_alpn() -> Arc<rustls::ClientConfig> {
let _ = rustls::crypto::ring::default_provider().install_default();
let mut config = rustls::ClientConfig::builder()
.dangerous()
.with_custom_certificate_verifier(Arc::new(InsecureBackendVerifier))
.with_no_client_auth();
config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
Arc::new(config)
}
}
/// Insecure certificate verifier for backend TLS connections (fallback only).
@@ -1496,7 +1767,9 @@ impl Default for HttpProxyService {
request_counter: AtomicU64::new(0),
regex_cache: DashMap::new(),
backend_tls_config: Self::default_backend_tls_config(),
backend_tls_config_alpn: Self::default_backend_tls_config_with_alpn(),
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
protocol_cache: Arc::new(crate::protocol_cache::ProtocolCache::new()),
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,