Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 90b83a9dbe | |||
| 508621e231 | |||
| 9ef21dcb41 | |||
| 0acd907431 | |||
| 80276a70e8 | |||
| 0d4399d7f1 | |||
| 0380a957d0 | |||
| 5271447264 | |||
| be9898805f | |||
| d4aa46aed7 | |||
| 4f1c5c919f | |||
| d51b2c5890 | |||
| bb471a8cc9 | |||
| c52128f12d |
53
changelog.md
53
changelog.md
@@ -1,5 +1,58 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2026-03-12 - 25.10.2 - fix(repo)
|
||||||
|
no code changes to release
|
||||||
|
|
||||||
|
|
||||||
|
## 2026-03-12 - 25.10.1 - fix(repo)
|
||||||
|
no changes to commit
|
||||||
|
|
||||||
|
|
||||||
|
## 2026-03-12 - 25.10.0 - feat(metrics)
|
||||||
|
add per-backend connection, error, protocol, and pool metrics with stale backend pruning
|
||||||
|
|
||||||
|
- tracks backend connection lifecycle, connect timing, protocol detection, pool hit/miss rates, handshake/request errors, and h2 fallback failures in Rust metrics
|
||||||
|
- exposes backend metrics through the TypeScript metrics adapter with backend listings, protocol lookup, and top error summaries
|
||||||
|
- prunes backend metrics for backends no longer referenced by active routes, including preserved-port targets expanded across listening ports
|
||||||
|
|
||||||
|
## 2026-03-11 - 25.9.3 - fix(rustproxy-http)
|
||||||
|
Evict stale HTTP/2 pooled senders and retry bodyless requests with fresh backend connections to avoid 502s
|
||||||
|
|
||||||
|
- Introduce MAX_H2_AGE (120s) and evict pooled HTTP/2 senders that are closed or older than this limit
|
||||||
|
- Check MAX_H2_AGE on checkout and during background eviction to prevent reuse of stale h2 connections
|
||||||
|
- Add connection_pool.remove_h2() to explicitly remove dead H2 senders from the pool
|
||||||
|
- When a pooled H2 request returns a 502 and the original request had an empty body, retry using a fresh H2 connection (retry_h2_with_fresh_connection)
|
||||||
|
- On H2 auto-detect failures, retry as HTTP/1.1 for bodyless requests via forward_h1_empty_body; return 502 for requests with bodies
|
||||||
|
- Evict dead H2 senders on backend request failures in reconnect_backend so subsequent attempts create fresh connections
|
||||||
|
|
||||||
|
## 2026-03-08 - 25.9.2 - fix(protocol-cache)
|
||||||
|
Include requested_host in protocol detection cache key to avoid cache oscillation when multiple frontend domains share the same backend
|
||||||
|
|
||||||
|
- Add ProtocolCacheKey.requested_host: Option<String> to distinguish cache entries by incoming request Host/:authority
|
||||||
|
- Update protocol cache lookups/inserts in proxy_service to populate requested_host
|
||||||
|
- Enhance debug logging to show requested_host on cache hits
|
||||||
|
- Fixes repeated ALPN probing / cache oscillation when different frontend domains share a backend with differing HTTP/2 support
|
||||||
|
|
||||||
|
## 2026-03-03 - 25.9.1 - fix(rustproxy)
|
||||||
|
Cancel connections for routes removed/disabled by adding per-route cancellation tokens and make RouteManager swappable (ArcSwap) for runtime updates
|
||||||
|
|
||||||
|
- Add per-route CancellationToken map (DashMap) to TcpListenerManager and call token.cancel() when routes are removed (invalidate_removed_routes)
|
||||||
|
- Propagate Arc<ArcSwap<RouteManager>> into HttpProxyService and passthrough listener so the route manager can be hot-swapped without restarting listeners
|
||||||
|
- Use per-route child cancellation tokens in accept/connection handling and forwarders to terminate existing connections when a route is removed
|
||||||
|
- Prune HTTP proxy caches, retaining per-route tokens for routes that are still active and cleaning up tokens for removed routes
|
||||||
|
- Update test.test.sni-requirement.node.ts to allocate unique free ports via findFreePorts to avoid port conflicts during tests
|
||||||
|
|
||||||
|
## 2026-03-03 - 25.9.0 - feat(rustproxy-http)
|
||||||
|
add HTTP/2 auto-detection via ALPN with TTL-backed protocol cache and h1-only/h2 ALPN client configs
|
||||||
|
|
||||||
|
- Add protocol_cache module: a bounded (max 4096 entries), TTL-based (5 min) cache with a background cleanup task and a clear() method to discard stale detections.
|
||||||
|
- Introduce BackendProtocol::Auto and expose 'auto' in TypeScript route types to allow ALPN-based protocol auto-detection.
|
||||||
|
- Add build_tls_acceptor_h1_only() to create a TLS acceptor that advertises only http/1.1 (used for backends/tests that speak plain HTTP/1.1).
|
||||||
|
- Add shared_backend_tls_config_alpn() and default_backend_tls_config_with_alpn() to provide client TLS configs advertising h2+http/1.1 for auto-detection.
|
||||||
|
- Wire backend_tls_config_alpn and protocol_cache into proxy_service, tcp_listener and passthrough paths; add set_backend_tls_config_alpn() and prune protocol_cache on route updates.
|
||||||
|
- Update passthrough tests to use h1-only acceptor to avoid false HTTP/2 detection when backends speak plain HTTP/1.1.
|
||||||
|
- Include reconnection/fallback handling and ensure ALPN-enabled client config is used for auto-detection mode.
|
||||||
|
|
||||||
## 2026-02-26 - 25.8.5 - fix(release)
|
## 2026-02-26 - 25.8.5 - fix(release)
|
||||||
bump patch version (no source changes)
|
bump patch version (no source changes)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@push.rocks/smartproxy",
|
"name": "@push.rocks/smartproxy",
|
||||||
"version": "25.8.5",
|
"version": "25.10.2",
|
||||||
"private": false,
|
"private": false,
|
||||||
"description": "A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.",
|
"description": "A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.",
|
||||||
"main": "dist_ts/index.js",
|
"main": "dist_ts/index.js",
|
||||||
|
|||||||
@@ -367,6 +367,7 @@ pub struct NfTablesOptions {
|
|||||||
pub enum BackendProtocol {
|
pub enum BackendProtocol {
|
||||||
Http1,
|
Http1,
|
||||||
Http2,
|
Http2,
|
||||||
|
Auto,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Action options.
|
/// Action options.
|
||||||
|
|||||||
@@ -18,6 +18,9 @@ const MAX_IDLE_PER_KEY: usize = 16;
|
|||||||
const IDLE_TIMEOUT: Duration = Duration::from_secs(90);
|
const IDLE_TIMEOUT: Duration = Duration::from_secs(90);
|
||||||
/// Background eviction interval.
|
/// Background eviction interval.
|
||||||
const EVICTION_INTERVAL: Duration = Duration::from_secs(30);
|
const EVICTION_INTERVAL: Duration = Duration::from_secs(30);
|
||||||
|
/// Maximum age for pooled HTTP/2 connections before proactive eviction.
|
||||||
|
/// Prevents staleness from backends that close idle connections (e.g. nginx GOAWAY).
|
||||||
|
const MAX_H2_AGE: Duration = Duration::from_secs(120);
|
||||||
|
|
||||||
/// Identifies a unique backend endpoint.
|
/// Identifies a unique backend endpoint.
|
||||||
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
|
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
|
||||||
@@ -37,7 +40,6 @@ struct IdleH1 {
|
|||||||
/// A pooled HTTP/2 sender (multiplexed, Clone-able).
|
/// A pooled HTTP/2 sender (multiplexed, Clone-able).
|
||||||
struct PooledH2 {
|
struct PooledH2 {
|
||||||
sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
||||||
#[allow(dead_code)] // Reserved for future age-based eviction
|
|
||||||
created_at: Instant,
|
created_at: Instant,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,8 +118,8 @@ impl ConnectionPool {
|
|||||||
let entry = self.h2_pool.get(key)?;
|
let entry = self.h2_pool.get(key)?;
|
||||||
let pooled = entry.value();
|
let pooled = entry.value();
|
||||||
|
|
||||||
// Check if the h2 connection is still alive
|
// Check if the h2 connection is still alive and not too old
|
||||||
if pooled.sender.is_closed() {
|
if pooled.sender.is_closed() || pooled.created_at.elapsed() >= MAX_H2_AGE {
|
||||||
drop(entry);
|
drop(entry);
|
||||||
self.h2_pool.remove(key);
|
self.h2_pool.remove(key);
|
||||||
return None;
|
return None;
|
||||||
@@ -130,6 +132,12 @@ impl ConnectionPool {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Remove a dead HTTP/2 sender from the pool.
|
||||||
|
/// Called when `send_request` fails to prevent subsequent requests from reusing the stale sender.
|
||||||
|
pub fn remove_h2(&self, key: &PoolKey) {
|
||||||
|
self.h2_pool.remove(key);
|
||||||
|
}
|
||||||
|
|
||||||
/// Register an HTTP/2 sender in the pool. Since h2 is multiplexed,
|
/// Register an HTTP/2 sender in the pool. Since h2 is multiplexed,
|
||||||
/// only one sender per key is stored (it's Clone-able).
|
/// only one sender per key is stored (it's Clone-able).
|
||||||
pub fn register_h2(&self, key: PoolKey, sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>) {
|
pub fn register_h2(&self, key: PoolKey, sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>) {
|
||||||
@@ -165,10 +173,10 @@ impl ConnectionPool {
|
|||||||
h1_pool.remove(&key);
|
h1_pool.remove(&key);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Evict dead H2 connections
|
// Evict dead or aged-out H2 connections
|
||||||
let mut dead_h2 = Vec::new();
|
let mut dead_h2 = Vec::new();
|
||||||
for entry in h2_pool.iter() {
|
for entry in h2_pool.iter() {
|
||||||
if entry.value().sender.is_closed() {
|
if entry.value().sender.is_closed() || entry.value().created_at.elapsed() >= MAX_H2_AGE {
|
||||||
dead_h2.push(entry.key().clone());
|
dead_h2.push(entry.key().clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
pub mod connection_pool;
|
pub mod connection_pool;
|
||||||
pub mod counting_body;
|
pub mod counting_body;
|
||||||
|
pub mod protocol_cache;
|
||||||
pub mod proxy_service;
|
pub mod proxy_service;
|
||||||
pub mod request_filter;
|
pub mod request_filter;
|
||||||
pub mod response_filter;
|
pub mod response_filter;
|
||||||
|
|||||||
140
rust/crates/rustproxy-http/src/protocol_cache.rs
Normal file
140
rust/crates/rustproxy-http/src/protocol_cache.rs
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
//! Bounded, TTL-based protocol detection cache for HTTP/2 auto-detection.
|
||||||
|
//!
|
||||||
|
//! Caches the ALPN-negotiated protocol (H1 or H2) per backend endpoint and requested
|
||||||
|
//! domain (host:port + requested_host). This prevents cache oscillation when multiple
|
||||||
|
//! frontend domains share the same backend but differ in HTTP/2 support.
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use dashmap::DashMap;
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
/// TTL for cached protocol detection results.
|
||||||
|
/// After this duration, the next request will re-probe the backend.
|
||||||
|
const PROTOCOL_CACHE_TTL: Duration = Duration::from_secs(300); // 5 minutes
|
||||||
|
|
||||||
|
/// Maximum number of entries in the protocol cache.
|
||||||
|
/// Prevents unbounded growth when backends come and go.
|
||||||
|
const PROTOCOL_CACHE_MAX_ENTRIES: usize = 4096;
|
||||||
|
|
||||||
|
/// Background cleanup interval for the protocol cache.
|
||||||
|
const PROTOCOL_CACHE_CLEANUP_INTERVAL: Duration = Duration::from_secs(60);
|
||||||
|
|
||||||
|
/// Detected backend protocol.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum DetectedProtocol {
|
||||||
|
H1,
|
||||||
|
H2,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Key for the protocol cache: (host, port, requested_host).
|
||||||
|
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
|
||||||
|
pub struct ProtocolCacheKey {
|
||||||
|
pub host: String,
|
||||||
|
pub port: u16,
|
||||||
|
/// The incoming request's domain (Host header / :authority).
|
||||||
|
/// Distinguishes protocol detection when multiple domains share the same backend.
|
||||||
|
pub requested_host: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A cached protocol detection result with a timestamp.
|
||||||
|
struct CachedEntry {
|
||||||
|
protocol: DetectedProtocol,
|
||||||
|
detected_at: Instant,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bounded, TTL-based protocol detection cache.
|
||||||
|
///
|
||||||
|
/// Resource-usage guarantees:
|
||||||
|
/// - Hard cap at `PROTOCOL_CACHE_MAX_ENTRIES` — cannot grow unboundedly.
|
||||||
|
/// - TTL expiry — stale entries naturally age out on lookup.
|
||||||
|
/// - Background cleanup task — proactively removes expired entries every 60s.
|
||||||
|
/// - `clear()` — called on route updates to discard stale detections.
|
||||||
|
/// - `Drop` — aborts the background task to prevent dangling tokio tasks.
|
||||||
|
pub struct ProtocolCache {
|
||||||
|
cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>>,
|
||||||
|
cleanup_handle: Option<tokio::task::JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ProtocolCache {
|
||||||
|
/// Create a new protocol cache and start the background cleanup task.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>> = Arc::new(DashMap::new());
|
||||||
|
let cache_clone = Arc::clone(&cache);
|
||||||
|
let cleanup_handle = tokio::spawn(async move {
|
||||||
|
Self::cleanup_loop(cache_clone).await;
|
||||||
|
});
|
||||||
|
|
||||||
|
Self {
|
||||||
|
cache,
|
||||||
|
cleanup_handle: Some(cleanup_handle),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look up the cached protocol for a backend endpoint.
|
||||||
|
/// Returns `None` if not cached or expired (caller should probe via ALPN).
|
||||||
|
pub fn get(&self, key: &ProtocolCacheKey) -> Option<DetectedProtocol> {
|
||||||
|
let entry = self.cache.get(key)?;
|
||||||
|
if entry.detected_at.elapsed() < PROTOCOL_CACHE_TTL {
|
||||||
|
debug!("Protocol cache hit: {:?} for {}:{} (requested: {:?})", entry.protocol, key.host, key.port, key.requested_host);
|
||||||
|
Some(entry.protocol)
|
||||||
|
} else {
|
||||||
|
// Expired — remove and return None to trigger re-probe
|
||||||
|
drop(entry); // release DashMap ref before remove
|
||||||
|
self.cache.remove(key);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Insert a detected protocol into the cache.
|
||||||
|
/// If the cache is at capacity, evict the oldest entry first.
|
||||||
|
pub fn insert(&self, key: ProtocolCacheKey, protocol: DetectedProtocol) {
|
||||||
|
if self.cache.len() >= PROTOCOL_CACHE_MAX_ENTRIES && !self.cache.contains_key(&key) {
|
||||||
|
// Evict the oldest entry to stay within bounds
|
||||||
|
let oldest = self.cache.iter()
|
||||||
|
.min_by_key(|entry| entry.value().detected_at)
|
||||||
|
.map(|entry| entry.key().clone());
|
||||||
|
if let Some(oldest_key) = oldest {
|
||||||
|
self.cache.remove(&oldest_key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.cache.insert(key, CachedEntry {
|
||||||
|
protocol,
|
||||||
|
detected_at: Instant::now(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clear all entries. Called on route updates to discard stale detections.
|
||||||
|
pub fn clear(&self) {
|
||||||
|
self.cache.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Background cleanup loop — removes expired entries every `PROTOCOL_CACHE_CLEANUP_INTERVAL`.
|
||||||
|
async fn cleanup_loop(cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>>) {
|
||||||
|
let mut interval = tokio::time::interval(PROTOCOL_CACHE_CLEANUP_INTERVAL);
|
||||||
|
loop {
|
||||||
|
interval.tick().await;
|
||||||
|
|
||||||
|
let expired: Vec<ProtocolCacheKey> = cache.iter()
|
||||||
|
.filter(|entry| entry.value().detected_at.elapsed() >= PROTOCOL_CACHE_TTL)
|
||||||
|
.map(|entry| entry.key().clone())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if !expired.is_empty() {
|
||||||
|
debug!("Protocol cache cleanup: removing {} expired entries", expired.len());
|
||||||
|
for key in expired {
|
||||||
|
cache.remove(&key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for ProtocolCache {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if let Some(handle) = self.cleanup_handle.take() {
|
||||||
|
handle.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,8 +8,10 @@ use std::collections::HashMap;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
|
|
||||||
|
use arc_swap::ArcSwap;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
|
use http_body::Body as HttpBody;
|
||||||
use http_body_util::{BodyExt, Full, combinators::BoxBody};
|
use http_body_util::{BodyExt, Full, combinators::BoxBody};
|
||||||
use hyper::body::Incoming;
|
use hyper::body::Incoming;
|
||||||
use hyper::{Request, Response, StatusCode};
|
use hyper::{Request, Response, StatusCode};
|
||||||
@@ -133,7 +135,7 @@ async fn connect_tls_backend(
|
|||||||
|
|
||||||
/// HTTP proxy service that processes HTTP traffic.
|
/// HTTP proxy service that processes HTTP traffic.
|
||||||
pub struct HttpProxyService {
|
pub struct HttpProxyService {
|
||||||
route_manager: Arc<RouteManager>,
|
route_manager: Arc<ArcSwap<RouteManager>>,
|
||||||
metrics: Arc<MetricsCollector>,
|
metrics: Arc<MetricsCollector>,
|
||||||
upstream_selector: UpstreamSelector,
|
upstream_selector: UpstreamSelector,
|
||||||
/// Timeout for connecting to upstream backends.
|
/// Timeout for connecting to upstream backends.
|
||||||
@@ -146,8 +148,12 @@ pub struct HttpProxyService {
|
|||||||
regex_cache: DashMap<String, Regex>,
|
regex_cache: DashMap<String, Regex>,
|
||||||
/// Shared backend TLS config for session resumption across connections.
|
/// Shared backend TLS config for session resumption across connections.
|
||||||
backend_tls_config: Arc<rustls::ClientConfig>,
|
backend_tls_config: Arc<rustls::ClientConfig>,
|
||||||
|
/// Backend TLS config with ALPN h2+http/1.1 for auto-detection mode.
|
||||||
|
backend_tls_config_alpn: Arc<rustls::ClientConfig>,
|
||||||
/// Backend connection pool for reusing keep-alive connections.
|
/// Backend connection pool for reusing keep-alive connections.
|
||||||
connection_pool: Arc<crate::connection_pool::ConnectionPool>,
|
connection_pool: Arc<crate::connection_pool::ConnectionPool>,
|
||||||
|
/// Protocol detection cache for auto mode (caches the ALPN-detected protocol per backend endpoint and requested host).
|
||||||
|
protocol_cache: Arc<crate::protocol_cache::ProtocolCache>,
|
||||||
/// HTTP keep-alive idle timeout: close connection if no new request arrives within this duration.
|
/// HTTP keep-alive idle timeout: close connection if no new request arrives within this duration.
|
||||||
http_idle_timeout: std::time::Duration,
|
http_idle_timeout: std::time::Duration,
|
||||||
/// WebSocket inactivity timeout (no data in either direction).
|
/// WebSocket inactivity timeout (no data in either direction).
|
||||||
@@ -157,7 +163,7 @@ pub struct HttpProxyService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl HttpProxyService {
|
impl HttpProxyService {
|
||||||
pub fn new(route_manager: Arc<RouteManager>, metrics: Arc<MetricsCollector>) -> Self {
|
pub fn new(route_manager: Arc<ArcSwap<RouteManager>>, metrics: Arc<MetricsCollector>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
route_manager,
|
route_manager,
|
||||||
metrics,
|
metrics,
|
||||||
@@ -167,7 +173,9 @@ impl HttpProxyService {
|
|||||||
request_counter: AtomicU64::new(0),
|
request_counter: AtomicU64::new(0),
|
||||||
regex_cache: DashMap::new(),
|
regex_cache: DashMap::new(),
|
||||||
backend_tls_config: Self::default_backend_tls_config(),
|
backend_tls_config: Self::default_backend_tls_config(),
|
||||||
|
backend_tls_config_alpn: Self::default_backend_tls_config_with_alpn(),
|
||||||
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
||||||
|
protocol_cache: Arc::new(crate::protocol_cache::ProtocolCache::new()),
|
||||||
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
||||||
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
||||||
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
||||||
@@ -176,7 +184,7 @@ impl HttpProxyService {
|
|||||||
|
|
||||||
/// Create with a custom connect timeout.
|
/// Create with a custom connect timeout.
|
||||||
pub fn with_connect_timeout(
|
pub fn with_connect_timeout(
|
||||||
route_manager: Arc<RouteManager>,
|
route_manager: Arc<ArcSwap<RouteManager>>,
|
||||||
metrics: Arc<MetricsCollector>,
|
metrics: Arc<MetricsCollector>,
|
||||||
connect_timeout: std::time::Duration,
|
connect_timeout: std::time::Duration,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
@@ -189,7 +197,9 @@ impl HttpProxyService {
|
|||||||
request_counter: AtomicU64::new(0),
|
request_counter: AtomicU64::new(0),
|
||||||
regex_cache: DashMap::new(),
|
regex_cache: DashMap::new(),
|
||||||
backend_tls_config: Self::default_backend_tls_config(),
|
backend_tls_config: Self::default_backend_tls_config(),
|
||||||
|
backend_tls_config_alpn: Self::default_backend_tls_config_with_alpn(),
|
||||||
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
||||||
|
protocol_cache: Arc::new(crate::protocol_cache::ProtocolCache::new()),
|
||||||
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
||||||
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
||||||
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
||||||
@@ -215,12 +225,18 @@ impl HttpProxyService {
|
|||||||
self.backend_tls_config = config;
|
self.backend_tls_config = config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the shared backend TLS config with ALPN h2+http/1.1 (for auto-detection mode).
|
||||||
|
pub fn set_backend_tls_config_alpn(&mut self, config: Arc<rustls::ClientConfig>) {
|
||||||
|
self.backend_tls_config_alpn = config;
|
||||||
|
}
|
||||||
|
|
||||||
/// Prune caches for route IDs that are no longer active.
|
/// Prune caches for route IDs that are no longer active.
|
||||||
/// Call after route updates to prevent unbounded growth.
|
/// Call after route updates to prevent unbounded growth.
|
||||||
pub fn prune_stale_routes(&self, active_route_ids: &std::collections::HashSet<String>) {
|
pub fn prune_stale_routes(&self, active_route_ids: &std::collections::HashSet<String>) {
|
||||||
self.route_rate_limiters.retain(|k, _| active_route_ids.contains(k));
|
self.route_rate_limiters.retain(|k, _| active_route_ids.contains(k));
|
||||||
self.regex_cache.clear();
|
self.regex_cache.clear();
|
||||||
self.upstream_selector.reset_round_robin();
|
self.upstream_selector.reset_round_robin();
|
||||||
|
self.protocol_cache.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle an incoming HTTP connection on a plain TCP stream.
|
/// Handle an incoming HTTP connection on a plain TCP stream.
|
||||||
@@ -391,7 +407,8 @@ impl HttpProxyService {
|
|||||||
protocol: Some("http"),
|
protocol: Some("http"),
|
||||||
};
|
};
|
||||||
|
|
||||||
let route_match = match self.route_manager.find_route(&ctx) {
|
let current_rm = self.route_manager.load();
|
||||||
|
let route_match = match current_rm.find_route(&ctx) {
|
||||||
Some(rm) => rm,
|
Some(rm) => rm,
|
||||||
None => {
|
None => {
|
||||||
debug!("No route matched for HTTP request to {:?}{}", host, path);
|
debug!("No route matched for HTTP request to {:?}{}", host, path);
|
||||||
@@ -480,11 +497,11 @@ impl HttpProxyService {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine backend protocol
|
// Determine backend protocol mode
|
||||||
let use_h2 = route_match.route.action.options.as_ref()
|
let backend_protocol_mode = route_match.route.action.options.as_ref()
|
||||||
.and_then(|o| o.backend_protocol.as_ref())
|
.and_then(|o| o.backend_protocol.as_ref())
|
||||||
.map(|p| *p == rustproxy_config::BackendProtocol::Http2)
|
.cloned()
|
||||||
.unwrap_or(false);
|
.unwrap_or(rustproxy_config::BackendProtocol::Auto);
|
||||||
|
|
||||||
// Build the upstream path (path + query), applying URL rewriting if configured
|
// Build the upstream path (path + query), applying URL rewriting if configured
|
||||||
let upstream_path = {
|
let upstream_path = {
|
||||||
@@ -564,7 +581,33 @@ impl HttpProxyService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Resolve protocol decision based on backend protocol mode ---
|
||||||
|
let is_auto_detect_mode = matches!(backend_protocol_mode, rustproxy_config::BackendProtocol::Auto);
|
||||||
|
let (use_h2, needs_alpn_probe) = match backend_protocol_mode {
|
||||||
|
rustproxy_config::BackendProtocol::Http1 => (false, false),
|
||||||
|
rustproxy_config::BackendProtocol::Http2 => (true, false),
|
||||||
|
rustproxy_config::BackendProtocol::Auto => {
|
||||||
|
if !upstream.use_tls {
|
||||||
|
// No ALPN without TLS — default to H1
|
||||||
|
(false, false)
|
||||||
|
} else {
|
||||||
|
let cache_key = crate::protocol_cache::ProtocolCacheKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
requested_host: host.clone(),
|
||||||
|
};
|
||||||
|
match self.protocol_cache.get(&cache_key) {
|
||||||
|
Some(crate::protocol_cache::DetectedProtocol::H2) => (true, false),
|
||||||
|
Some(crate::protocol_cache::DetectedProtocol::H1) => (false, false),
|
||||||
|
None => (false, true), // needs ALPN probe
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// --- Connection pooling: try reusing an existing connection first ---
|
// --- Connection pooling: try reusing an existing connection first ---
|
||||||
|
// For ALPN probe mode, skip pool checkout (we don't know the protocol yet)
|
||||||
|
if !needs_alpn_probe {
|
||||||
let pool_key = crate::connection_pool::PoolKey {
|
let pool_key = crate::connection_pool::PoolKey {
|
||||||
host: upstream.host.clone(),
|
host: upstream.host.clone(),
|
||||||
port: upstream.port,
|
port: upstream.port,
|
||||||
@@ -572,10 +615,11 @@ impl HttpProxyService {
|
|||||||
h2: use_h2,
|
h2: use_h2,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Try pooled connection first (H2 only — H2 senders are Clone and multiplexed,
|
// H2 pool checkout (H2 senders are Clone and multiplexed)
|
||||||
// so checkout doesn't consume request parts. For H1, we try pool inside forward_h1.)
|
|
||||||
if use_h2 {
|
if use_h2 {
|
||||||
if let Some(sender) = self.connection_pool.checkout_h2(&pool_key) {
|
if let Some(sender) = self.connection_pool.checkout_h2(&pool_key) {
|
||||||
|
self.metrics.backend_pool_hit(&upstream_key);
|
||||||
|
self.metrics.set_backend_protocol(&upstream_key, "h2");
|
||||||
let result = self.forward_h2_pooled(
|
let result = self.forward_h2_pooled(
|
||||||
sender, parts, body, upstream_headers, &upstream_path,
|
sender, parts, body, upstream_headers, &upstream_path,
|
||||||
route_match.route, route_id, &ip_str, &pool_key,
|
route_match.route, route_id, &ip_str, &pool_key,
|
||||||
@@ -584,21 +628,77 @@ impl HttpProxyService {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Fresh connection path
|
// --- Fresh connection path ---
|
||||||
let backend = if upstream.use_tls {
|
self.metrics.backend_pool_miss(&upstream_key);
|
||||||
|
|
||||||
|
// Choose TLS config: use ALPN config for auto-detect probe, plain config otherwise
|
||||||
|
let tls_config = if needs_alpn_probe {
|
||||||
|
&self.backend_tls_config_alpn
|
||||||
|
} else {
|
||||||
|
&self.backend_tls_config
|
||||||
|
};
|
||||||
|
|
||||||
|
// Establish backend connection
|
||||||
|
let connect_start = std::time::Instant::now();
|
||||||
|
let (backend, detected_h2) = if upstream.use_tls {
|
||||||
match tokio::time::timeout(
|
match tokio::time::timeout(
|
||||||
self.connect_timeout,
|
self.connect_timeout,
|
||||||
connect_tls_backend(&self.backend_tls_config, &upstream.host, upstream.port),
|
connect_tls_backend(tls_config, &upstream.host, upstream.port),
|
||||||
).await {
|
).await {
|
||||||
Ok(Ok(tls)) => BackendStream::Tls(tls),
|
Ok(Ok(tls)) => {
|
||||||
|
let final_h2 = if needs_alpn_probe {
|
||||||
|
// Read the ALPN-negotiated protocol from the TLS connection
|
||||||
|
let alpn = tls.get_ref().1.alpn_protocol();
|
||||||
|
let is_h2 = alpn.map(|p| p == b"h2").unwrap_or(false);
|
||||||
|
|
||||||
|
// Cache the result
|
||||||
|
let cache_key = crate::protocol_cache::ProtocolCacheKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
requested_host: host.clone(),
|
||||||
|
};
|
||||||
|
let detected = if is_h2 {
|
||||||
|
crate::protocol_cache::DetectedProtocol::H2
|
||||||
|
} else {
|
||||||
|
crate::protocol_cache::DetectedProtocol::H1
|
||||||
|
};
|
||||||
|
self.protocol_cache.insert(cache_key, detected);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
backend = %upstream_key,
|
||||||
|
protocol = if is_h2 { "h2" } else { "h1" },
|
||||||
|
connect_time_ms = %connect_start.elapsed().as_millis(),
|
||||||
|
"Backend protocol detected via ALPN"
|
||||||
|
);
|
||||||
|
|
||||||
|
is_h2
|
||||||
|
} else {
|
||||||
|
use_h2
|
||||||
|
};
|
||||||
|
self.metrics.backend_connection_opened(&upstream_key, connect_start.elapsed());
|
||||||
|
self.metrics.set_backend_protocol(&upstream_key, if final_h2 { "h2" } else { "h1" });
|
||||||
|
(BackendStream::Tls(tls), final_h2)
|
||||||
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
error!("Failed TLS connect to upstream {}:{}: {}", upstream.host, upstream.port, e);
|
error!(
|
||||||
|
backend = %upstream_key,
|
||||||
|
connect_time_ms = %connect_start.elapsed().as_millis(),
|
||||||
|
error = %e,
|
||||||
|
"Backend TLS connect failed"
|
||||||
|
);
|
||||||
|
self.metrics.backend_connect_error(&upstream_key);
|
||||||
self.upstream_selector.connection_ended(&upstream_key);
|
self.upstream_selector.connection_ended(&upstream_key);
|
||||||
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend TLS unavailable"));
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend TLS unavailable"));
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
error!("Upstream TLS connect timeout for {}:{}", upstream.host, upstream.port);
|
error!(
|
||||||
|
backend = %upstream_key,
|
||||||
|
connect_time_ms = %connect_start.elapsed().as_millis(),
|
||||||
|
"Backend TLS connect timeout"
|
||||||
|
);
|
||||||
|
self.metrics.backend_connect_error(&upstream_key);
|
||||||
self.upstream_selector.connection_ended(&upstream_key);
|
self.upstream_selector.connection_ended(&upstream_key);
|
||||||
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend TLS connect timeout"));
|
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend TLS connect timeout"));
|
||||||
}
|
}
|
||||||
@@ -613,29 +713,66 @@ impl HttpProxyService {
|
|||||||
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(
|
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(
|
||||||
&socket2::TcpKeepalive::new().with_time(std::time::Duration::from_secs(60))
|
&socket2::TcpKeepalive::new().with_time(std::time::Duration::from_secs(60))
|
||||||
);
|
);
|
||||||
BackendStream::Plain(s)
|
self.metrics.backend_connection_opened(&upstream_key, connect_start.elapsed());
|
||||||
|
self.metrics.set_backend_protocol(&upstream_key, if use_h2 { "h2" } else { "h1" });
|
||||||
|
(BackendStream::Plain(s), use_h2)
|
||||||
}
|
}
|
||||||
Ok(Err(e)) => {
|
Ok(Err(e)) => {
|
||||||
error!("Failed to connect to upstream {}:{}: {}", upstream.host, upstream.port, e);
|
error!(
|
||||||
|
backend = %upstream_key,
|
||||||
|
connect_time_ms = %connect_start.elapsed().as_millis(),
|
||||||
|
error = %e,
|
||||||
|
"Backend TCP connect failed"
|
||||||
|
);
|
||||||
|
self.metrics.backend_connect_error(&upstream_key);
|
||||||
self.upstream_selector.connection_ended(&upstream_key);
|
self.upstream_selector.connection_ended(&upstream_key);
|
||||||
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable"));
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable"));
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
error!("Upstream connect timeout for {}:{}", upstream.host, upstream.port);
|
error!(
|
||||||
|
backend = %upstream_key,
|
||||||
|
connect_time_ms = %connect_start.elapsed().as_millis(),
|
||||||
|
"Backend TCP connect timeout"
|
||||||
|
);
|
||||||
|
self.metrics.backend_connect_error(&upstream_key);
|
||||||
self.upstream_selector.connection_ended(&upstream_key);
|
self.upstream_selector.connection_ended(&upstream_key);
|
||||||
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend connect timeout"));
|
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend connect timeout"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let final_pool_key = crate::connection_pool::PoolKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
use_tls: upstream.use_tls,
|
||||||
|
h2: detected_h2,
|
||||||
|
};
|
||||||
|
|
||||||
let io = TokioIo::new(backend);
|
let io = TokioIo::new(backend);
|
||||||
|
|
||||||
let result = if use_h2 {
|
let result = if detected_h2 {
|
||||||
self.forward_h2(io, parts, body, upstream_headers, &upstream_path, &upstream, route_match.route, route_id, &ip_str, &pool_key).await
|
if is_auto_detect_mode {
|
||||||
|
// Auto-detect mode: use fallback-capable H2 forwarding
|
||||||
|
self.forward_h2_with_fallback(
|
||||||
|
io, parts, body, upstream_headers, &upstream_path,
|
||||||
|
&upstream, route_match.route, route_id, &ip_str, &final_pool_key,
|
||||||
|
host.clone(),
|
||||||
|
).await
|
||||||
} else {
|
} else {
|
||||||
self.forward_h1(io, parts, body, upstream_headers, &upstream_path, &upstream, route_match.route, route_id, &ip_str, &pool_key).await
|
// Explicit H2 mode: hard-fail on handshake error (preserved behavior)
|
||||||
|
self.forward_h2(
|
||||||
|
io, parts, body, upstream_headers, &upstream_path,
|
||||||
|
&upstream, route_match.route, route_id, &ip_str, &final_pool_key,
|
||||||
|
).await
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.forward_h1(
|
||||||
|
io, parts, body, upstream_headers, &upstream_path,
|
||||||
|
&upstream, route_match.route, route_id, &ip_str, &final_pool_key,
|
||||||
|
).await
|
||||||
};
|
};
|
||||||
self.upstream_selector.connection_ended(&upstream_key);
|
self.upstream_selector.connection_ended(&upstream_key);
|
||||||
|
self.metrics.backend_connection_closed(&upstream_key);
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -654,8 +791,11 @@ impl HttpProxyService {
|
|||||||
source_ip: &str,
|
source_ip: &str,
|
||||||
pool_key: &crate::connection_pool::PoolKey,
|
pool_key: &crate::connection_pool::PoolKey,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let backend_key = format!("{}:{}", pool_key.host, pool_key.port);
|
||||||
|
|
||||||
// Try pooled H1 connection first — avoids TCP+TLS handshake
|
// Try pooled H1 connection first — avoids TCP+TLS handshake
|
||||||
if let Some(pooled_sender) = self.connection_pool.checkout_h1(pool_key) {
|
if let Some(pooled_sender) = self.connection_pool.checkout_h1(pool_key) {
|
||||||
|
self.metrics.backend_pool_hit(&backend_key);
|
||||||
return self.forward_h1_with_sender(
|
return self.forward_h1_with_sender(
|
||||||
pooled_sender, parts, body, upstream_headers, upstream_path,
|
pooled_sender, parts, body, upstream_headers, upstream_path,
|
||||||
route, route_id, source_ip, pool_key,
|
route, route_id, source_ip, pool_key,
|
||||||
@@ -669,7 +809,8 @@ impl HttpProxyService {
|
|||||||
) = match hyper::client::conn::http1::handshake(io).await {
|
) = match hyper::client::conn::http1::handshake(io).await {
|
||||||
Ok(h) => h,
|
Ok(h) => h,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Upstream handshake failed: {}", e);
|
error!(backend = %backend_key, error = %e, "Backend H1 handshake failed");
|
||||||
|
self.metrics.backend_handshake_error(&backend_key);
|
||||||
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend handshake failed"));
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend handshake failed"));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -721,7 +862,9 @@ impl HttpProxyService {
|
|||||||
let upstream_response = match sender.send_request(upstream_req).await {
|
let upstream_response = match sender.send_request(upstream_req).await {
|
||||||
Ok(resp) => resp,
|
Ok(resp) => resp,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Upstream request failed: {}", e);
|
let bk = format!("{}:{}", pool_key.host, pool_key.port);
|
||||||
|
error!(backend = %bk, error = %e, "Backend H1 request failed");
|
||||||
|
self.metrics.backend_request_error(&bk);
|
||||||
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend request failed"));
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend request failed"));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -747,6 +890,7 @@ impl HttpProxyService {
|
|||||||
source_ip: &str,
|
source_ip: &str,
|
||||||
pool_key: &crate::connection_pool::PoolKey,
|
pool_key: &crate::connection_pool::PoolKey,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let backend_key = format!("{}:{}", pool_key.host, pool_key.port);
|
||||||
let exec = hyper_util::rt::TokioExecutor::new();
|
let exec = hyper_util::rt::TokioExecutor::new();
|
||||||
// Explicitly type the handshake with BoxBody for uniform pool type
|
// Explicitly type the handshake with BoxBody for uniform pool type
|
||||||
let (sender, conn): (
|
let (sender, conn): (
|
||||||
@@ -755,7 +899,8 @@ impl HttpProxyService {
|
|||||||
) = match hyper::client::conn::http2::handshake(exec, io).await {
|
) = match hyper::client::conn::http2::handshake(exec, io).await {
|
||||||
Ok(h) => h,
|
Ok(h) => h,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("HTTP/2 upstream handshake failed: {}", e);
|
error!(backend = %backend_key, error = %e, "Backend H2 handshake failed");
|
||||||
|
self.metrics.backend_handshake_error(&backend_key);
|
||||||
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 handshake failed"));
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 handshake failed"));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -769,10 +914,12 @@ impl HttpProxyService {
|
|||||||
// Register for multiplexed reuse
|
// Register for multiplexed reuse
|
||||||
self.connection_pool.register_h2(pool_key.clone(), sender.clone());
|
self.connection_pool.register_h2(pool_key.clone(), sender.clone());
|
||||||
|
|
||||||
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await
|
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip, Some(pool_key)).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Forward request using an existing (pooled) HTTP/2 sender.
|
/// Forward request using an existing (pooled) HTTP/2 sender.
|
||||||
|
/// If the pooled sender is stale (GOAWAY, connection closed), evicts it and retries
|
||||||
|
/// with a fresh connection for bodyless requests (GET/HEAD/DELETE).
|
||||||
async fn forward_h2_pooled(
|
async fn forward_h2_pooled(
|
||||||
&self,
|
&self,
|
||||||
sender: hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
sender: hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
||||||
@@ -783,9 +930,420 @@ impl HttpProxyService {
|
|||||||
route: &rustproxy_config::RouteConfig,
|
route: &rustproxy_config::RouteConfig,
|
||||||
route_id: Option<&str>,
|
route_id: Option<&str>,
|
||||||
source_ip: &str,
|
source_ip: &str,
|
||||||
_pool_key: &crate::connection_pool::PoolKey,
|
pool_key: &crate::connection_pool::PoolKey,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
self.forward_h2_with_sender(sender, parts, body, upstream_headers, upstream_path, route, route_id, source_ip).await
|
// Save retry state for bodyless requests (cheap: Method is an enum, HeaderMap clones Arc-backed Bytes)
|
||||||
|
let retry_state = if body.is_end_stream() {
|
||||||
|
Some((parts.method.clone(), upstream_headers.clone()))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = self.forward_h2_with_sender(
|
||||||
|
sender, parts, body, upstream_headers, upstream_path,
|
||||||
|
route, route_id, source_ip, Some(pool_key),
|
||||||
|
).await;
|
||||||
|
|
||||||
|
// If the request failed (502) and we can retry with an empty body, do so
|
||||||
|
let is_502 = matches!(&result, Ok(resp) if resp.status() == StatusCode::BAD_GATEWAY);
|
||||||
|
if is_502 {
|
||||||
|
if let Some((method, headers)) = retry_state {
|
||||||
|
warn!("Stale pooled H2 sender for {}:{}, retrying with fresh connection",
|
||||||
|
pool_key.host, pool_key.port);
|
||||||
|
return self.retry_h2_with_fresh_connection(
|
||||||
|
method, headers, upstream_path,
|
||||||
|
pool_key, route, route_id, source_ip,
|
||||||
|
).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retry an H2 request with a fresh backend connection and empty body.
|
||||||
|
/// Used when a pooled sender was stale (GOAWAY/closed) and the original body was empty.
|
||||||
|
async fn retry_h2_with_fresh_connection(
|
||||||
|
&self,
|
||||||
|
method: hyper::Method,
|
||||||
|
upstream_headers: hyper::HeaderMap,
|
||||||
|
upstream_path: &str,
|
||||||
|
pool_key: &crate::connection_pool::PoolKey,
|
||||||
|
route: &rustproxy_config::RouteConfig,
|
||||||
|
route_id: Option<&str>,
|
||||||
|
source_ip: &str,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let backend_key = format!("{}:{}", pool_key.host, pool_key.port);
|
||||||
|
|
||||||
|
// Establish fresh backend connection
|
||||||
|
let retry_connect_start = std::time::Instant::now();
|
||||||
|
let backend = if pool_key.use_tls {
|
||||||
|
match tokio::time::timeout(
|
||||||
|
self.connect_timeout,
|
||||||
|
connect_tls_backend(&self.backend_tls_config, &pool_key.host, pool_key.port),
|
||||||
|
).await {
|
||||||
|
Ok(Ok(tls)) => BackendStream::Tls(tls),
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H2 retry: TLS connect failed");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable on H2 retry"));
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
error!(backend = %backend_key, "H2 retry: TLS connect timeout");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend timeout on H2 retry"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match tokio::time::timeout(
|
||||||
|
self.connect_timeout,
|
||||||
|
TcpStream::connect(format!("{}:{}", pool_key.host, pool_key.port)),
|
||||||
|
).await {
|
||||||
|
Ok(Ok(s)) => {
|
||||||
|
s.set_nodelay(true).ok();
|
||||||
|
BackendStream::Plain(s)
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H2 retry: TCP connect failed");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable on H2 retry"));
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
error!(backend = %backend_key, "H2 retry: TCP connect timeout");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::GATEWAY_TIMEOUT, "Backend timeout on H2 retry"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
self.metrics.backend_connection_opened(&backend_key, retry_connect_start.elapsed());
|
||||||
|
|
||||||
|
let io = TokioIo::new(backend);
|
||||||
|
let exec = hyper_util::rt::TokioExecutor::new();
|
||||||
|
let (mut sender, conn): (
|
||||||
|
hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
||||||
|
hyper::client::conn::http2::Connection<TokioIo<BackendStream>, BoxBody<Bytes, hyper::Error>, hyper_util::rt::TokioExecutor>,
|
||||||
|
) = match hyper::client::conn::http2::handshake(exec, io).await {
|
||||||
|
Ok(h) => h,
|
||||||
|
Err(e) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H2 retry: handshake failed");
|
||||||
|
self.metrics.backend_handshake_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 retry handshake failed"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = conn.await {
|
||||||
|
debug!("H2 retry: upstream connection error: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Register fresh sender in pool for future requests
|
||||||
|
self.connection_pool.register_h2(pool_key.clone(), sender.clone());
|
||||||
|
|
||||||
|
// Build request with empty body
|
||||||
|
let mut upstream_req = Request::builder()
|
||||||
|
.method(method)
|
||||||
|
.uri(upstream_path);
|
||||||
|
|
||||||
|
if let Some(headers) = upstream_req.headers_mut() {
|
||||||
|
*headers = upstream_headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
let empty_body: BoxBody<Bytes, hyper::Error> = BoxBody::new(
|
||||||
|
http_body_util::Empty::new().map_err(|never| match never {})
|
||||||
|
);
|
||||||
|
let upstream_req = upstream_req.body(empty_body).unwrap();
|
||||||
|
|
||||||
|
match sender.send_request(upstream_req).await {
|
||||||
|
Ok(resp) => {
|
||||||
|
let result = self.build_streaming_response(resp, route, route_id, source_ip).await;
|
||||||
|
// Close the fresh backend connection (opened at line 1016 above)
|
||||||
|
self.metrics.backend_connection_closed(&backend_key);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H2 retry: request failed");
|
||||||
|
self.metrics.backend_request_error(&backend_key);
|
||||||
|
self.connection_pool.remove_h2(pool_key);
|
||||||
|
// Close the fresh backend connection (opened at line 1016 above)
|
||||||
|
self.metrics.backend_connection_closed(&backend_key);
|
||||||
|
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed on retry"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forward via HTTP/2 with fallback to HTTP/1.1 (auto-detect mode).
|
||||||
|
///
|
||||||
|
/// Handles two failure scenarios:
|
||||||
|
/// 1. H2 handshake fails → reconnects and falls back to H1 (body not consumed yet).
|
||||||
|
/// 2. H2 handshake "succeeds" but request fails (backend advertises h2 via ALPN but
|
||||||
|
/// doesn't actually speak h2) → updates cache to H1, retries as H1 for bodyless
|
||||||
|
/// requests, or returns 502 for requests with bodies.
|
||||||
|
async fn forward_h2_with_fallback(
|
||||||
|
&self,
|
||||||
|
io: TokioIo<BackendStream>,
|
||||||
|
parts: hyper::http::request::Parts,
|
||||||
|
body: Incoming,
|
||||||
|
upstream_headers: hyper::HeaderMap,
|
||||||
|
upstream_path: &str,
|
||||||
|
upstream: &crate::upstream_selector::UpstreamSelection,
|
||||||
|
route: &rustproxy_config::RouteConfig,
|
||||||
|
route_id: Option<&str>,
|
||||||
|
source_ip: &str,
|
||||||
|
pool_key: &crate::connection_pool::PoolKey,
|
||||||
|
requested_host: Option<String>,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let exec = hyper_util::rt::TokioExecutor::new();
|
||||||
|
let handshake_result: Result<(
|
||||||
|
hyper::client::conn::http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
||||||
|
hyper::client::conn::http2::Connection<TokioIo<BackendStream>, BoxBody<Bytes, hyper::Error>, hyper_util::rt::TokioExecutor>,
|
||||||
|
), hyper::Error> = hyper::client::conn::http2::handshake(exec, io).await;
|
||||||
|
|
||||||
|
match handshake_result {
|
||||||
|
Ok((mut sender, conn)) => {
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = conn.await {
|
||||||
|
debug!("HTTP/2 upstream connection error: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Save retry state before consuming parts/body (for bodyless requests like GET)
|
||||||
|
let retry_state = if body.is_end_stream() {
|
||||||
|
Some((parts.method.clone(), upstream_headers.clone()))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// Build and send the h2 request inline (don't register in pool yet —
|
||||||
|
// we need to verify the request actually succeeds first, because some
|
||||||
|
// backends advertise h2 via ALPN but don't speak the h2 binary protocol).
|
||||||
|
let mut upstream_req = Request::builder()
|
||||||
|
.method(parts.method)
|
||||||
|
.uri(upstream_path);
|
||||||
|
|
||||||
|
if let Some(headers) = upstream_req.headers_mut() {
|
||||||
|
*headers = upstream_headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
let counting_req_body = CountingBody::new(
|
||||||
|
body,
|
||||||
|
Arc::clone(&self.metrics),
|
||||||
|
route_id.map(|s| s.to_string()),
|
||||||
|
Some(source_ip.to_string()),
|
||||||
|
Direction::In,
|
||||||
|
);
|
||||||
|
let boxed_body: BoxBody<Bytes, hyper::Error> = BoxBody::new(counting_req_body);
|
||||||
|
let upstream_req = upstream_req.body(boxed_body).unwrap();
|
||||||
|
|
||||||
|
match sender.send_request(upstream_req).await {
|
||||||
|
Ok(upstream_response) => {
|
||||||
|
// H2 works! Register sender in pool for multiplexed reuse
|
||||||
|
self.connection_pool.register_h2(pool_key.clone(), sender);
|
||||||
|
self.build_streaming_response(upstream_response, route, route_id, source_ip).await
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
// H2 request failed — backend advertises h2 via ALPN but doesn't
|
||||||
|
// actually speak it. Update cache so future requests use H1.
|
||||||
|
let bk = format!("{}:{}", upstream.host, upstream.port);
|
||||||
|
warn!(
|
||||||
|
backend = %bk,
|
||||||
|
error = %e,
|
||||||
|
"Auto-detect: H2 request failed, falling back to H1"
|
||||||
|
);
|
||||||
|
self.metrics.backend_h2_failure(&bk);
|
||||||
|
let cache_key = crate::protocol_cache::ProtocolCacheKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
requested_host: requested_host.clone(),
|
||||||
|
};
|
||||||
|
self.protocol_cache.insert(cache_key, crate::protocol_cache::DetectedProtocol::H1);
|
||||||
|
|
||||||
|
// Retry as H1 for bodyless requests; return 502 for requests with bodies
|
||||||
|
if let Some((method, headers)) = retry_state {
|
||||||
|
match self.reconnect_backend(upstream).await {
|
||||||
|
Some(fallback_backend) => {
|
||||||
|
let h1_pool_key = crate::connection_pool::PoolKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
use_tls: upstream.use_tls,
|
||||||
|
h2: false,
|
||||||
|
};
|
||||||
|
let fallback_io = TokioIo::new(fallback_backend);
|
||||||
|
let result = self.forward_h1_empty_body(
|
||||||
|
fallback_io, method, headers, upstream_path,
|
||||||
|
route, route_id, source_ip, &h1_pool_key,
|
||||||
|
).await;
|
||||||
|
// Close the reconnected backend connection (opened in reconnect_backend)
|
||||||
|
self.metrics.backend_connection_closed(&bk);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable after H2 fallback"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend protocol mismatch"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
// H2 handshake truly failed — fall back to H1
|
||||||
|
// Body is NOT consumed yet, so we can retry the full request.
|
||||||
|
let bk = format!("{}:{}", upstream.host, upstream.port);
|
||||||
|
warn!(
|
||||||
|
backend = %bk,
|
||||||
|
error = %e,
|
||||||
|
"H2 handshake failed, falling back to H1"
|
||||||
|
);
|
||||||
|
self.metrics.backend_h2_failure(&bk);
|
||||||
|
self.metrics.backend_handshake_error(&bk);
|
||||||
|
|
||||||
|
// Update cache to H1 so subsequent requests skip H2
|
||||||
|
let cache_key = crate::protocol_cache::ProtocolCacheKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
requested_host: requested_host.clone(),
|
||||||
|
};
|
||||||
|
self.protocol_cache.insert(cache_key, crate::protocol_cache::DetectedProtocol::H1);
|
||||||
|
|
||||||
|
// Reconnect for H1 (the original io was consumed by the failed h2 handshake)
|
||||||
|
match self.reconnect_backend(upstream).await {
|
||||||
|
Some(fallback_backend) => {
|
||||||
|
let h1_pool_key = crate::connection_pool::PoolKey {
|
||||||
|
host: upstream.host.clone(),
|
||||||
|
port: upstream.port,
|
||||||
|
use_tls: upstream.use_tls,
|
||||||
|
h2: false,
|
||||||
|
};
|
||||||
|
let fallback_io = TokioIo::new(fallback_backend);
|
||||||
|
let result = self.forward_h1(
|
||||||
|
fallback_io, parts, body, upstream_headers, upstream_path,
|
||||||
|
upstream, route, route_id, source_ip, &h1_pool_key,
|
||||||
|
).await;
|
||||||
|
// Close the reconnected backend connection (opened in reconnect_backend)
|
||||||
|
self.metrics.backend_connection_closed(&bk);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
Ok(error_response(StatusCode::BAD_GATEWAY, "Backend unavailable after H2 fallback"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forward a request with an empty body via HTTP/1.1.
|
||||||
|
/// Used when retrying after a failed H2 attempt where the original body was consumed.
|
||||||
|
async fn forward_h1_empty_body(
|
||||||
|
&self,
|
||||||
|
io: TokioIo<BackendStream>,
|
||||||
|
method: hyper::Method,
|
||||||
|
upstream_headers: hyper::HeaderMap,
|
||||||
|
upstream_path: &str,
|
||||||
|
route: &rustproxy_config::RouteConfig,
|
||||||
|
route_id: Option<&str>,
|
||||||
|
source_ip: &str,
|
||||||
|
pool_key: &crate::connection_pool::PoolKey,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
|
let backend_key = format!("{}:{}", pool_key.host, pool_key.port);
|
||||||
|
let (mut sender, conn): (
|
||||||
|
hyper::client::conn::http1::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
||||||
|
hyper::client::conn::http1::Connection<TokioIo<BackendStream>, BoxBody<Bytes, hyper::Error>>,
|
||||||
|
) = match hyper::client::conn::http1::handshake(io).await {
|
||||||
|
Ok(h) => h,
|
||||||
|
Err(e) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H1 fallback: handshake failed");
|
||||||
|
self.metrics.backend_handshake_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H1 fallback handshake failed"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = conn.await {
|
||||||
|
debug!("H1 fallback: upstream connection error: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut upstream_req = Request::builder()
|
||||||
|
.method(method)
|
||||||
|
.uri(upstream_path)
|
||||||
|
.version(hyper::Version::HTTP_11);
|
||||||
|
|
||||||
|
if let Some(headers) = upstream_req.headers_mut() {
|
||||||
|
*headers = upstream_headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
let empty_body: BoxBody<Bytes, hyper::Error> = BoxBody::new(
|
||||||
|
http_body_util::Empty::new().map_err(|never| match never {})
|
||||||
|
);
|
||||||
|
let upstream_req = upstream_req.body(empty_body).unwrap();
|
||||||
|
|
||||||
|
let upstream_response = match sender.send_request(upstream_req).await {
|
||||||
|
Ok(resp) => resp,
|
||||||
|
Err(e) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H1 fallback: request failed");
|
||||||
|
self.metrics.backend_request_error(&backend_key);
|
||||||
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H1 fallback request failed"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Return sender to pool for keep-alive reuse
|
||||||
|
self.connection_pool.checkin_h1(pool_key.clone(), sender);
|
||||||
|
|
||||||
|
self.build_streaming_response(upstream_response, route, route_id, source_ip).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reconnect to a backend (used for H2→H1 fallback).
|
||||||
|
async fn reconnect_backend(
|
||||||
|
&self,
|
||||||
|
upstream: &crate::upstream_selector::UpstreamSelection,
|
||||||
|
) -> Option<BackendStream> {
|
||||||
|
let backend_key = format!("{}:{}", upstream.host, upstream.port);
|
||||||
|
let reconnect_start = std::time::Instant::now();
|
||||||
|
if upstream.use_tls {
|
||||||
|
match tokio::time::timeout(
|
||||||
|
self.connect_timeout,
|
||||||
|
connect_tls_backend(&self.backend_tls_config, &upstream.host, upstream.port),
|
||||||
|
).await {
|
||||||
|
Ok(Ok(tls)) => {
|
||||||
|
self.metrics.backend_connection_opened(&backend_key, reconnect_start.elapsed());
|
||||||
|
Some(BackendStream::Tls(tls))
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H1 fallback: TLS reconnect failed");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
error!(backend = %backend_key, "H1 fallback: TLS reconnect timeout");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match tokio::time::timeout(
|
||||||
|
self.connect_timeout,
|
||||||
|
TcpStream::connect(format!("{}:{}", upstream.host, upstream.port)),
|
||||||
|
).await {
|
||||||
|
Ok(Ok(s)) => {
|
||||||
|
s.set_nodelay(true).ok();
|
||||||
|
let _ = socket2::SockRef::from(&s).set_tcp_keepalive(
|
||||||
|
&socket2::TcpKeepalive::new().with_time(std::time::Duration::from_secs(60))
|
||||||
|
);
|
||||||
|
self.metrics.backend_connection_opened(&backend_key, reconnect_start.elapsed());
|
||||||
|
Some(BackendStream::Plain(s))
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
error!(backend = %backend_key, error = %e, "H1 fallback: TCP reconnect failed");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
error!(backend = %backend_key, "H1 fallback: TCP reconnect timeout");
|
||||||
|
self.metrics.backend_connect_error(&backend_key);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Common H2 forwarding logic used by both fresh and pooled paths.
|
/// Common H2 forwarding logic used by both fresh and pooled paths.
|
||||||
@@ -799,6 +1357,7 @@ impl HttpProxyService {
|
|||||||
route: &rustproxy_config::RouteConfig,
|
route: &rustproxy_config::RouteConfig,
|
||||||
route_id: Option<&str>,
|
route_id: Option<&str>,
|
||||||
source_ip: &str,
|
source_ip: &str,
|
||||||
|
pool_key: Option<&crate::connection_pool::PoolKey>,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
let mut upstream_req = Request::builder()
|
let mut upstream_req = Request::builder()
|
||||||
.method(parts.method)
|
.method(parts.method)
|
||||||
@@ -823,7 +1382,15 @@ impl HttpProxyService {
|
|||||||
let upstream_response = match sender.send_request(upstream_req).await {
|
let upstream_response = match sender.send_request(upstream_req).await {
|
||||||
Ok(resp) => resp,
|
Ok(resp) => resp,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("HTTP/2 upstream request failed: {}", e);
|
// Evict the dead sender so subsequent requests get fresh connections
|
||||||
|
if let Some(key) = pool_key {
|
||||||
|
let bk = format!("{}:{}", key.host, key.port);
|
||||||
|
error!(backend = %bk, error = %e, "Backend H2 request failed");
|
||||||
|
self.metrics.backend_request_error(&bk);
|
||||||
|
self.connection_pool.remove_h2(key);
|
||||||
|
} else {
|
||||||
|
error!(error = %e, "Backend H2 request failed");
|
||||||
|
}
|
||||||
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed"));
|
return Ok(error_response(StatusCode::BAD_GATEWAY, "Backend H2 request failed"));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -1432,6 +1999,18 @@ impl HttpProxyService {
|
|||||||
.with_no_client_auth();
|
.with_no_client_auth();
|
||||||
Arc::new(config)
|
Arc::new(config)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build a default backend TLS config with ALPN h2+http/1.1 for auto-detection.
|
||||||
|
/// Used as fallback when no shared ALPN config is injected from tls_handler.
|
||||||
|
fn default_backend_tls_config_with_alpn() -> Arc<rustls::ClientConfig> {
|
||||||
|
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||||
|
let mut config = rustls::ClientConfig::builder()
|
||||||
|
.dangerous()
|
||||||
|
.with_custom_certificate_verifier(Arc::new(InsecureBackendVerifier))
|
||||||
|
.with_no_client_auth();
|
||||||
|
config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||||
|
Arc::new(config)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Insecure certificate verifier for backend TLS connections (fallback only).
|
/// Insecure certificate verifier for backend TLS connections (fallback only).
|
||||||
@@ -1488,7 +2067,7 @@ impl rustls::client::danger::ServerCertVerifier for InsecureBackendVerifier {
|
|||||||
impl Default for HttpProxyService {
|
impl Default for HttpProxyService {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
route_manager: Arc::new(RouteManager::new(vec![])),
|
route_manager: Arc::new(ArcSwap::from(Arc::new(RouteManager::new(vec![])))),
|
||||||
metrics: Arc::new(MetricsCollector::new()),
|
metrics: Arc::new(MetricsCollector::new()),
|
||||||
upstream_selector: UpstreamSelector::new(),
|
upstream_selector: UpstreamSelector::new(),
|
||||||
connect_timeout: DEFAULT_CONNECT_TIMEOUT,
|
connect_timeout: DEFAULT_CONNECT_TIMEOUT,
|
||||||
@@ -1496,7 +2075,9 @@ impl Default for HttpProxyService {
|
|||||||
request_counter: AtomicU64::new(0),
|
request_counter: AtomicU64::new(0),
|
||||||
regex_cache: DashMap::new(),
|
regex_cache: DashMap::new(),
|
||||||
backend_tls_config: Self::default_backend_tls_config(),
|
backend_tls_config: Self::default_backend_tls_config(),
|
||||||
|
backend_tls_config_alpn: Self::default_backend_tls_config_with_alpn(),
|
||||||
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
connection_pool: Arc::new(crate::connection_pool::ConnectionPool::new()),
|
||||||
|
protocol_cache: Arc::new(crate::protocol_cache::ProtocolCache::new()),
|
||||||
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
http_idle_timeout: DEFAULT_HTTP_IDLE_TIMEOUT,
|
||||||
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
ws_inactivity_timeout: DEFAULT_WS_INACTIVITY_TIMEOUT,
|
||||||
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
ws_max_lifetime: DEFAULT_WS_MAX_LIFETIME,
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use crate::throughput::{ThroughputSample, ThroughputTracker};
|
use crate::throughput::{ThroughputSample, ThroughputTracker};
|
||||||
|
|
||||||
@@ -20,6 +21,7 @@ pub struct Metrics {
|
|||||||
pub throughput_recent_out_bytes_per_sec: u64,
|
pub throughput_recent_out_bytes_per_sec: u64,
|
||||||
pub routes: std::collections::HashMap<String, RouteMetrics>,
|
pub routes: std::collections::HashMap<String, RouteMetrics>,
|
||||||
pub ips: std::collections::HashMap<String, IpMetrics>,
|
pub ips: std::collections::HashMap<String, IpMetrics>,
|
||||||
|
pub backends: std::collections::HashMap<String, BackendMetrics>,
|
||||||
pub throughput_history: Vec<ThroughputSample>,
|
pub throughput_history: Vec<ThroughputSample>,
|
||||||
pub total_http_requests: u64,
|
pub total_http_requests: u64,
|
||||||
pub http_requests_per_sec: u64,
|
pub http_requests_per_sec: u64,
|
||||||
@@ -52,6 +54,23 @@ pub struct IpMetrics {
|
|||||||
pub throughput_out_bytes_per_sec: u64,
|
pub throughput_out_bytes_per_sec: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Per-backend metrics (keyed by "host:port").
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct BackendMetrics {
|
||||||
|
pub active_connections: u64,
|
||||||
|
pub total_connections: u64,
|
||||||
|
pub protocol: String,
|
||||||
|
pub connect_errors: u64,
|
||||||
|
pub handshake_errors: u64,
|
||||||
|
pub request_errors: u64,
|
||||||
|
pub total_connect_time_us: u64,
|
||||||
|
pub connect_count: u64,
|
||||||
|
pub pool_hits: u64,
|
||||||
|
pub pool_misses: u64,
|
||||||
|
pub h2_failures: u64,
|
||||||
|
}
|
||||||
|
|
||||||
/// Statistics snapshot.
|
/// Statistics snapshot.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
@@ -69,6 +88,9 @@ const DEFAULT_RETENTION_SECONDS: usize = 3600;
|
|||||||
/// Maximum number of IPs to include in a snapshot (top by active connections).
|
/// Maximum number of IPs to include in a snapshot (top by active connections).
|
||||||
const MAX_IPS_IN_SNAPSHOT: usize = 100;
|
const MAX_IPS_IN_SNAPSHOT: usize = 100;
|
||||||
|
|
||||||
|
/// Maximum number of backends to include in a snapshot (top by total connections).
|
||||||
|
const MAX_BACKENDS_IN_SNAPSHOT: usize = 100;
|
||||||
|
|
||||||
/// Metrics collector tracking connections and throughput.
|
/// Metrics collector tracking connections and throughput.
|
||||||
///
|
///
|
||||||
/// Design: The hot path (`record_bytes`) is entirely lock-free — it only touches
|
/// Design: The hot path (`record_bytes`) is entirely lock-free — it only touches
|
||||||
@@ -96,6 +118,19 @@ pub struct MetricsCollector {
|
|||||||
ip_pending_tp: DashMap<String, (AtomicU64, AtomicU64)>,
|
ip_pending_tp: DashMap<String, (AtomicU64, AtomicU64)>,
|
||||||
ip_throughput: DashMap<String, Mutex<ThroughputTracker>>,
|
ip_throughput: DashMap<String, Mutex<ThroughputTracker>>,
|
||||||
|
|
||||||
|
// ── Per-backend tracking (keyed by "host:port") ──
|
||||||
|
backend_active: DashMap<String, AtomicU64>,
|
||||||
|
backend_total: DashMap<String, AtomicU64>,
|
||||||
|
backend_protocol: DashMap<String, String>,
|
||||||
|
backend_connect_errors: DashMap<String, AtomicU64>,
|
||||||
|
backend_handshake_errors: DashMap<String, AtomicU64>,
|
||||||
|
backend_request_errors: DashMap<String, AtomicU64>,
|
||||||
|
backend_connect_time_us: DashMap<String, AtomicU64>,
|
||||||
|
backend_connect_count: DashMap<String, AtomicU64>,
|
||||||
|
backend_pool_hits: DashMap<String, AtomicU64>,
|
||||||
|
backend_pool_misses: DashMap<String, AtomicU64>,
|
||||||
|
backend_h2_failures: DashMap<String, AtomicU64>,
|
||||||
|
|
||||||
// ── HTTP request tracking ──
|
// ── HTTP request tracking ──
|
||||||
total_http_requests: AtomicU64,
|
total_http_requests: AtomicU64,
|
||||||
pending_http_requests: AtomicU64,
|
pending_http_requests: AtomicU64,
|
||||||
@@ -134,6 +169,17 @@ impl MetricsCollector {
|
|||||||
ip_bytes_out: DashMap::new(),
|
ip_bytes_out: DashMap::new(),
|
||||||
ip_pending_tp: DashMap::new(),
|
ip_pending_tp: DashMap::new(),
|
||||||
ip_throughput: DashMap::new(),
|
ip_throughput: DashMap::new(),
|
||||||
|
backend_active: DashMap::new(),
|
||||||
|
backend_total: DashMap::new(),
|
||||||
|
backend_protocol: DashMap::new(),
|
||||||
|
backend_connect_errors: DashMap::new(),
|
||||||
|
backend_handshake_errors: DashMap::new(),
|
||||||
|
backend_request_errors: DashMap::new(),
|
||||||
|
backend_connect_time_us: DashMap::new(),
|
||||||
|
backend_connect_count: DashMap::new(),
|
||||||
|
backend_pool_hits: DashMap::new(),
|
||||||
|
backend_pool_misses: DashMap::new(),
|
||||||
|
backend_h2_failures: DashMap::new(),
|
||||||
total_http_requests: AtomicU64::new(0),
|
total_http_requests: AtomicU64::new(0),
|
||||||
pending_http_requests: AtomicU64::new(0),
|
pending_http_requests: AtomicU64::new(0),
|
||||||
http_request_throughput: Mutex::new(ThroughputTracker::new(retention_seconds)),
|
http_request_throughput: Mutex::new(ThroughputTracker::new(retention_seconds)),
|
||||||
@@ -268,6 +314,113 @@ impl MetricsCollector {
|
|||||||
self.pending_http_requests.fetch_add(1, Ordering::Relaxed);
|
self.pending_http_requests.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Per-backend recording methods ──
|
||||||
|
|
||||||
|
/// Record a successful backend connection with its connect duration.
|
||||||
|
pub fn backend_connection_opened(&self, key: &str, connect_time: Duration) {
|
||||||
|
self.backend_active
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
self.backend_total
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
self.backend_connect_time_us
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(connect_time.as_micros() as u64, Ordering::Relaxed);
|
||||||
|
self.backend_connect_count
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a backend connection closing.
|
||||||
|
pub fn backend_connection_closed(&self, key: &str) {
|
||||||
|
if let Some(counter) = self.backend_active.get(key) {
|
||||||
|
let val = counter.load(Ordering::Relaxed);
|
||||||
|
if val > 0 {
|
||||||
|
counter.fetch_sub(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a backend connect error (TCP or TLS connect failure/timeout).
|
||||||
|
pub fn backend_connect_error(&self, key: &str) {
|
||||||
|
self.backend_connect_errors
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a backend handshake error (H1 or H2 handshake failure).
|
||||||
|
pub fn backend_handshake_error(&self, key: &str) {
|
||||||
|
self.backend_handshake_errors
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a backend request error (send_request failure).
|
||||||
|
pub fn backend_request_error(&self, key: &str) {
|
||||||
|
self.backend_request_errors
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a connection pool hit for a backend.
|
||||||
|
pub fn backend_pool_hit(&self, key: &str) {
|
||||||
|
self.backend_pool_hits
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record a connection pool miss for a backend.
|
||||||
|
pub fn backend_pool_miss(&self, key: &str) {
|
||||||
|
self.backend_pool_misses
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record an H2 failure (h2 attempted but fell back to h1).
|
||||||
|
pub fn backend_h2_failure(&self, key: &str) {
|
||||||
|
self.backend_h2_failures
|
||||||
|
.entry(key.to_string())
|
||||||
|
.or_insert_with(|| AtomicU64::new(0))
|
||||||
|
.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the protocol in use for a backend ("h1" or "h2").
|
||||||
|
pub fn set_backend_protocol(&self, key: &str, protocol: &str) {
|
||||||
|
self.backend_protocol
|
||||||
|
.entry(key.to_string())
|
||||||
|
.and_modify(|v| {
|
||||||
|
if v != protocol {
|
||||||
|
*v = protocol.to_string();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.or_insert_with(|| protocol.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove per-backend metrics for backends no longer in any route target.
|
||||||
|
pub fn retain_backends(&self, active_backends: &HashSet<String>) {
|
||||||
|
self.backend_active.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_total.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_protocol.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_connect_errors.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_handshake_errors.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_request_errors.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_connect_time_us.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_connect_count.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_pool_hits.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_pool_misses.retain(|k, _| active_backends.contains(k));
|
||||||
|
self.backend_h2_failures.retain(|k, _| active_backends.contains(k));
|
||||||
|
}
|
||||||
|
|
||||||
/// Take a throughput sample on all trackers (cold path, call at 1Hz or configured interval).
|
/// Take a throughput sample on all trackers (cold path, call at 1Hz or configured interval).
|
||||||
///
|
///
|
||||||
/// Drains the lock-free pending counters and feeds the accumulated bytes
|
/// Drains the lock-free pending counters and feeds the accumulated bytes
|
||||||
@@ -488,6 +641,72 @@ impl MetricsCollector {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collect per-backend metrics, capped at top MAX_BACKENDS_IN_SNAPSHOT by total connections
|
||||||
|
let mut backend_entries: Vec<(String, BackendMetrics)> = Vec::new();
|
||||||
|
for entry in self.backend_total.iter() {
|
||||||
|
let key = entry.key().clone();
|
||||||
|
let total = entry.value().load(Ordering::Relaxed);
|
||||||
|
let active = self.backend_active
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let protocol = self.backend_protocol
|
||||||
|
.get(&key)
|
||||||
|
.map(|v| v.value().clone())
|
||||||
|
.unwrap_or_else(|| "unknown".to_string());
|
||||||
|
let connect_errors = self.backend_connect_errors
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let handshake_errors = self.backend_handshake_errors
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let request_errors = self.backend_request_errors
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let total_connect_time_us = self.backend_connect_time_us
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let connect_count = self.backend_connect_count
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let pool_hits = self.backend_pool_hits
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let pool_misses = self.backend_pool_misses
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let h2_failures = self.backend_h2_failures
|
||||||
|
.get(&key)
|
||||||
|
.map(|c| c.load(Ordering::Relaxed))
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
backend_entries.push((key, BackendMetrics {
|
||||||
|
active_connections: active,
|
||||||
|
total_connections: total,
|
||||||
|
protocol,
|
||||||
|
connect_errors,
|
||||||
|
handshake_errors,
|
||||||
|
request_errors,
|
||||||
|
total_connect_time_us,
|
||||||
|
connect_count,
|
||||||
|
pool_hits,
|
||||||
|
pool_misses,
|
||||||
|
h2_failures,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
// Sort by total connections descending, then cap
|
||||||
|
backend_entries.sort_by(|a, b| b.1.total_connections.cmp(&a.1.total_connections));
|
||||||
|
backend_entries.truncate(MAX_BACKENDS_IN_SNAPSHOT);
|
||||||
|
|
||||||
|
let backends: std::collections::HashMap<String, BackendMetrics> = backend_entries.into_iter().collect();
|
||||||
|
|
||||||
// HTTP request rates
|
// HTTP request rates
|
||||||
let (http_rps, http_rps_recent) = self.http_request_throughput
|
let (http_rps, http_rps_recent) = self.http_request_throughput
|
||||||
.lock()
|
.lock()
|
||||||
@@ -509,6 +728,7 @@ impl MetricsCollector {
|
|||||||
throughput_recent_out_bytes_per_sec: global_recent_out,
|
throughput_recent_out_bytes_per_sec: global_recent_out,
|
||||||
routes,
|
routes,
|
||||||
ips,
|
ips,
|
||||||
|
backends,
|
||||||
throughput_history,
|
throughput_history,
|
||||||
total_http_requests: self.total_http_requests.load(Ordering::Relaxed),
|
total_http_requests: self.total_http_requests.load(Ordering::Relaxed),
|
||||||
http_requests_per_sec: http_rps,
|
http_requests_per_sec: http_rps,
|
||||||
@@ -805,4 +1025,120 @@ mod tests {
|
|||||||
assert_eq!(snapshot.throughput_history[0].bytes_in, 100);
|
assert_eq!(snapshot.throughput_history[0].bytes_in, 100);
|
||||||
assert_eq!(snapshot.throughput_history[4].bytes_in, 500);
|
assert_eq!(snapshot.throughput_history[4].bytes_in, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Exercises every per-backend recording method for one backend and checks
/// the raw counter maps directly.
#[test]
fn test_backend_metrics_basic() {
    let collector = MetricsCollector::new();
    let key = "backend1:8080";

    // Reads the current value of one per-backend counter map for `key`.
    macro_rules! counter {
        ($map:ident) => {
            collector.$map.get(key).unwrap().load(Ordering::Relaxed)
        };
    }

    // Two connections opened with 15ms and 25ms connect times.
    for millis in [15, 25] {
        collector.backend_connection_opened(key, Duration::from_millis(millis));
    }

    assert_eq!(counter!(backend_active), 2);
    assert_eq!(counter!(backend_total), 2);
    assert_eq!(counter!(backend_connect_count), 2);
    // 15ms + 25ms = 40ms = 40_000us
    assert_eq!(counter!(backend_connect_time_us), 40_000);

    // Closing one connection lowers the active count but not the total.
    collector.backend_connection_closed(key);
    assert_eq!(counter!(backend_active), 1);
    assert_eq!(counter!(backend_total), 2);

    // One of each error kind, two pool hits, one pool miss.
    collector.backend_connect_error(key);
    collector.backend_handshake_error(key);
    collector.backend_request_error(key);
    collector.backend_h2_failure(key);
    collector.backend_pool_hit(key);
    collector.backend_pool_hit(key);
    collector.backend_pool_miss(key);

    assert_eq!(counter!(backend_connect_errors), 1);
    assert_eq!(counter!(backend_handshake_errors), 1);
    assert_eq!(counter!(backend_request_errors), 1);
    assert_eq!(counter!(backend_h2_failures), 1);
    assert_eq!(counter!(backend_pool_hits), 2);
    assert_eq!(counter!(backend_pool_misses), 1);

    // The protocol is stored and then overwritten when it changes.
    collector.set_backend_protocol(key, "h1");
    assert_eq!(collector.backend_protocol.get(key).unwrap().as_str(), "h1");
    collector.set_backend_protocol(key, "h2");
    assert_eq!(collector.backend_protocol.get(key).unwrap().as_str(), "h2");
}
|
||||||
|
|
||||||
|
/// Checks that per-backend counters recorded on the collector show up in the
/// `backends` map of a snapshot.
#[test]
fn test_backend_metrics_in_snapshot() {
    let collector = MetricsCollector::new();

    collector.backend_connection_opened("b1:443", Duration::from_millis(10));
    collector.set_backend_protocol("b1:443", "h2");
    collector.backend_connect_error("b1:443");

    collector.backend_connection_opened("b2:8080", Duration::from_millis(20));
    collector.set_backend_protocol("b2:8080", "h1");

    let snapshot = collector.snapshot();
    assert_eq!(snapshot.backends.len(), 2);

    // Indexing panics if the backend is missing, which fails the test.
    let first = &snapshot.backends["b1:443"];
    assert_eq!(first.active_connections, 1);
    assert_eq!(first.total_connections, 1);
    assert_eq!(first.protocol, "h2");
    assert_eq!(first.connect_errors, 1);
    assert_eq!(first.total_connect_time_us, 10_000);
    assert_eq!(first.connect_count, 1);

    let second = &snapshot.backends["b2:8080"];
    assert_eq!(second.protocol, "h1");
    assert_eq!(second.connect_errors, 0);
}
|
||||||
|
|
||||||
|
/// Checks that `retain_backends` keeps listed backends and removes every
/// per-backend entry of unlisted ones.
#[test]
fn test_retain_backends_prunes_stale() {
    let collector = MetricsCollector::new();
    let kept = "active:443";
    let pruned = "stale:8080";

    collector.backend_connection_opened(kept, Duration::from_millis(5));
    collector.backend_connection_opened(pruned, Duration::from_millis(10));
    collector.set_backend_protocol(kept, "h1");
    collector.set_backend_protocol(pruned, "h2");
    collector.backend_connect_error(pruned);

    let active: HashSet<String> = [kept.to_string()].into_iter().collect();
    collector.retain_backends(&active);

    // The listed backend survives.
    assert!(collector.backend_total.contains_key(kept));
    assert!(collector.backend_protocol.contains_key(kept));

    // The unlisted backend is gone from every map.
    assert!(!collector.backend_active.contains_key(pruned));
    assert!(!collector.backend_total.contains_key(pruned));
    assert!(!collector.backend_protocol.contains_key(pruned));
    assert!(!collector.backend_connect_errors.contains_key(pruned));
    assert!(!collector.backend_connect_time_us.contains_key(pruned));
    assert!(!collector.backend_connect_count.contains_key(pruned));
    assert!(!collector.backend_pool_hits.contains_key(pruned));
    assert!(!collector.backend_pool_misses.contains_key(pruned));
    assert!(!collector.backend_h2_failures.contains_key(pruned));
}
|
||||||
|
|
||||||
|
/// Checks that closing more connections than were opened never drives the
/// active counter below zero and never creates an entry on its own.
#[test]
fn test_backend_connection_closed_saturates() {
    let collector = MetricsCollector::new();
    let key = "b:80";

    // Closing an unknown backend must neither underflow nor create an entry.
    collector.backend_connection_closed(key);
    assert!(!collector.backend_active.contains_key(key));

    // One open followed by two closes leaves the counter at zero.
    collector.backend_connection_opened(key, Duration::from_millis(1));
    for _ in 0..2 {
        collector.backend_connection_closed(key);
    }
    let remaining = collector.backend_active.get(key).unwrap().load(Ordering::Relaxed);
    assert_eq!(remaining, 0);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use arc_swap::ArcSwap;
|
use arc_swap::ArcSwap;
|
||||||
|
use dashmap::DashMap;
|
||||||
use tokio::net::TcpListener;
|
use tokio::net::TcpListener;
|
||||||
use tokio_rustls::TlsAcceptor;
|
use tokio_rustls::TlsAcceptor;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
@@ -162,18 +163,23 @@ pub struct TcpListenerManager {
|
|||||||
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
||||||
/// Global connection semaphore — limits total simultaneous connections.
|
/// Global connection semaphore — limits total simultaneous connections.
|
||||||
conn_semaphore: Arc<tokio::sync::Semaphore>,
|
conn_semaphore: Arc<tokio::sync::Semaphore>,
|
||||||
|
/// Per-route cancellation tokens (child of cancel_token).
|
||||||
|
/// When a route is removed, its token is cancelled, terminating all connections on that route.
|
||||||
|
route_cancels: Arc<DashMap<String, CancellationToken>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TcpListenerManager {
|
impl TcpListenerManager {
|
||||||
pub fn new(route_manager: Arc<RouteManager>) -> Self {
|
pub fn new(route_manager: Arc<RouteManager>) -> Self {
|
||||||
let metrics = Arc::new(MetricsCollector::new());
|
let metrics = Arc::new(MetricsCollector::new());
|
||||||
let conn_config = ConnectionConfig::default();
|
let conn_config = ConnectionConfig::default();
|
||||||
|
let route_manager_swap = Arc::new(ArcSwap::from(route_manager));
|
||||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||||
Arc::clone(&route_manager),
|
Arc::clone(&route_manager_swap),
|
||||||
Arc::clone(&metrics),
|
Arc::clone(&metrics),
|
||||||
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
||||||
);
|
);
|
||||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||||
|
http_proxy_svc.set_backend_tls_config_alpn(tls_handler::shared_backend_tls_config_alpn());
|
||||||
http_proxy_svc.set_connection_timeouts(
|
http_proxy_svc.set_connection_timeouts(
|
||||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||||
@@ -187,7 +193,7 @@ impl TcpListenerManager {
|
|||||||
let max_conns = conn_config.max_connections as usize;
|
let max_conns = conn_config.max_connections as usize;
|
||||||
Self {
|
Self {
|
||||||
listeners: HashMap::new(),
|
listeners: HashMap::new(),
|
||||||
route_manager: Arc::new(ArcSwap::from(route_manager)),
|
route_manager: route_manager_swap,
|
||||||
metrics,
|
metrics,
|
||||||
tls_configs: Arc::new(ArcSwap::from(Arc::new(HashMap::new()))),
|
tls_configs: Arc::new(ArcSwap::from(Arc::new(HashMap::new()))),
|
||||||
shared_tls_acceptor: Arc::new(ArcSwap::from(Arc::new(None))),
|
shared_tls_acceptor: Arc::new(ArcSwap::from(Arc::new(None))),
|
||||||
@@ -197,18 +203,21 @@ impl TcpListenerManager {
|
|||||||
cancel_token: CancellationToken::new(),
|
cancel_token: CancellationToken::new(),
|
||||||
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
|
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
|
||||||
conn_semaphore: Arc::new(tokio::sync::Semaphore::new(max_conns)),
|
conn_semaphore: Arc::new(tokio::sync::Semaphore::new(max_conns)),
|
||||||
|
route_cancels: Arc::new(DashMap::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create with a metrics collector.
|
/// Create with a metrics collector.
|
||||||
pub fn with_metrics(route_manager: Arc<RouteManager>, metrics: Arc<MetricsCollector>) -> Self {
|
pub fn with_metrics(route_manager: Arc<RouteManager>, metrics: Arc<MetricsCollector>) -> Self {
|
||||||
let conn_config = ConnectionConfig::default();
|
let conn_config = ConnectionConfig::default();
|
||||||
|
let route_manager_swap = Arc::new(ArcSwap::from(route_manager));
|
||||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||||
Arc::clone(&route_manager),
|
Arc::clone(&route_manager_swap),
|
||||||
Arc::clone(&metrics),
|
Arc::clone(&metrics),
|
||||||
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
||||||
);
|
);
|
||||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||||
|
http_proxy_svc.set_backend_tls_config_alpn(tls_handler::shared_backend_tls_config_alpn());
|
||||||
http_proxy_svc.set_connection_timeouts(
|
http_proxy_svc.set_connection_timeouts(
|
||||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||||
@@ -222,7 +231,7 @@ impl TcpListenerManager {
|
|||||||
let max_conns = conn_config.max_connections as usize;
|
let max_conns = conn_config.max_connections as usize;
|
||||||
Self {
|
Self {
|
||||||
listeners: HashMap::new(),
|
listeners: HashMap::new(),
|
||||||
route_manager: Arc::new(ArcSwap::from(route_manager)),
|
route_manager: route_manager_swap,
|
||||||
metrics,
|
metrics,
|
||||||
tls_configs: Arc::new(ArcSwap::from(Arc::new(HashMap::new()))),
|
tls_configs: Arc::new(ArcSwap::from(Arc::new(HashMap::new()))),
|
||||||
shared_tls_acceptor: Arc::new(ArcSwap::from(Arc::new(None))),
|
shared_tls_acceptor: Arc::new(ArcSwap::from(Arc::new(None))),
|
||||||
@@ -232,6 +241,7 @@ impl TcpListenerManager {
|
|||||||
cancel_token: CancellationToken::new(),
|
cancel_token: CancellationToken::new(),
|
||||||
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
|
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
|
||||||
conn_semaphore: Arc::new(tokio::sync::Semaphore::new(max_conns)),
|
conn_semaphore: Arc::new(tokio::sync::Semaphore::new(max_conns)),
|
||||||
|
route_cancels: Arc::new(DashMap::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -243,14 +253,14 @@ impl TcpListenerManager {
|
|||||||
));
|
));
|
||||||
self.conn_semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_connections as usize));
|
self.conn_semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_connections as usize));
|
||||||
|
|
||||||
// Rebuild http_proxy with updated timeouts
|
// Rebuild http_proxy with updated timeouts (shares the same ArcSwap<RouteManager>)
|
||||||
let rm = self.route_manager.load_full();
|
|
||||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||||
rm,
|
Arc::clone(&self.route_manager),
|
||||||
Arc::clone(&self.metrics),
|
Arc::clone(&self.metrics),
|
||||||
std::time::Duration::from_millis(config.connection_timeout_ms),
|
std::time::Duration::from_millis(config.connection_timeout_ms),
|
||||||
);
|
);
|
||||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||||
|
http_proxy_svc.set_backend_tls_config_alpn(tls_handler::shared_backend_tls_config_alpn());
|
||||||
http_proxy_svc.set_connection_timeouts(
|
http_proxy_svc.set_connection_timeouts(
|
||||||
std::time::Duration::from_millis(config.socket_timeout_ms),
|
std::time::Duration::from_millis(config.socket_timeout_ms),
|
||||||
std::time::Duration::from_millis(config.socket_timeout_ms),
|
std::time::Duration::from_millis(config.socket_timeout_ms),
|
||||||
@@ -314,12 +324,13 @@ impl TcpListenerManager {
|
|||||||
let cancel = self.cancel_token.clone();
|
let cancel = self.cancel_token.clone();
|
||||||
let relay = Arc::clone(&self.socket_handler_relay);
|
let relay = Arc::clone(&self.socket_handler_relay);
|
||||||
let semaphore = Arc::clone(&self.conn_semaphore);
|
let semaphore = Arc::clone(&self.conn_semaphore);
|
||||||
|
let route_cancels = Arc::clone(&self.route_cancels);
|
||||||
|
|
||||||
let handle = tokio::spawn(async move {
|
let handle = tokio::spawn(async move {
|
||||||
Self::accept_loop(
|
Self::accept_loop(
|
||||||
listener, port, route_manager_swap, metrics, tls_configs,
|
listener, port, route_manager_swap, metrics, tls_configs,
|
||||||
shared_tls_acceptor, http_proxy, conn_config, conn_tracker, cancel, relay,
|
shared_tls_acceptor, http_proxy, conn_config, conn_tracker, cancel, relay,
|
||||||
semaphore,
|
semaphore, route_cancels,
|
||||||
).await;
|
).await;
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -398,6 +409,20 @@ impl TcpListenerManager {
|
|||||||
self.route_manager.store(route_manager);
|
self.route_manager.store(route_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cancel connections on routes that no longer exist in the active set.
|
||||||
|
/// Existing connections on removed routes are terminated via their per-route CancellationToken.
|
||||||
|
pub fn invalidate_removed_routes(&self, active_route_ids: &std::collections::HashSet<String>) {
|
||||||
|
self.route_cancels.retain(|id, token| {
|
||||||
|
if active_route_ids.contains(id) {
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
info!("Cancelling connections for removed route '{}'", id);
|
||||||
|
token.cancel();
|
||||||
|
false // remove cancelled token from map
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/// Prune HTTP proxy caches for route IDs that are no longer active.
|
/// Prune HTTP proxy caches for route IDs that are no longer active.
|
||||||
pub fn prune_http_proxy_caches(&self, active_route_ids: &std::collections::HashSet<String>) {
|
pub fn prune_http_proxy_caches(&self, active_route_ids: &std::collections::HashSet<String>) {
|
||||||
self.http_proxy.prune_stale_routes(active_route_ids);
|
self.http_proxy.prune_stale_routes(active_route_ids);
|
||||||
@@ -427,6 +452,7 @@ impl TcpListenerManager {
|
|||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
||||||
conn_semaphore: Arc<tokio::sync::Semaphore>,
|
conn_semaphore: Arc<tokio::sync::Semaphore>,
|
||||||
|
route_cancels: Arc<DashMap<String, CancellationToken>>,
|
||||||
) {
|
) {
|
||||||
loop {
|
loop {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
@@ -481,6 +507,7 @@ impl TcpListenerManager {
|
|||||||
let ct = Arc::clone(&conn_tracker);
|
let ct = Arc::clone(&conn_tracker);
|
||||||
let cn = cancel.clone();
|
let cn = cancel.clone();
|
||||||
let sr = Arc::clone(&socket_handler_relay);
|
let sr = Arc::clone(&socket_handler_relay);
|
||||||
|
let rc = Arc::clone(&route_cancels);
|
||||||
debug!("Accepted connection from {} on port {}", peer_addr, port);
|
debug!("Accepted connection from {} on port {}", peer_addr, port);
|
||||||
|
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
@@ -489,7 +516,7 @@ impl TcpListenerManager {
|
|||||||
// RAII guard ensures connection_closed is called on all paths
|
// RAII guard ensures connection_closed is called on all paths
|
||||||
let _ct_guard = ConnectionTrackerGuard::new(ct, ip);
|
let _ct_guard = ConnectionTrackerGuard::new(ct, ip);
|
||||||
let result = Self::handle_connection(
|
let result = Self::handle_connection(
|
||||||
stream, port, peer_addr, rm, m, tc, sa, hp, cc, cn, sr,
|
stream, port, peer_addr, rm, m, tc, sa, hp, cc, cn, sr, rc,
|
||||||
).await;
|
).await;
|
||||||
if let Err(e) = result {
|
if let Err(e) = result {
|
||||||
debug!("Connection error from {}: {}", peer_addr, e);
|
debug!("Connection error from {}: {}", peer_addr, e);
|
||||||
@@ -519,6 +546,7 @@ impl TcpListenerManager {
|
|||||||
conn_config: Arc<ConnectionConfig>,
|
conn_config: Arc<ConnectionConfig>,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
||||||
|
route_cancels: Arc<DashMap<String, CancellationToken>>,
|
||||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
use tokio::io::AsyncReadExt;
|
use tokio::io::AsyncReadExt;
|
||||||
|
|
||||||
@@ -623,6 +651,14 @@ impl TcpListenerManager {
|
|||||||
let target_port = target.port.resolve(port);
|
let target_port = target.port.resolve(port);
|
||||||
let route_id = quick_match.route.id.as_deref();
|
let route_id = quick_match.route.id.as_deref();
|
||||||
|
|
||||||
|
// Resolve per-route cancel token (child of global cancel)
|
||||||
|
let conn_cancel = match route_id {
|
||||||
|
Some(id) => route_cancels.entry(id.to_string())
|
||||||
|
.or_insert_with(|| cancel.child_token())
|
||||||
|
.clone(),
|
||||||
|
None => cancel.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
// Check route-level IP security
|
// Check route-level IP security
|
||||||
if let Some(ref security) = quick_match.route.security {
|
if let Some(ref security) = quick_match.route.security {
|
||||||
if !rustproxy_http::request_filter::RequestFilter::check_ip_security(
|
if !rustproxy_http::request_filter::RequestFilter::check_ip_security(
|
||||||
@@ -677,7 +713,7 @@ impl TcpListenerManager {
|
|||||||
|
|
||||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||||
stream, backend_w, None,
|
stream, backend_w, None,
|
||||||
inactivity_timeout, max_lifetime, cancel,
|
inactivity_timeout, max_lifetime, conn_cancel,
|
||||||
Some(forwarder::ForwardMetricsCtx {
|
Some(forwarder::ForwardMetricsCtx {
|
||||||
collector: Arc::clone(&metrics),
|
collector: Arc::clone(&metrics),
|
||||||
route_id: route_id.map(|s| s.to_string()),
|
route_id: route_id.map(|s| s.to_string()),
|
||||||
@@ -687,7 +723,7 @@ impl TcpListenerManager {
|
|||||||
} else {
|
} else {
|
||||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||||
stream, backend, None,
|
stream, backend, None,
|
||||||
inactivity_timeout, max_lifetime, cancel,
|
inactivity_timeout, max_lifetime, conn_cancel,
|
||||||
Some(forwarder::ForwardMetricsCtx {
|
Some(forwarder::ForwardMetricsCtx {
|
||||||
collector: Arc::clone(&metrics),
|
collector: Arc::clone(&metrics),
|
||||||
route_id: route_id.map(|s| s.to_string()),
|
route_id: route_id.map(|s| s.to_string()),
|
||||||
@@ -792,6 +828,16 @@ impl TcpListenerManager {
|
|||||||
|
|
||||||
let route_id = route_match.route.id.as_deref();
|
let route_id = route_match.route.id.as_deref();
|
||||||
|
|
||||||
|
// Resolve per-route cancel token (child of global cancel).
|
||||||
|
// When this route is removed via updateRoutes, the token is cancelled,
|
||||||
|
// terminating all connections on this route.
|
||||||
|
let cancel = match route_id {
|
||||||
|
Some(id) => route_cancels.entry(id.to_string())
|
||||||
|
.or_insert_with(|| cancel.child_token())
|
||||||
|
.clone(),
|
||||||
|
None => cancel,
|
||||||
|
};
|
||||||
|
|
||||||
// Check route-level IP security for passthrough connections
|
// Check route-level IP security for passthrough connections
|
||||||
if let Some(ref security) = route_match.route.security {
|
if let Some(ref security) = route_match.route.security {
|
||||||
if !rustproxy_http::request_filter::RequestFilter::check_ip_security(
|
if !rustproxy_http::request_filter::RequestFilter::check_ip_security(
|
||||||
|
|||||||
@@ -98,10 +98,24 @@ pub fn build_shared_tls_acceptor(resolver: CertResolver) -> Result<TlsAcceptor,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Build a TLS acceptor from PEM-encoded cert and key data.
|
/// Build a TLS acceptor from PEM-encoded cert and key data.
|
||||||
|
/// Advertises both h2 and http/1.1 via ALPN (for client-facing connections).
|
||||||
pub fn build_tls_acceptor(cert_pem: &str, key_pem: &str) -> Result<TlsAcceptor, Box<dyn std::error::Error + Send + Sync>> {
|
pub fn build_tls_acceptor(cert_pem: &str, key_pem: &str) -> Result<TlsAcceptor, Box<dyn std::error::Error + Send + Sync>> {
|
||||||
build_tls_acceptor_with_config(cert_pem, key_pem, None)
|
build_tls_acceptor_with_config(cert_pem, key_pem, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build a TLS acceptor for backend servers that only speak HTTP/1.1.
|
||||||
|
/// Does NOT advertise h2 in ALPN, preventing false h2 auto-detection.
|
||||||
|
pub fn build_tls_acceptor_h1_only(cert_pem: &str, key_pem: &str) -> Result<TlsAcceptor, Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
ensure_crypto_provider();
|
||||||
|
let certs = load_certs(cert_pem)?;
|
||||||
|
let key = load_private_key(key_pem)?;
|
||||||
|
let mut config = ServerConfig::builder()
|
||||||
|
.with_no_client_auth()
|
||||||
|
.with_single_cert(certs, key)?;
|
||||||
|
config.alpn_protocols = vec![b"http/1.1".to_vec()];
|
||||||
|
Ok(TlsAcceptor::from(Arc::new(config)))
|
||||||
|
}
|
||||||
|
|
||||||
/// Build a TLS acceptor with optional RouteTls configuration for version/cipher tuning.
|
/// Build a TLS acceptor with optional RouteTls configuration for version/cipher tuning.
|
||||||
pub fn build_tls_acceptor_with_config(
|
pub fn build_tls_acceptor_with_config(
|
||||||
cert_pem: &str,
|
cert_pem: &str,
|
||||||
@@ -204,6 +218,25 @@ pub fn shared_backend_tls_config() -> Arc<rustls::ClientConfig> {
|
|||||||
}).clone()
|
}).clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get or create a shared backend TLS `ClientConfig` with ALPN `h2` + `http/1.1`.
|
||||||
|
///
|
||||||
|
/// Used for auto-detection mode: the backend server picks its preferred protocol
|
||||||
|
/// via ALPN, and the proxy reads the negotiated result to decide h1 vs h2 forwarding.
|
||||||
|
static SHARED_CLIENT_CONFIG_ALPN: OnceLock<Arc<rustls::ClientConfig>> = OnceLock::new();
|
||||||
|
|
||||||
|
pub fn shared_backend_tls_config_alpn() -> Arc<rustls::ClientConfig> {
|
||||||
|
SHARED_CLIENT_CONFIG_ALPN.get_or_init(|| {
|
||||||
|
ensure_crypto_provider();
|
||||||
|
let mut config = rustls::ClientConfig::builder()
|
||||||
|
.dangerous()
|
||||||
|
.with_custom_certificate_verifier(Arc::new(InsecureVerifier))
|
||||||
|
.with_no_client_auth();
|
||||||
|
config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||||
|
info!("Built shared backend TLS client config with ALPN h2+http/1.1 for auto-detection");
|
||||||
|
Arc::new(config)
|
||||||
|
}).clone()
|
||||||
|
}
|
||||||
|
|
||||||
/// Connect to a backend with TLS (for terminate-and-reencrypt mode).
|
/// Connect to a backend with TLS (for terminate-and-reencrypt mode).
|
||||||
/// Uses the shared backend TLS config for session resumption.
|
/// Uses the shared backend TLS config for session resumption.
|
||||||
pub async fn connect_tls(
|
pub async fn connect_tls(
|
||||||
|
|||||||
@@ -603,6 +603,31 @@ impl RustProxy {
|
|||||||
.collect();
|
.collect();
|
||||||
self.metrics.retain_routes(&active_route_ids);
|
self.metrics.retain_routes(&active_route_ids);
|
||||||
|
|
||||||
|
// Prune per-backend metrics for backends no longer in any route target.
|
||||||
|
// For PortSpec::Preserve routes, expand across all listening ports since
|
||||||
|
// the actual runtime port depends on the incoming connection.
|
||||||
|
let listening_ports = self.get_listening_ports();
|
||||||
|
let active_backends: HashSet<String> = routes.iter()
|
||||||
|
.filter_map(|r| r.action.targets.as_ref())
|
||||||
|
.flat_map(|targets| targets.iter())
|
||||||
|
.flat_map(|target| {
|
||||||
|
let hosts: Vec<String> = target.host.to_vec().into_iter().map(|s| s.to_string()).collect();
|
||||||
|
match &target.port {
|
||||||
|
rustproxy_config::PortSpec::Fixed(p) => {
|
||||||
|
hosts.into_iter().map(|h| format!("{}:{}", h, p)).collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Preserve/special: expand across all listening ports
|
||||||
|
let lp = &listening_ports;
|
||||||
|
hosts.into_iter()
|
||||||
|
.flat_map(|h| lp.iter().map(move |p| format!("{}:{}", h, *p)))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
self.metrics.retain_backends(&active_backends);
|
||||||
|
|
||||||
// Atomically swap the route table
|
// Atomically swap the route table
|
||||||
let new_manager = Arc::new(new_manager);
|
let new_manager = Arc::new(new_manager);
|
||||||
self.route_table.store(Arc::clone(&new_manager));
|
self.route_table.store(Arc::clone(&new_manager));
|
||||||
@@ -610,6 +635,8 @@ impl RustProxy {
|
|||||||
// Update listener manager
|
// Update listener manager
|
||||||
if let Some(ref mut listener) = self.listener_manager {
|
if let Some(ref mut listener) = self.listener_manager {
|
||||||
listener.update_route_manager(Arc::clone(&new_manager));
|
listener.update_route_manager(Arc::clone(&new_manager));
|
||||||
|
// Cancel connections on routes that were removed or disabled
|
||||||
|
listener.invalidate_removed_routes(&active_route_ids);
|
||||||
// Prune HTTP proxy caches (rate limiters, regex cache, round-robin counters)
|
// Prune HTTP proxy caches (rate limiters, regex cache, round-robin counters)
|
||||||
listener.prune_http_proxy_caches(&active_route_ids);
|
listener.prune_http_proxy_caches(&active_route_ids);
|
||||||
|
|
||||||
|
|||||||
@@ -195,7 +195,10 @@ pub async fn start_tls_http_backend(
|
|||||||
) -> JoinHandle<()> {
|
) -> JoinHandle<()> {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
let acceptor = rustproxy_passthrough::build_tls_acceptor(cert_pem, key_pem)
|
// Use h1-only acceptor: test backends speak raw HTTP/1.1 text,
|
||||||
|
// so they must NOT advertise h2 via ALPN (which would cause
|
||||||
|
// auto-detect to attempt h2 binary framing and fail).
|
||||||
|
let acceptor = rustproxy_passthrough::build_tls_acceptor_h1_only(cert_pem, key_pem)
|
||||||
.expect("Failed to build TLS acceptor");
|
.expect("Failed to build TLS acceptor");
|
||||||
let acceptor = Arc::new(acceptor);
|
let acceptor = Arc::new(acceptor);
|
||||||
let name = backend_name.to_string();
|
let name = backend_name.to_string();
|
||||||
|
|||||||
@@ -7,10 +7,15 @@
|
|||||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||||
import { SmartProxy } from '../ts/proxies/smart-proxy/index.js';
|
import { SmartProxy } from '../ts/proxies/smart-proxy/index.js';
|
||||||
import type { IRouteConfig } from '../ts/proxies/smart-proxy/models/route-types.js';
|
import type { IRouteConfig } from '../ts/proxies/smart-proxy/models/route-types.js';
|
||||||
|
import { findFreePorts } from './helpers/port-allocator.js';
|
||||||
|
|
||||||
// Use unique high ports for each test to avoid conflicts
|
let testPorts: number[];
|
||||||
let testPort = 20000;
|
let portIndex = 0;
|
||||||
const getNextPort = () => testPort++;
|
const getNextPort = () => testPorts[portIndex++];
|
||||||
|
|
||||||
|
tap.test('setup - allocate ports', async () => {
|
||||||
|
testPorts = await findFreePorts(16);
|
||||||
|
});
|
||||||
|
|
||||||
// --------------------------------- Single Route, No Domain Restriction ---------------------------------
|
// --------------------------------- Single Route, No Domain Restriction ---------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@push.rocks/smartproxy',
|
name: '@push.rocks/smartproxy',
|
||||||
version: '25.8.5',
|
version: '25.10.2',
|
||||||
description: 'A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.'
|
description: 'A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -67,6 +67,13 @@ export interface IMetrics {
|
|||||||
connections(): number;
|
connections(): number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Backend metrics
|
||||||
|
backends: {
|
||||||
|
byBackend(): Map<string, IBackendMetrics>;
|
||||||
|
protocols(): Map<string, string>;
|
||||||
|
topByErrors(limit?: number): Array<{ backend: string; errors: number }>;
|
||||||
|
};
|
||||||
|
|
||||||
// Performance metrics
|
// Performance metrics
|
||||||
percentiles: {
|
percentiles: {
|
||||||
connectionDuration(): { p50: number; p95: number; p99: number };
|
connectionDuration(): { p50: number; p95: number; p99: number };
|
||||||
@@ -98,6 +105,21 @@ export interface IMetricsConfig {
|
|||||||
prometheusPrefix: string; // Default: smartproxy_
|
prometheusPrefix: string; // Default: smartproxy_
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Metrics reported for a single backend.
 */
export interface IBackendMetrics {
  /** Protocol label for the backend; the Rust metrics adapter reports `'unknown'` when none is available. */
  protocol: string;
  /** Number of currently active connections to the backend. */
  activeConnections: number;
  /** Total number of connections counted for the backend. */
  totalConnections: number;
  /** Count of connect errors. */
  connectErrors: number;
  /** Count of handshake errors. */
  handshakeErrors: number;
  /** Count of request errors. */
  requestErrors: number;
  /** Average connect time in milliseconds; the Rust metrics adapter reports `0` when no connects were counted. */
  avgConnectTimeMs: number;
  /** Pool hits divided by total pool lookups (hits + misses); the Rust metrics adapter reports `0` when there were no lookups. */
  poolHitRate: number;
  /** Count of h2 fallback failures. */
  h2Failures: number;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal interface for connection byte tracking
|
* Internal interface for connection byte tracking
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -262,7 +262,7 @@ export interface IRouteAction {
|
|||||||
|
|
||||||
// Additional options for backend-specific settings
|
// Additional options for backend-specific settings
|
||||||
options?: {
|
options?: {
|
||||||
backendProtocol?: 'http1' | 'http2';
|
backendProtocol?: 'http1' | 'http2' | 'auto';
|
||||||
[key: string]: any;
|
[key: string]: any;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import type { IMetrics, IThroughputData, IThroughputHistoryPoint } from './models/metrics-types.js';
|
import type { IMetrics, IBackendMetrics, IThroughputData, IThroughputHistoryPoint } from './models/metrics-types.js';
|
||||||
import type { RustProxyBridge } from './rust-proxy-bridge.js';
|
import type { RustProxyBridge } from './rust-proxy-bridge.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -169,6 +169,55 @@ export class RustMetricsAdapter implements IMetrics {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
public backends = {
  /**
   * Build a map of backend key to derived metrics from the cached snapshot.
   *
   * Missing counters default to `0` and a missing protocol to `'unknown'`.
   * Returns an empty map when nothing is cached or the cache has no
   * `backends` section.
   */
  byBackend: (): Map<string, IBackendMetrics> => {
    const rawBackends = this.cache?.backends;
    if (!rawBackends) {
      return new Map<string, IBackendMetrics>();
    }
    const pairs = Object.entries(rawBackends).map(
      ([backendKey, rawEntry]): [string, IBackendMetrics] => {
        const raw = rawEntry as any;
        const connectTimeUs = raw.totalConnectTimeUs ?? 0;
        const connectCount = raw.connectCount ?? 0;
        const hits = raw.poolHits ?? 0;
        const misses = raw.poolMisses ?? 0;
        const lookups = hits + misses;
        return [
          backendKey,
          {
            protocol: raw.protocol ?? 'unknown',
            activeConnections: raw.activeConnections ?? 0,
            totalConnections: raw.totalConnections ?? 0,
            connectErrors: raw.connectErrors ?? 0,
            handshakeErrors: raw.handshakeErrors ?? 0,
            requestErrors: raw.requestErrors ?? 0,
            // Cached total is in microseconds; convert the mean to milliseconds.
            avgConnectTimeMs: connectCount > 0 ? (connectTimeUs / connectCount) / 1000 : 0,
            poolHitRate: lookups > 0 ? hits / lookups : 0,
            h2Failures: raw.h2Failures ?? 0,
          },
        ];
      },
    );
    return new Map<string, IBackendMetrics>(pairs);
  },

  /**
   * Map each cached backend key to its protocol label (`'unknown'` when absent).
   */
  protocols: (): Map<string, string> => {
    const labels = new Map<string, string>();
    const rawBackends = this.cache?.backends;
    if (!rawBackends) {
      return labels;
    }
    Object.entries(rawBackends).forEach(([backendKey, rawEntry]) => {
      labels.set(backendKey, (rawEntry as any).protocol ?? 'unknown');
    });
    return labels;
  },

  /**
   * List backends with at least one error, highest error count first.
   *
   * The error count is the sum of connect, handshake and request errors.
   * At most `limit` entries are returned (default 10).
   */
  topByErrors: (limit: number = 10): Array<{ backend: string; errors: number }> => {
    const rawBackends = this.cache?.backends;
    const ranked: Array<{ backend: string; errors: number }> = rawBackends
      ? Object.entries(rawBackends)
          .map(([backend, rawEntry]) => {
            const raw = rawEntry as any;
            const errors =
              (raw.connectErrors ?? 0) + (raw.handshakeErrors ?? 0) + (raw.requestErrors ?? 0);
            return { backend, errors };
          })
          .filter((row) => row.errors > 0)
      : [];
    ranked.sort((a, b) => b.errors - a.errors);
    return ranked.slice(0, limit);
  },
};
|
||||||
|
|
||||||
public percentiles = {
|
public percentiles = {
|
||||||
connectionDuration: (): { p50: number; p95: number; p99: number } => {
|
connectionDuration: (): { p50: number; p95: number; p99: number } => {
|
||||||
return { p50: 0, p95: 0, p99: 0 };
|
return { p50: 0, p95: 0, p99: 0 };
|
||||||
|
|||||||
Reference in New Issue
Block a user