Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 90b83a9dbe | |||
| 508621e231 | |||
| 9ef21dcb41 | |||
| 0acd907431 | |||
| 80276a70e8 | |||
| 0d4399d7f1 | |||
| 0380a957d0 | |||
| 5271447264 | |||
| be9898805f | |||
| d4aa46aed7 | |||
| 4f1c5c919f | |||
| d51b2c5890 | |||
| bb471a8cc9 | |||
| c52128f12d | |||
| e69de246e9 | |||
| 5126049ae6 | |||
| 8db621657f | |||
| ef060d5e79 | |||
| cd7f3f7f75 | |||
| 8df18728d4 | |||
| bedecc6b6b | |||
| b5f166bc92 | |||
| 94266222fe | |||
| 697d51a9d4 | |||
| 7e5fe2bec3 |
94
changelog.md
94
changelog.md
@@ -1,5 +1,99 @@
|
||||
# Changelog
|
||||
|
||||
## 2026-03-12 - 25.10.2 - fix(repo)
|
||||
no code changes to release
|
||||
|
||||
|
||||
## 2026-03-12 - 25.10.1 - fix(repo)
|
||||
no changes to commit
|
||||
|
||||
|
||||
## 2026-03-12 - 25.10.0 - feat(metrics)
|
||||
add per-backend connection, error, protocol, and pool metrics with stale backend pruning
|
||||
|
||||
- tracks backend connection lifecycle, connect timing, protocol detection, pool hit/miss rates, handshake/request errors, and h2 fallback failures in Rust metrics
|
||||
- exposes backend metrics through the TypeScript metrics adapter with backend listings, protocol lookup, and top error summaries
|
||||
- prunes backend metrics for backends no longer referenced by active routes, including preserved-port targets expanded across listening ports
|
||||
|
||||
## 2026-03-11 - 25.9.3 - fix(rustproxy-http)
|
||||
Evict stale HTTP/2 pooled senders and retry bodyless requests with fresh backend connections to avoid 502s
|
||||
|
||||
- Introduce MAX_H2_AGE (120s) and evict HTTP/2 senders older than this or closed
|
||||
- Check MAX_H2_AGE on checkout and during background eviction to prevent reuse of stale h2 connections
|
||||
- Add connection_pool.remove_h2() to explicitly remove dead H2 senders from the pool
|
||||
- When a pooled H2 request returns a 502 and the original request had an empty body, retry using a fresh H2 connection (retry_h2_with_fresh_connection)
|
||||
- On H2 auto-detect failures, retry as HTTP/1.1 for bodyless requests via forward_h1_empty_body; return 502 for requests with bodies
|
||||
- Evict dead H2 senders on backend request failures in reconnect_backend so subsequent attempts create fresh connections
|
||||
|
||||
## 2026-03-08 - 25.9.2 - fix(protocol-cache)
|
||||
Include requested_host in protocol detection cache key to avoid cache oscillation when multiple frontend domains share the same backend
|
||||
|
||||
- Add ProtocolCacheKey.requested_host: Option<String> to distinguish cache entries by incoming request Host/:authority
|
||||
- Update protocol cache lookups/inserts in proxy_service to populate requested_host
|
||||
- Enhance debug logging to show requested_host on cache hits
|
||||
- Fixes repeated ALPN probing / cache oscillation when different frontend domains share a backend with differing HTTP/2 support
|
||||
|
||||
## 2026-03-03 - 25.9.1 - fix(rustproxy)
|
||||
Cancel connections for routes removed/disabled by adding per-route cancellation tokens and make RouteManager swappable (ArcSwap) for runtime updates
|
||||
|
||||
- Add per-route CancellationToken map (DashMap) to TcpListenerManager and call token.cancel() when routes are removed (invalidate_removed_routes)
|
||||
- Propagate Arc<ArcSwap<RouteManager>> into HttpProxyService and passthrough listener so the route manager can be hot-swapped without restarting listeners
|
||||
- Use per-route child cancellation tokens in accept/connection handling and forwarders to terminate existing connections when a route is removed
|
||||
- Prune HTTP proxy caches and retain/cleanup per-route tokens when routes are active/removed
|
||||
- Update test.test.sni-requirement.node.ts to allocate unique free ports via findFreePorts to avoid port conflicts during tests
|
||||
|
||||
## 2026-03-03 - 25.9.0 - feat(rustproxy-http)
|
||||
add HTTP/2 auto-detection via ALPN with TTL-backed protocol cache and h1-only/h2 ALPN client configs
|
||||
|
||||
- Add protocol_cache module: bounded, TTL-based cache (5min TTL), max entries (4096), background cleanup task and clear() to discard stale detections.
|
||||
- Introduce BackendProtocol::Auto and expose 'auto' in TypeScript route types to allow ALPN-based protocol auto-detection.
|
||||
- Add build_tls_acceptor_h1_only() to create a TLS acceptor that advertises only http/1.1 (used for backends/tests that speak plain HTTP/1.1).
|
||||
- Add shared_backend_tls_config_alpn() and default_backend_tls_config_with_alpn() to provide client TLS configs advertising h2+http/1.1 for auto-detection.
|
||||
- Wire backend_tls_config_alpn and protocol_cache into proxy_service, tcp_listener and passthrough paths; add set_backend_tls_config_alpn() and prune protocol_cache on route updates.
|
||||
- Update passthrough tests to use h1-only acceptor to avoid false HTTP/2 detection when backends speak plain HTTP/1.1.
|
||||
- Include reconnection/fallback handling and ensure ALPN-enabled client config is used for auto-detection mode.
|
||||
|
||||
## 2026-02-26 - 25.8.5 - fix(release)
|
||||
bump patch version (no source changes)
|
||||
|
||||
- No changes detected in git diff
|
||||
- Current version: 25.8.4
|
||||
- Recommend patch bump to 25.8.5 to record release without code changes
|
||||
|
||||
## 2026-02-26 - 25.8.4 - fix(proxy)
|
||||
adjust default proxy timeouts and keep-alive behavior to more consistent, mostly shorter values (the connection timeout is raised)
|
||||
|
||||
- Increase connection timeout default from 30,000ms to 60,000ms (30s -> 60s).
|
||||
- Reduce socket timeout default from 3,600,000ms to 60,000ms (1h -> 60s).
|
||||
- Reduce max connection lifetime default from 86,400,000ms to 3,600,000ms (24h -> 1h).
|
||||
- Change inactivity timeout default from 14,400,000ms to 75,000ms (4h -> 75s).
|
||||
- Update keep-alive defaults: keepAliveTreatment 'extended' -> 'standard', keepAliveInactivityMultiplier 6 -> 4, extendedKeepAliveLifetime 604,800,000ms -> 3,600,000ms (7d -> 1h).
|
||||
- Apply these consistent default values across Rust crates (rustproxy-config, rustproxy-passthrough) and the TypeScript smart-proxy implementation.
|
||||
- Update unit test expectations to match the new defaults.
|
||||
|
||||
## 2026-02-26 - 25.8.3 - fix(smartproxy)
|
||||
no code or dependency changes detected; no version bump required
|
||||
|
||||
- No files changed in the provided diff (No changes).
|
||||
- package.json version remains 25.8.2.
|
||||
- No dependency or source updates detected; skip release.
|
||||
|
||||
## 2026-02-26 - 25.8.2 - fix(connection)
|
||||
improve connection handling and timeouts
|
||||
|
||||
- Flush logs on process beforeExit and avoid calling process.exit in SIGINT/SIGTERM handlers to preserve host graceful shutdown
|
||||
- Store protocol entries with a createdAt timestamp in ProtocolDetector and remove stale entries older than 30s to prevent leaked state from abandoned handshakes or port scanners
|
||||
- Add backend connect timeout (30s) and idle timeouts (5 minutes) for dynamic forwards; destroy sockets on timeout and emit logs for timeout events
|
||||
|
||||
## 2026-02-25 - 25.8.1 - fix(allocator)
|
||||
switch global allocator from tikv-jemallocator to mimalloc
|
||||
|
||||
- Replaced tikv-jemallocator with mimalloc in rust/Cargo.toml workspace dependencies.
|
||||
- Updated rust/crates/rustproxy/Cargo.toml to use mimalloc as a workspace dependency.
|
||||
- Updated rust/Cargo.lock: added mimalloc and libmimalloc-sys entries and removed tikv-jemallocator and tikv-jemalloc-sys entries.
|
||||
- Changed the global allocator in crates/rustproxy/src/main.rs from tikv_jemallocator::Jemalloc to mimalloc::MiMalloc.
|
||||
- Impact: runtime memory allocator is changed which may affect memory usage and performance; no public API changes but recommend testing memory/performance in deployments.
|
||||
|
||||
## 2026-02-24 - 25.8.0 - feat(rustproxy)
|
||||
use tikv-jemallocator as the global allocator to reduce glibc fragmentation and slow RSS growth; add allocator dependency and enable it in rustproxy, update lockfile, and run tsrust before tests
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@push.rocks/smartproxy",
|
||||
"version": "25.8.0",
|
||||
"version": "25.10.2",
|
||||
"private": false,
|
||||
"description": "A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.",
|
||||
"main": "dist_ts/index.js",
|
||||
|
||||
41
rust/Cargo.lock
generated
41
rust/Cargo.lock
generated
@@ -612,6 +612,16 @@ version = "0.2.180"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
|
||||
|
||||
[[package]]
|
||||
name = "libmimalloc-sys"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.14"
|
||||
@@ -642,6 +652,15 @@ version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "mimalloc"
|
||||
version = "0.1.48"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8"
|
||||
dependencies = [
|
||||
"libmimalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "1.1.1"
|
||||
@@ -924,6 +943,7 @@ dependencies = [
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper-util",
|
||||
"mimalloc",
|
||||
"rcgen",
|
||||
"rustls",
|
||||
"rustproxy-config",
|
||||
@@ -936,7 +956,6 @@ dependencies = [
|
||||
"rustproxy-tls",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tokio-util",
|
||||
@@ -1299,26 +1318,6 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-sys"
|
||||
version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemallocator"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"tikv-jemalloc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.47"
|
||||
|
||||
@@ -91,8 +91,8 @@ libc = "0.2"
|
||||
# Socket-level options (keepalive, etc.)
|
||||
socket2 = { version = "0.5", features = ["all"] }
|
||||
|
||||
# jemalloc allocator (prevents glibc fragmentation / slow RSS growth)
|
||||
tikv-jemallocator = "0.6"
|
||||
# mimalloc allocator (prevents glibc fragmentation / slow RSS growth)
|
||||
mimalloc = "0.1"
|
||||
|
||||
# Internal crates
|
||||
rustproxy-config = { path = "crates/rustproxy-config" }
|
||||
|
||||
@@ -298,7 +298,7 @@ impl RustProxyOptions {
|
||||
|
||||
/// Get the effective connection timeout in milliseconds.
|
||||
pub fn effective_connection_timeout(&self) -> u64 {
|
||||
self.connection_timeout.unwrap_or(30_000)
|
||||
self.connection_timeout.unwrap_or(60_000)
|
||||
}
|
||||
|
||||
/// Get the effective initial data timeout in milliseconds.
|
||||
@@ -308,12 +308,12 @@ impl RustProxyOptions {
|
||||
|
||||
/// Get the effective socket timeout in milliseconds.
|
||||
pub fn effective_socket_timeout(&self) -> u64 {
|
||||
self.socket_timeout.unwrap_or(3_600_000)
|
||||
self.socket_timeout.unwrap_or(60_000)
|
||||
}
|
||||
|
||||
/// Get the effective max connection lifetime in milliseconds.
|
||||
pub fn effective_max_connection_lifetime(&self) -> u64 {
|
||||
self.max_connection_lifetime.unwrap_or(86_400_000)
|
||||
self.max_connection_lifetime.unwrap_or(3_600_000)
|
||||
}
|
||||
|
||||
/// Get all unique ports that routes listen on.
|
||||
@@ -377,10 +377,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_default_timeouts() {
|
||||
let options = RustProxyOptions::default();
|
||||
assert_eq!(options.effective_connection_timeout(), 30_000);
|
||||
assert_eq!(options.effective_connection_timeout(), 60_000);
|
||||
assert_eq!(options.effective_initial_data_timeout(), 60_000);
|
||||
assert_eq!(options.effective_socket_timeout(), 3_600_000);
|
||||
assert_eq!(options.effective_max_connection_lifetime(), 86_400_000);
|
||||
assert_eq!(options.effective_socket_timeout(), 60_000);
|
||||
assert_eq!(options.effective_max_connection_lifetime(), 3_600_000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -367,6 +367,7 @@ pub struct NfTablesOptions {
|
||||
pub enum BackendProtocol {
|
||||
Http1,
|
||||
Http2,
|
||||
Auto,
|
||||
}
|
||||
|
||||
/// Action options.
|
||||
|
||||
@@ -18,6 +18,9 @@ const MAX_IDLE_PER_KEY: usize = 16;
|
||||
const IDLE_TIMEOUT: Duration = Duration::from_secs(90);
|
||||
/// Background eviction interval.
|
||||
const EVICTION_INTERVAL: Duration = Duration::from_secs(30);
|
||||
/// Maximum age for pooled HTTP/2 connections before proactive eviction.
|
||||
/// Prevents staleness from backends that close idle connections (e.g. nginx GOAWAY).
|
||||
const MAX_H2_AGE: Duration = Duration::from_secs(120);
|
||||
|
||||
/// Identifies a unique backend endpoint.
|
||||
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
|
||||
@@ -37,7 +40,6 @@ struct IdleH1 {
|
||||
/// A pooled HTTP/2 sender (multiplexed, Clone-able).
|
||||
struct PooledH2 {
|
||||
sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>,
|
||||
#[allow(dead_code)] // Reserved for future age-based eviction
|
||||
created_at: Instant,
|
||||
}
|
||||
|
||||
@@ -116,8 +118,8 @@ impl ConnectionPool {
|
||||
let entry = self.h2_pool.get(key)?;
|
||||
let pooled = entry.value();
|
||||
|
||||
// Check if the h2 connection is still alive
|
||||
if pooled.sender.is_closed() {
|
||||
// Check if the h2 connection is still alive and not too old
|
||||
if pooled.sender.is_closed() || pooled.created_at.elapsed() >= MAX_H2_AGE {
|
||||
drop(entry);
|
||||
self.h2_pool.remove(key);
|
||||
return None;
|
||||
@@ -130,6 +132,12 @@ impl ConnectionPool {
|
||||
None
|
||||
}
|
||||
|
||||
/// Remove a dead HTTP/2 sender from the pool.
|
||||
/// Called when `send_request` fails to prevent subsequent requests from reusing the stale sender.
|
||||
pub fn remove_h2(&self, key: &PoolKey) {
|
||||
self.h2_pool.remove(key);
|
||||
}
|
||||
|
||||
/// Register an HTTP/2 sender in the pool. Since h2 is multiplexed,
|
||||
/// only one sender per key is stored (it's Clone-able).
|
||||
pub fn register_h2(&self, key: PoolKey, sender: http2::SendRequest<BoxBody<Bytes, hyper::Error>>) {
|
||||
@@ -165,10 +173,10 @@ impl ConnectionPool {
|
||||
h1_pool.remove(&key);
|
||||
}
|
||||
|
||||
// Evict dead H2 connections
|
||||
// Evict dead or aged-out H2 connections
|
||||
let mut dead_h2 = Vec::new();
|
||||
for entry in h2_pool.iter() {
|
||||
if entry.value().sender.is_closed() {
|
||||
if entry.value().sender.is_closed() || entry.value().created_at.elapsed() >= MAX_H2_AGE {
|
||||
dead_h2.push(entry.key().clone());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
pub mod connection_pool;
|
||||
pub mod counting_body;
|
||||
pub mod protocol_cache;
|
||||
pub mod proxy_service;
|
||||
pub mod request_filter;
|
||||
pub mod response_filter;
|
||||
|
||||
140
rust/crates/rustproxy-http/src/protocol_cache.rs
Normal file
140
rust/crates/rustproxy-http/src/protocol_cache.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
//! Bounded, TTL-based protocol detection cache for HTTP/2 auto-detection.
|
||||
//!
|
||||
//! Caches the ALPN-negotiated protocol (H1 or H2) per backend endpoint and requested
|
||||
//! domain (host:port + requested_host). This prevents cache oscillation when multiple
|
||||
//! frontend domains share the same backend but differ in HTTP/2 support.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use dashmap::DashMap;
|
||||
use tracing::debug;
|
||||
|
||||
/// TTL for cached protocol detection results.
|
||||
/// After this duration, the next request will re-probe the backend.
|
||||
const PROTOCOL_CACHE_TTL: Duration = Duration::from_secs(300); // 5 minutes
|
||||
|
||||
/// Maximum number of entries in the protocol cache.
|
||||
/// Prevents unbounded growth when backends come and go.
|
||||
const PROTOCOL_CACHE_MAX_ENTRIES: usize = 4096;
|
||||
|
||||
/// Background cleanup interval for the protocol cache.
|
||||
const PROTOCOL_CACHE_CLEANUP_INTERVAL: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Protocol recorded for a backend by the protocol cache.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DetectedProtocol {
    /// The backend was detected as HTTP/1.
    H1,
    /// The backend was detected as HTTP/2.
    H2,
}
|
||||
|
||||
/// Lookup key of the protocol cache: backend `host` and `port` plus the
/// host name the client requested.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ProtocolCacheKey {
    /// Backend host.
    pub host: String,
    /// Backend port.
    pub port: u16,
    /// Domain of the incoming request (Host header / :authority).
    /// Keeps detections apart when several domains share one backend.
    pub requested_host: Option<String>,
}
|
||||
|
||||
/// A cached protocol detection result with a timestamp.
|
||||
struct CachedEntry {
|
||||
protocol: DetectedProtocol,
|
||||
detected_at: Instant,
|
||||
}
|
||||
|
||||
/// Bounded, TTL-based protocol detection cache.
|
||||
///
|
||||
/// Memory safety guarantees:
|
||||
/// - Hard cap at `PROTOCOL_CACHE_MAX_ENTRIES` — cannot grow unboundedly.
|
||||
/// - TTL expiry — stale entries naturally age out on lookup.
|
||||
/// - Background cleanup task — proactively removes expired entries every 60s.
|
||||
/// - `clear()` — called on route updates to discard stale detections.
|
||||
/// - `Drop` — aborts the background task to prevent dangling tokio tasks.
|
||||
pub struct ProtocolCache {
|
||||
cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>>,
|
||||
cleanup_handle: Option<tokio::task::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl ProtocolCache {
|
||||
/// Create a new protocol cache and start the background cleanup task.
|
||||
pub fn new() -> Self {
|
||||
let cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>> = Arc::new(DashMap::new());
|
||||
let cache_clone = Arc::clone(&cache);
|
||||
let cleanup_handle = tokio::spawn(async move {
|
||||
Self::cleanup_loop(cache_clone).await;
|
||||
});
|
||||
|
||||
Self {
|
||||
cache,
|
||||
cleanup_handle: Some(cleanup_handle),
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up the cached protocol for a backend endpoint.
|
||||
/// Returns `None` if not cached or expired (caller should probe via ALPN).
|
||||
pub fn get(&self, key: &ProtocolCacheKey) -> Option<DetectedProtocol> {
|
||||
let entry = self.cache.get(key)?;
|
||||
if entry.detected_at.elapsed() < PROTOCOL_CACHE_TTL {
|
||||
debug!("Protocol cache hit: {:?} for {}:{} (requested: {:?})", entry.protocol, key.host, key.port, key.requested_host);
|
||||
Some(entry.protocol)
|
||||
} else {
|
||||
// Expired — remove and return None to trigger re-probe
|
||||
drop(entry); // release DashMap ref before remove
|
||||
self.cache.remove(key);
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a detected protocol into the cache.
|
||||
/// If the cache is at capacity, evict the oldest entry first.
|
||||
pub fn insert(&self, key: ProtocolCacheKey, protocol: DetectedProtocol) {
|
||||
if self.cache.len() >= PROTOCOL_CACHE_MAX_ENTRIES && !self.cache.contains_key(&key) {
|
||||
// Evict the oldest entry to stay within bounds
|
||||
let oldest = self.cache.iter()
|
||||
.min_by_key(|entry| entry.value().detected_at)
|
||||
.map(|entry| entry.key().clone());
|
||||
if let Some(oldest_key) = oldest {
|
||||
self.cache.remove(&oldest_key);
|
||||
}
|
||||
}
|
||||
self.cache.insert(key, CachedEntry {
|
||||
protocol,
|
||||
detected_at: Instant::now(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Clear all entries. Called on route updates to discard stale detections.
|
||||
pub fn clear(&self) {
|
||||
self.cache.clear();
|
||||
}
|
||||
|
||||
/// Background cleanup loop — removes expired entries every `PROTOCOL_CACHE_CLEANUP_INTERVAL`.
|
||||
async fn cleanup_loop(cache: Arc<DashMap<ProtocolCacheKey, CachedEntry>>) {
|
||||
let mut interval = tokio::time::interval(PROTOCOL_CACHE_CLEANUP_INTERVAL);
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
let expired: Vec<ProtocolCacheKey> = cache.iter()
|
||||
.filter(|entry| entry.value().detected_at.elapsed() >= PROTOCOL_CACHE_TTL)
|
||||
.map(|entry| entry.key().clone())
|
||||
.collect();
|
||||
|
||||
if !expired.is_empty() {
|
||||
debug!("Protocol cache cleanup: removing {} expired entries", expired.len());
|
||||
for key in expired {
|
||||
cache.remove(&key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ProtocolCache {
|
||||
fn drop(&mut self) {
|
||||
if let Some(handle) = self.cleanup_handle.take() {
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::throughput::{ThroughputSample, ThroughputTracker};
|
||||
|
||||
@@ -20,6 +21,7 @@ pub struct Metrics {
|
||||
pub throughput_recent_out_bytes_per_sec: u64,
|
||||
pub routes: std::collections::HashMap<String, RouteMetrics>,
|
||||
pub ips: std::collections::HashMap<String, IpMetrics>,
|
||||
pub backends: std::collections::HashMap<String, BackendMetrics>,
|
||||
pub throughput_history: Vec<ThroughputSample>,
|
||||
pub total_http_requests: u64,
|
||||
pub http_requests_per_sec: u64,
|
||||
@@ -52,6 +54,23 @@ pub struct IpMetrics {
|
||||
pub throughput_out_bytes_per_sec: u64,
|
||||
}
|
||||
|
||||
/// Per-backend metrics (keyed by "host:port").
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct BackendMetrics {
|
||||
pub active_connections: u64,
|
||||
pub total_connections: u64,
|
||||
pub protocol: String,
|
||||
pub connect_errors: u64,
|
||||
pub handshake_errors: u64,
|
||||
pub request_errors: u64,
|
||||
pub total_connect_time_us: u64,
|
||||
pub connect_count: u64,
|
||||
pub pool_hits: u64,
|
||||
pub pool_misses: u64,
|
||||
pub h2_failures: u64,
|
||||
}
|
||||
|
||||
/// Statistics snapshot.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -69,6 +88,9 @@ const DEFAULT_RETENTION_SECONDS: usize = 3600;
|
||||
/// Maximum number of IPs to include in a snapshot (top by active connections).
|
||||
const MAX_IPS_IN_SNAPSHOT: usize = 100;
|
||||
|
||||
/// Maximum number of backends to include in a snapshot (top by total connections).
|
||||
const MAX_BACKENDS_IN_SNAPSHOT: usize = 100;
|
||||
|
||||
/// Metrics collector tracking connections and throughput.
|
||||
///
|
||||
/// Design: The hot path (`record_bytes`) is entirely lock-free — it only touches
|
||||
@@ -96,6 +118,19 @@ pub struct MetricsCollector {
|
||||
ip_pending_tp: DashMap<String, (AtomicU64, AtomicU64)>,
|
||||
ip_throughput: DashMap<String, Mutex<ThroughputTracker>>,
|
||||
|
||||
// ── Per-backend tracking (keyed by "host:port") ──
|
||||
backend_active: DashMap<String, AtomicU64>,
|
||||
backend_total: DashMap<String, AtomicU64>,
|
||||
backend_protocol: DashMap<String, String>,
|
||||
backend_connect_errors: DashMap<String, AtomicU64>,
|
||||
backend_handshake_errors: DashMap<String, AtomicU64>,
|
||||
backend_request_errors: DashMap<String, AtomicU64>,
|
||||
backend_connect_time_us: DashMap<String, AtomicU64>,
|
||||
backend_connect_count: DashMap<String, AtomicU64>,
|
||||
backend_pool_hits: DashMap<String, AtomicU64>,
|
||||
backend_pool_misses: DashMap<String, AtomicU64>,
|
||||
backend_h2_failures: DashMap<String, AtomicU64>,
|
||||
|
||||
// ── HTTP request tracking ──
|
||||
total_http_requests: AtomicU64,
|
||||
pending_http_requests: AtomicU64,
|
||||
@@ -134,6 +169,17 @@ impl MetricsCollector {
|
||||
ip_bytes_out: DashMap::new(),
|
||||
ip_pending_tp: DashMap::new(),
|
||||
ip_throughput: DashMap::new(),
|
||||
backend_active: DashMap::new(),
|
||||
backend_total: DashMap::new(),
|
||||
backend_protocol: DashMap::new(),
|
||||
backend_connect_errors: DashMap::new(),
|
||||
backend_handshake_errors: DashMap::new(),
|
||||
backend_request_errors: DashMap::new(),
|
||||
backend_connect_time_us: DashMap::new(),
|
||||
backend_connect_count: DashMap::new(),
|
||||
backend_pool_hits: DashMap::new(),
|
||||
backend_pool_misses: DashMap::new(),
|
||||
backend_h2_failures: DashMap::new(),
|
||||
total_http_requests: AtomicU64::new(0),
|
||||
pending_http_requests: AtomicU64::new(0),
|
||||
http_request_throughput: Mutex::new(ThroughputTracker::new(retention_seconds)),
|
||||
@@ -268,6 +314,113 @@ impl MetricsCollector {
|
||||
self.pending_http_requests.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
// ── Per-backend recording methods ──
|
||||
|
||||
/// Record a successful backend connection with its connect duration.
|
||||
pub fn backend_connection_opened(&self, key: &str, connect_time: Duration) {
|
||||
self.backend_active
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
self.backend_total
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
self.backend_connect_time_us
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(connect_time.as_micros() as u64, Ordering::Relaxed);
|
||||
self.backend_connect_count
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record a backend connection closing.
|
||||
pub fn backend_connection_closed(&self, key: &str) {
|
||||
if let Some(counter) = self.backend_active.get(key) {
|
||||
let val = counter.load(Ordering::Relaxed);
|
||||
if val > 0 {
|
||||
counter.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a backend connect error (TCP or TLS connect failure/timeout).
|
||||
pub fn backend_connect_error(&self, key: &str) {
|
||||
self.backend_connect_errors
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record a backend handshake error (H1 or H2 handshake failure).
|
||||
pub fn backend_handshake_error(&self, key: &str) {
|
||||
self.backend_handshake_errors
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record a backend request error (send_request failure).
|
||||
pub fn backend_request_error(&self, key: &str) {
|
||||
self.backend_request_errors
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record a connection pool hit for a backend.
|
||||
pub fn backend_pool_hit(&self, key: &str) {
|
||||
self.backend_pool_hits
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record a connection pool miss for a backend.
|
||||
pub fn backend_pool_miss(&self, key: &str) {
|
||||
self.backend_pool_misses
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record an H2 failure (h2 attempted but fell back to h1).
|
||||
pub fn backend_h2_failure(&self, key: &str) {
|
||||
self.backend_h2_failures
|
||||
.entry(key.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Set the protocol in use for a backend ("h1" or "h2").
|
||||
pub fn set_backend_protocol(&self, key: &str, protocol: &str) {
|
||||
self.backend_protocol
|
||||
.entry(key.to_string())
|
||||
.and_modify(|v| {
|
||||
if v != protocol {
|
||||
*v = protocol.to_string();
|
||||
}
|
||||
})
|
||||
.or_insert_with(|| protocol.to_string());
|
||||
}
|
||||
|
||||
/// Remove per-backend metrics for backends no longer in any route target.
|
||||
pub fn retain_backends(&self, active_backends: &HashSet<String>) {
|
||||
self.backend_active.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_total.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_protocol.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_connect_errors.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_handshake_errors.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_request_errors.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_connect_time_us.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_connect_count.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_pool_hits.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_pool_misses.retain(|k, _| active_backends.contains(k));
|
||||
self.backend_h2_failures.retain(|k, _| active_backends.contains(k));
|
||||
}
|
||||
|
||||
/// Take a throughput sample on all trackers (cold path, call at 1Hz or configured interval).
|
||||
///
|
||||
/// Drains the lock-free pending counters and feeds the accumulated bytes
|
||||
@@ -488,6 +641,72 @@ impl MetricsCollector {
|
||||
});
|
||||
}
|
||||
|
||||
// Collect per-backend metrics, capped at top MAX_BACKENDS_IN_SNAPSHOT by total connections
|
||||
let mut backend_entries: Vec<(String, BackendMetrics)> = Vec::new();
|
||||
for entry in self.backend_total.iter() {
|
||||
let key = entry.key().clone();
|
||||
let total = entry.value().load(Ordering::Relaxed);
|
||||
let active = self.backend_active
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let protocol = self.backend_protocol
|
||||
.get(&key)
|
||||
.map(|v| v.value().clone())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let connect_errors = self.backend_connect_errors
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let handshake_errors = self.backend_handshake_errors
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let request_errors = self.backend_request_errors
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let total_connect_time_us = self.backend_connect_time_us
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let connect_count = self.backend_connect_count
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let pool_hits = self.backend_pool_hits
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let pool_misses = self.backend_pool_misses
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
let h2_failures = self.backend_h2_failures
|
||||
.get(&key)
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
|
||||
backend_entries.push((key, BackendMetrics {
|
||||
active_connections: active,
|
||||
total_connections: total,
|
||||
protocol,
|
||||
connect_errors,
|
||||
handshake_errors,
|
||||
request_errors,
|
||||
total_connect_time_us,
|
||||
connect_count,
|
||||
pool_hits,
|
||||
pool_misses,
|
||||
h2_failures,
|
||||
}));
|
||||
}
|
||||
// Sort by total connections descending, then cap
|
||||
backend_entries.sort_by(|a, b| b.1.total_connections.cmp(&a.1.total_connections));
|
||||
backend_entries.truncate(MAX_BACKENDS_IN_SNAPSHOT);
|
||||
|
||||
let backends: std::collections::HashMap<String, BackendMetrics> = backend_entries.into_iter().collect();
|
||||
|
||||
// HTTP request rates
|
||||
let (http_rps, http_rps_recent) = self.http_request_throughput
|
||||
.lock()
|
||||
@@ -509,6 +728,7 @@ impl MetricsCollector {
|
||||
throughput_recent_out_bytes_per_sec: global_recent_out,
|
||||
routes,
|
||||
ips,
|
||||
backends,
|
||||
throughput_history,
|
||||
total_http_requests: self.total_http_requests.load(Ordering::Relaxed),
|
||||
http_requests_per_sec: http_rps,
|
||||
@@ -805,4 +1025,120 @@ mod tests {
|
||||
assert_eq!(snapshot.throughput_history[0].bytes_in, 100);
|
||||
assert_eq!(snapshot.throughput_history[4].bytes_in, 500);
|
||||
}
|
||||
|
||||
/// Exercises the per-backend counters of `MetricsCollector` for a single backend key:
/// opens two connections, closes one, records one event of each error kind plus
/// pool hits/misses, and sets the protocol twice, asserting the stored value after each step.
/// The test reads the collector's internal maps directly rather than going through a snapshot.
#[test]
|
||||
fn test_backend_metrics_basic() {
|
||||
let collector = MetricsCollector::new();
|
||||
let key = "backend1:8080";
|
||||
|
||||
// Open two connections, each reporting how long the connect took
|
||||
collector.backend_connection_opened(key, Duration::from_millis(15));
|
||||
collector.backend_connection_opened(key, Duration::from_millis(25));
|
||||
|
||||
assert_eq!(collector.backend_active.get(key).unwrap().load(Ordering::Relaxed), 2);
|
||||
assert_eq!(collector.backend_total.get(key).unwrap().load(Ordering::Relaxed), 2);
|
||||
assert_eq!(collector.backend_connect_count.get(key).unwrap().load(Ordering::Relaxed), 2);
|
||||
// Connect times accumulate in microseconds: 15ms + 25ms = 40ms = 40_000us
|
||||
assert_eq!(collector.backend_connect_time_us.get(key).unwrap().load(Ordering::Relaxed), 40_000);
|
||||
|
||||
// Close one connection: the active count drops
|
||||
collector.backend_connection_closed(key);
|
||||
assert_eq!(collector.backend_active.get(key).unwrap().load(Ordering::Relaxed), 1);
|
||||
// ...while the lifetime total is unchanged
|
||||
assert_eq!(collector.backend_total.get(key).unwrap().load(Ordering::Relaxed), 2);
|
||||
|
||||
// Record one of each error kind, one h2 failure, two pool hits and one pool miss
|
||||
collector.backend_connect_error(key);
|
||||
collector.backend_handshake_error(key);
|
||||
collector.backend_request_error(key);
|
||||
collector.backend_h2_failure(key);
|
||||
collector.backend_pool_hit(key);
|
||||
collector.backend_pool_hit(key);
|
||||
collector.backend_pool_miss(key);
|
||||
|
||||
assert_eq!(collector.backend_connect_errors.get(key).unwrap().load(Ordering::Relaxed), 1);
|
||||
assert_eq!(collector.backend_handshake_errors.get(key).unwrap().load(Ordering::Relaxed), 1);
|
||||
assert_eq!(collector.backend_request_errors.get(key).unwrap().load(Ordering::Relaxed), 1);
|
||||
assert_eq!(collector.backend_h2_failures.get(key).unwrap().load(Ordering::Relaxed), 1);
|
||||
assert_eq!(collector.backend_pool_hits.get(key).unwrap().load(Ordering::Relaxed), 2);
|
||||
assert_eq!(collector.backend_pool_misses.get(key).unwrap().load(Ordering::Relaxed), 1);
|
||||
|
||||
// Protocol: a later set_backend_protocol call replaces the earlier value
|
||||
collector.set_backend_protocol(key, "h1");
|
||||
assert_eq!(collector.backend_protocol.get(key).unwrap().value(), "h1");
|
||||
collector.set_backend_protocol(key, "h2");
|
||||
assert_eq!(collector.backend_protocol.get(key).unwrap().value(), "h2");
|
||||
}
|
||||
|
||||
/// Checks that per-backend metrics recorded on the collector show up in `snapshot()`:
/// two backends are populated, and the snapshot's `backends` map is expected to hold
/// one entry per backend key with the recorded counters and protocol.
#[test]
|
||||
fn test_backend_metrics_in_snapshot() {
|
||||
let collector = MetricsCollector::new();
|
||||
|
||||
// One connection per backend; only b1 gets a connect error
|
||||
collector.backend_connection_opened("b1:443", Duration::from_millis(10));
|
||||
collector.backend_connection_opened("b2:8080", Duration::from_millis(20));
|
||||
collector.set_backend_protocol("b1:443", "h2");
|
||||
collector.set_backend_protocol("b2:8080", "h1");
|
||||
collector.backend_connect_error("b1:443");
|
||||
|
||||
let snapshot = collector.snapshot();
|
||||
assert_eq!(snapshot.backends.len(), 2);
|
||||
|
||||
// b1: one open connection, 10ms connect time reported as 10_000us, one connect error
|
||||
let b1 = snapshot.backends.get("b1:443").unwrap();
|
||||
assert_eq!(b1.active_connections, 1);
|
||||
assert_eq!(b1.total_connections, 1);
|
||||
assert_eq!(b1.protocol, "h2");
|
||||
assert_eq!(b1.connect_errors, 1);
|
||||
assert_eq!(b1.total_connect_time_us, 10_000);
|
||||
assert_eq!(b1.connect_count, 1);
|
||||
|
||||
// b2: never had an error recorded, so the snapshot reports 0 for it
|
||||
let b2 = snapshot.backends.get("b2:8080").unwrap();
|
||||
assert_eq!(b2.protocol, "h1");
|
||||
assert_eq!(b2.connect_errors, 0);
|
||||
}
|
||||
|
||||
/// Checks that `retain_backends` keeps metrics for backend keys in the given set and
/// removes the entries of every other key from the per-backend maps inspected below.
/// NOTE(review): the handshake-error and request-error maps are not asserted here —
/// confirm whether `retain_backends` prunes them as well.
#[test]
|
||||
fn test_retain_backends_prunes_stale() {
|
||||
let collector = MetricsCollector::new();
|
||||
|
||||
// Populate two backends; only "stale:8080" also gets a connect error entry
|
||||
collector.backend_connection_opened("active:443", Duration::from_millis(5));
|
||||
collector.backend_connection_opened("stale:8080", Duration::from_millis(10));
|
||||
collector.set_backend_protocol("active:443", "h1");
|
||||
collector.set_backend_protocol("stale:8080", "h2");
|
||||
collector.backend_connect_error("stale:8080");
|
||||
|
||||
// Only "active:443" is passed as still in use
|
||||
let active = HashSet::from(["active:443".to_string()]);
|
||||
collector.retain_backends(&active);
|
||||
|
||||
// active:443 should still exist
|
||||
assert!(collector.backend_total.get("active:443").is_some());
|
||||
assert!(collector.backend_protocol.get("active:443").is_some());
|
||||
|
||||
// stale:8080 should be fully removed
|
||||
assert!(collector.backend_active.get("stale:8080").is_none());
|
||||
assert!(collector.backend_total.get("stale:8080").is_none());
|
||||
assert!(collector.backend_protocol.get("stale:8080").is_none());
|
||||
assert!(collector.backend_connect_errors.get("stale:8080").is_none());
|
||||
assert!(collector.backend_connect_time_us.get("stale:8080").is_none());
|
||||
assert!(collector.backend_connect_count.get("stale:8080").is_none());
|
||||
assert!(collector.backend_pool_hits.get("stale:8080").is_none());
|
||||
assert!(collector.backend_pool_misses.get("stale:8080").is_none());
|
||||
assert!(collector.backend_h2_failures.get("stale:8080").is_none());
|
||||
}
|
||||
|
||||
/// Checks that `backend_connection_closed` never drives the active counter below zero:
/// closing an unknown backend is expected to create no entry, and closing more often
/// than opening is expected to leave the counter at 0.
#[test]
|
||||
fn test_backend_connection_closed_saturates() {
|
||||
let collector = MetricsCollector::new();
|
||||
let key = "b:80";
|
||||
|
||||
// Close without opening — should not underflow
|
||||
collector.backend_connection_closed(key);
|
||||
// No entry created for a key that was never opened
|
||||
assert!(collector.backend_active.get(key).is_none());
|
||||
|
||||
// Open one, close two — should saturate at 0
|
||||
collector.backend_connection_opened(key, Duration::from_millis(1));
|
||||
collector.backend_connection_closed(key);
|
||||
collector.backend_connection_closed(key);
|
||||
assert_eq!(collector.backend_active.get(key).unwrap().load(Ordering::Relaxed), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use arc_swap::ArcSwap;
|
||||
use dashmap::DashMap;
|
||||
use tokio::net::TcpListener;
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -118,10 +119,10 @@ pub struct ConnectionConfig {
|
||||
impl Default for ConnectionConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
connection_timeout_ms: 30_000,
|
||||
connection_timeout_ms: 60_000,
|
||||
initial_data_timeout_ms: 60_000,
|
||||
socket_timeout_ms: 3_600_000,
|
||||
max_connection_lifetime_ms: 86_400_000,
|
||||
socket_timeout_ms: 60_000,
|
||||
max_connection_lifetime_ms: 3_600_000,
|
||||
graceful_shutdown_timeout_ms: 30_000,
|
||||
max_connections_per_ip: None,
|
||||
connection_rate_limit_per_minute: None,
|
||||
@@ -162,18 +163,28 @@ pub struct TcpListenerManager {
|
||||
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
||||
/// Global connection semaphore — limits total simultaneous connections.
|
||||
conn_semaphore: Arc<tokio::sync::Semaphore>,
|
||||
/// Per-route cancellation tokens (child of cancel_token).
|
||||
/// When a route is removed, its token is cancelled, terminating all connections on that route.
|
||||
route_cancels: Arc<DashMap<String, CancellationToken>>,
|
||||
}
|
||||
|
||||
impl TcpListenerManager {
|
||||
pub fn new(route_manager: Arc<RouteManager>) -> Self {
|
||||
let metrics = Arc::new(MetricsCollector::new());
|
||||
let conn_config = ConnectionConfig::default();
|
||||
let route_manager_swap = Arc::new(ArcSwap::from(route_manager));
|
||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||
Arc::clone(&route_manager),
|
||||
Arc::clone(&route_manager_swap),
|
||||
Arc::clone(&metrics),
|
||||
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
||||
);
|
||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||
http_proxy_svc.set_backend_tls_config_alpn(tls_handler::shared_backend_tls_config_alpn());
|
||||
http_proxy_svc.set_connection_timeouts(
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms),
|
||||
);
|
||||
let http_proxy = Arc::new(http_proxy_svc);
|
||||
let conn_tracker = Arc::new(ConnectionTracker::new(
|
||||
conn_config.max_connections_per_ip,
|
||||
@@ -182,7 +193,7 @@ impl TcpListenerManager {
|
||||
let max_conns = conn_config.max_connections as usize;
|
||||
Self {
|
||||
listeners: HashMap::new(),
|
||||
route_manager: Arc::new(ArcSwap::from(route_manager)),
|
||||
route_manager: route_manager_swap,
|
||||
metrics,
|
||||
tls_configs: Arc::new(ArcSwap::from(Arc::new(HashMap::new()))),
|
||||
shared_tls_acceptor: Arc::new(ArcSwap::from(Arc::new(None))),
|
||||
@@ -192,18 +203,26 @@ impl TcpListenerManager {
|
||||
cancel_token: CancellationToken::new(),
|
||||
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
|
||||
conn_semaphore: Arc::new(tokio::sync::Semaphore::new(max_conns)),
|
||||
route_cancels: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create with a metrics collector.
|
||||
pub fn with_metrics(route_manager: Arc<RouteManager>, metrics: Arc<MetricsCollector>) -> Self {
|
||||
let conn_config = ConnectionConfig::default();
|
||||
let route_manager_swap = Arc::new(ArcSwap::from(route_manager));
|
||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||
Arc::clone(&route_manager),
|
||||
Arc::clone(&route_manager_swap),
|
||||
Arc::clone(&metrics),
|
||||
std::time::Duration::from_millis(conn_config.connection_timeout_ms),
|
||||
);
|
||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||
http_proxy_svc.set_backend_tls_config_alpn(tls_handler::shared_backend_tls_config_alpn());
|
||||
http_proxy_svc.set_connection_timeouts(
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms),
|
||||
);
|
||||
let http_proxy = Arc::new(http_proxy_svc);
|
||||
let conn_tracker = Arc::new(ConnectionTracker::new(
|
||||
conn_config.max_connections_per_ip,
|
||||
@@ -212,7 +231,7 @@ impl TcpListenerManager {
|
||||
let max_conns = conn_config.max_connections as usize;
|
||||
Self {
|
||||
listeners: HashMap::new(),
|
||||
route_manager: Arc::new(ArcSwap::from(route_manager)),
|
||||
route_manager: route_manager_swap,
|
||||
metrics,
|
||||
tls_configs: Arc::new(ArcSwap::from(Arc::new(HashMap::new()))),
|
||||
shared_tls_acceptor: Arc::new(ArcSwap::from(Arc::new(None))),
|
||||
@@ -222,6 +241,7 @@ impl TcpListenerManager {
|
||||
cancel_token: CancellationToken::new(),
|
||||
socket_handler_relay: Arc::new(std::sync::RwLock::new(None)),
|
||||
conn_semaphore: Arc::new(tokio::sync::Semaphore::new(max_conns)),
|
||||
route_cancels: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -232,6 +252,22 @@ impl TcpListenerManager {
|
||||
config.connection_rate_limit_per_minute,
|
||||
));
|
||||
self.conn_semaphore = Arc::new(tokio::sync::Semaphore::new(config.max_connections as usize));
|
||||
|
||||
// Rebuild http_proxy with updated timeouts (shares the same ArcSwap<RouteManager>)
|
||||
let mut http_proxy_svc = HttpProxyService::with_connect_timeout(
|
||||
Arc::clone(&self.route_manager),
|
||||
Arc::clone(&self.metrics),
|
||||
std::time::Duration::from_millis(config.connection_timeout_ms),
|
||||
);
|
||||
http_proxy_svc.set_backend_tls_config(tls_handler::shared_backend_tls_config());
|
||||
http_proxy_svc.set_backend_tls_config_alpn(tls_handler::shared_backend_tls_config_alpn());
|
||||
http_proxy_svc.set_connection_timeouts(
|
||||
std::time::Duration::from_millis(config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(config.socket_timeout_ms),
|
||||
std::time::Duration::from_millis(config.max_connection_lifetime_ms),
|
||||
);
|
||||
self.http_proxy = Arc::new(http_proxy_svc);
|
||||
|
||||
self.conn_config = Arc::new(config);
|
||||
}
|
||||
|
||||
@@ -288,12 +324,13 @@ impl TcpListenerManager {
|
||||
let cancel = self.cancel_token.clone();
|
||||
let relay = Arc::clone(&self.socket_handler_relay);
|
||||
let semaphore = Arc::clone(&self.conn_semaphore);
|
||||
let route_cancels = Arc::clone(&self.route_cancels);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
Self::accept_loop(
|
||||
listener, port, route_manager_swap, metrics, tls_configs,
|
||||
shared_tls_acceptor, http_proxy, conn_config, conn_tracker, cancel, relay,
|
||||
semaphore,
|
||||
semaphore, route_cancels,
|
||||
).await;
|
||||
});
|
||||
|
||||
@@ -336,13 +373,15 @@ impl TcpListenerManager {
|
||||
|
||||
for (port, handle) in self.listeners.drain() {
|
||||
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
|
||||
let abort_handle = handle.abort_handle();
|
||||
if remaining.is_zero() {
|
||||
handle.abort();
|
||||
abort_handle.abort();
|
||||
warn!("Force-stopped listener on port {} (timeout exceeded)", port);
|
||||
} else {
|
||||
match tokio::time::timeout(remaining, handle).await {
|
||||
Ok(_) => info!("Listener on port {} stopped gracefully", port),
|
||||
Err(_) => {
|
||||
abort_handle.abort();
|
||||
warn!("Listener on port {} did not stop in time, aborting", port);
|
||||
}
|
||||
}
|
||||
@@ -370,6 +409,20 @@ impl TcpListenerManager {
|
||||
self.route_manager.store(route_manager);
|
||||
}
|
||||
|
||||
/// Cancel connections on routes that no longer exist in the active set.
|
||||
/// Walks `route_cancels`; every entry whose route id is missing from `active_route_ids`
/// has its `CancellationToken` cancelled and is dropped from the map, so connections
/// holding a clone of that token observe the cancellation. Entries for active routes are kept.
///
/// * `active_route_ids` - ids of the routes that should keep running.
pub fn invalidate_removed_routes(&self, active_route_ids: &std::collections::HashSet<String>) {
|
||||
// `retain` keeps an entry when the closure returns true and removes it otherwise
self.route_cancels.retain(|id, token| {
|
||||
if active_route_ids.contains(id) {
|
||||
true
|
||||
} else {
|
||||
info!("Cancelling connections for removed route '{}'", id);
|
||||
token.cancel();
|
||||
false // remove cancelled token from map
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Prune HTTP proxy caches for route IDs that are no longer active.
|
||||
pub fn prune_http_proxy_caches(&self, active_route_ids: &std::collections::HashSet<String>) {
|
||||
self.http_proxy.prune_stale_routes(active_route_ids);
|
||||
@@ -399,6 +452,7 @@ impl TcpListenerManager {
|
||||
cancel: CancellationToken,
|
||||
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
||||
conn_semaphore: Arc<tokio::sync::Semaphore>,
|
||||
route_cancels: Arc<DashMap<String, CancellationToken>>,
|
||||
) {
|
||||
loop {
|
||||
tokio::select! {
|
||||
@@ -453,6 +507,7 @@ impl TcpListenerManager {
|
||||
let ct = Arc::clone(&conn_tracker);
|
||||
let cn = cancel.clone();
|
||||
let sr = Arc::clone(&socket_handler_relay);
|
||||
let rc = Arc::clone(&route_cancels);
|
||||
debug!("Accepted connection from {} on port {}", peer_addr, port);
|
||||
|
||||
tokio::spawn(async move {
|
||||
@@ -461,7 +516,7 @@ impl TcpListenerManager {
|
||||
// RAII guard ensures connection_closed is called on all paths
|
||||
let _ct_guard = ConnectionTrackerGuard::new(ct, ip);
|
||||
let result = Self::handle_connection(
|
||||
stream, port, peer_addr, rm, m, tc, sa, hp, cc, cn, sr,
|
||||
stream, port, peer_addr, rm, m, tc, sa, hp, cc, cn, sr, rc,
|
||||
).await;
|
||||
if let Err(e) = result {
|
||||
debug!("Connection error from {}: {}", peer_addr, e);
|
||||
@@ -491,6 +546,7 @@ impl TcpListenerManager {
|
||||
conn_config: Arc<ConnectionConfig>,
|
||||
cancel: CancellationToken,
|
||||
socket_handler_relay: Arc<std::sync::RwLock<Option<String>>>,
|
||||
route_cancels: Arc<DashMap<String, CancellationToken>>,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
@@ -595,6 +651,14 @@ impl TcpListenerManager {
|
||||
let target_port = target.port.resolve(port);
|
||||
let route_id = quick_match.route.id.as_deref();
|
||||
|
||||
// Resolve per-route cancel token (child of global cancel)
|
||||
let conn_cancel = match route_id {
|
||||
Some(id) => route_cancels.entry(id.to_string())
|
||||
.or_insert_with(|| cancel.child_token())
|
||||
.clone(),
|
||||
None => cancel.clone(),
|
||||
};
|
||||
|
||||
// Check route-level IP security
|
||||
if let Some(ref security) = quick_match.route.security {
|
||||
if !rustproxy_http::request_filter::RequestFilter::check_ip_security(
|
||||
@@ -649,7 +713,7 @@ impl TcpListenerManager {
|
||||
|
||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
stream, backend_w, None,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
inactivity_timeout, max_lifetime, conn_cancel,
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: Arc::clone(&metrics),
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
@@ -659,7 +723,7 @@ impl TcpListenerManager {
|
||||
} else {
|
||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
stream, backend, None,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
inactivity_timeout, max_lifetime, conn_cancel,
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: Arc::clone(&metrics),
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
@@ -764,6 +828,16 @@ impl TcpListenerManager {
|
||||
|
||||
let route_id = route_match.route.id.as_deref();
|
||||
|
||||
// Resolve per-route cancel token (child of global cancel).
|
||||
// When this route is removed via updateRoutes, the token is cancelled,
|
||||
// terminating all connections on this route.
|
||||
let cancel = match route_id {
|
||||
Some(id) => route_cancels.entry(id.to_string())
|
||||
.or_insert_with(|| cancel.child_token())
|
||||
.clone(),
|
||||
None => cancel,
|
||||
};
|
||||
|
||||
// Check route-level IP security for passthrough connections
|
||||
if let Some(ref security) = route_match.route.security {
|
||||
if !rustproxy_http::request_filter::RequestFilter::check_ip_security(
|
||||
@@ -791,7 +865,8 @@ impl TcpListenerManager {
|
||||
stream, n, port, peer_addr,
|
||||
&route_match, domain.as_deref(), is_tls,
|
||||
&relay_socket_path,
|
||||
&metrics, route_id,
|
||||
Arc::clone(&metrics), route_id,
|
||||
&conn_config, cancel.clone(),
|
||||
).await;
|
||||
} else {
|
||||
debug!("Socket-handler route matched but no relay path configured");
|
||||
@@ -964,7 +1039,7 @@ impl TcpListenerManager {
|
||||
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
tls_read, tls_write, backend_read, backend_write,
|
||||
inactivity_timeout, max_lifetime,
|
||||
inactivity_timeout, max_lifetime, cancel.clone(),
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: Arc::clone(&metrics),
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
@@ -1023,7 +1098,7 @@ impl TcpListenerManager {
|
||||
Self::handle_tls_reencrypt_tunnel(
|
||||
buf_stream, &target_host, target_port,
|
||||
peer_addr, Arc::clone(&metrics), route_id,
|
||||
&conn_config,
|
||||
&conn_config, cancel.clone(),
|
||||
).await?;
|
||||
}
|
||||
Ok(())
|
||||
@@ -1100,8 +1175,10 @@ impl TcpListenerManager {
|
||||
domain: Option<&str>,
|
||||
is_tls: bool,
|
||||
relay_path: &str,
|
||||
metrics: &MetricsCollector,
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<&str>,
|
||||
conn_config: &ConnectionConfig,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::UnixStream;
|
||||
@@ -1141,27 +1218,34 @@ impl TcpListenerManager {
|
||||
// Forward initial data to the Unix socket
|
||||
unix_stream.write_all(&initial_buf).await?;
|
||||
|
||||
// Bidirectional relay between TCP client and Unix socket handler
|
||||
// Bidirectional relay with inactivity timeout, max lifetime, and cancellation.
|
||||
// Split both streams and use the same watchdog pattern as other forwarding paths.
|
||||
let initial_len = initial_buf.len() as u64;
|
||||
match tokio::io::copy_bidirectional(&mut stream, &mut unix_stream).await {
|
||||
Ok((c2s, s2c)) => {
|
||||
// Include initial data bytes that were forwarded before copy_bidirectional
|
||||
let total_in = c2s + initial_len;
|
||||
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
|
||||
route_key, total_in, s2c);
|
||||
let ip = peer_addr.ip().to_string();
|
||||
metrics.record_bytes(total_in, s2c, route_id, Some(&ip));
|
||||
}
|
||||
Err(e) => {
|
||||
// Still record the initial data even on error
|
||||
if initial_len > 0 {
|
||||
let ip = peer_addr.ip().to_string();
|
||||
metrics.record_bytes(initial_len, 0, route_id, Some(&ip));
|
||||
}
|
||||
debug!("Socket handler relay ended for {}: {}", route_key, e);
|
||||
}
|
||||
let inactivity_timeout = std::time::Duration::from_millis(conn_config.socket_timeout_ms);
|
||||
let max_lifetime = std::time::Duration::from_millis(conn_config.max_connection_lifetime_ms);
|
||||
|
||||
let (tcp_read, tcp_write) = stream.into_split();
|
||||
let (unix_read, unix_write) = unix_stream.into_split();
|
||||
|
||||
let ip_str = peer_addr.ip().to_string();
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
tcp_read, tcp_write, unix_read, unix_write,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: Arc::clone(&metrics),
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
source_ip: Some(ip_str.clone()),
|
||||
}),
|
||||
).await;
|
||||
|
||||
// Include the initial data that was forwarded before the bidirectional relay
|
||||
if initial_len > 0 {
|
||||
metrics.record_bytes(initial_len, 0, route_id, Some(&ip_str));
|
||||
}
|
||||
|
||||
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
|
||||
route_key, _bytes_in + initial_len, _bytes_out);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1176,6 +1260,7 @@ impl TcpListenerManager {
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<&str>,
|
||||
conn_config: &ConnectionConfig,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Connect to backend over TLS with timeout
|
||||
let backend_tls = match tokio::time::timeout(
|
||||
@@ -1220,7 +1305,7 @@ impl TcpListenerManager {
|
||||
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
client_read, client_write, backend_read, backend_write,
|
||||
inactivity_timeout, max_lifetime,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some(forwarder::ForwardMetricsCtx {
|
||||
collector: metrics,
|
||||
route_id: route_id.map(|s| s.to_string()),
|
||||
@@ -1295,6 +1380,7 @@ impl TcpListenerManager {
|
||||
mut backend_write: W2,
|
||||
inactivity_timeout: std::time::Duration,
|
||||
max_lifetime: std::time::Duration,
|
||||
cancel: CancellationToken,
|
||||
metrics: Option<forwarder::ForwardMetricsCtx>,
|
||||
) -> (u64, u64)
|
||||
where
|
||||
@@ -1362,7 +1448,7 @@ impl TcpListenerManager {
|
||||
total
|
||||
});
|
||||
|
||||
// Watchdog task: check for inactivity and max lifetime
|
||||
// Watchdog task: check for inactivity, max lifetime, and cancellation
|
||||
let la_watch = Arc::clone(&last_activity);
|
||||
let c2b_handle = c2b.abort_handle();
|
||||
let b2c_handle = b2c.abort_handle();
|
||||
@@ -1370,29 +1456,37 @@ impl TcpListenerManager {
|
||||
let check_interval = std::time::Duration::from_secs(5);
|
||||
let mut last_seen = 0u64;
|
||||
loop {
|
||||
tokio::time::sleep(check_interval).await;
|
||||
|
||||
// Check max lifetime
|
||||
if start.elapsed() >= max_lifetime {
|
||||
debug!("Connection exceeded max lifetime, closing");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
|
||||
// Check inactivity
|
||||
let current = la_watch.load(Ordering::Relaxed);
|
||||
if current == last_seen {
|
||||
// No activity since last check
|
||||
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
|
||||
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
|
||||
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
debug!("Split-stream connection cancelled by shutdown");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
_ = tokio::time::sleep(check_interval) => {
|
||||
// Check max lifetime
|
||||
if start.elapsed() >= max_lifetime {
|
||||
debug!("Connection exceeded max lifetime, closing");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
|
||||
// Check inactivity
|
||||
let current = la_watch.load(Ordering::Relaxed);
|
||||
if current == last_seen {
|
||||
// No activity since last check
|
||||
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
|
||||
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
|
||||
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
last_seen = current;
|
||||
}
|
||||
}
|
||||
last_seen = current;
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -98,10 +98,24 @@ pub fn build_shared_tls_acceptor(resolver: CertResolver) -> Result<TlsAcceptor,
|
||||
}
|
||||
|
||||
/// Build a TLS acceptor from PEM-encoded cert and key data.
|
||||
/// Thin wrapper: delegates to `build_tls_acceptor_with_config` with no route TLS config (`None`).
/// Per the original note, the result advertises both h2 and http/1.1 via ALPN (for
/// client-facing connections); the ALPN list itself is set in the delegate, not here.
pub fn build_tls_acceptor(cert_pem: &str, key_pem: &str) -> Result<TlsAcceptor, Box<dyn std::error::Error + Send + Sync>> {
|
||||
build_tls_acceptor_with_config(cert_pem, key_pem, None)
|
||||
}
|
||||
|
||||
/// Build a TLS acceptor for backend servers that only speak HTTP/1.1.
|
||||
/// Does NOT advertise h2 in ALPN, preventing false h2 auto-detection.
///
/// * `cert_pem` / `key_pem` - PEM-encoded certificate and private key text.
///
/// Returns an error if loading the certificate or key fails, or if the server
/// config rejects the certificate/key pair.
pub fn build_tls_acceptor_h1_only(cert_pem: &str, key_pem: &str) -> Result<TlsAcceptor, Box<dyn std::error::Error + Send + Sync>> {
|
||||
ensure_crypto_provider();
|
||||
let certs = load_certs(cert_pem)?;
|
||||
let key = load_private_key(key_pem)?;
|
||||
// Server side only: clients are not asked for a certificate
let mut config = ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(certs, key)?;
|
||||
// The ALPN list contains http/1.1 only, so h2 is never offered
config.alpn_protocols = vec![b"http/1.1".to_vec()];
|
||||
Ok(TlsAcceptor::from(Arc::new(config)))
|
||||
}
|
||||
|
||||
/// Build a TLS acceptor with optional RouteTls configuration for version/cipher tuning.
|
||||
pub fn build_tls_acceptor_with_config(
|
||||
cert_pem: &str,
|
||||
@@ -204,6 +218,25 @@ pub fn shared_backend_tls_config() -> Arc<rustls::ClientConfig> {
|
||||
}).clone()
|
||||
}
|
||||
|
||||
/// Process-wide cache for the backend TLS `ClientConfig` with ALPN `h2` + `http/1.1`.
|
||||
///
|
||||
/// Filled at most once, on the first call to `shared_backend_tls_config_alpn`;
|
||||
/// later calls clone the stored `Arc`.
static SHARED_CLIENT_CONFIG_ALPN: OnceLock<Arc<rustls::ClientConfig>> = OnceLock::new();
|
||||
|
||||
/// Get or create a shared backend TLS `ClientConfig` with ALPN `h2` + `http/1.1`.
///
/// Per the original note, this is used for auto-detection mode: the backend server
/// picks its preferred protocol via ALPN, and the proxy reads the negotiated result
/// to decide h1 vs h2 forwarding.
///
/// NOTE(review): the config is built via `.dangerous()` with `InsecureVerifier`, which
/// by its name presumably skips backend certificate validation — confirm this is intended.
pub fn shared_backend_tls_config_alpn() -> Arc<rustls::ClientConfig> {
|
||||
SHARED_CLIENT_CONFIG_ALPN.get_or_init(|| {
|
||||
ensure_crypto_provider();
|
||||
let mut config = rustls::ClientConfig::builder()
|
||||
.dangerous()
|
||||
.with_custom_certificate_verifier(Arc::new(InsecureVerifier))
|
||||
.with_no_client_auth();
|
||||
// Offer h2 first, then http/1.1
config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||
info!("Built shared backend TLS client config with ALPN h2+http/1.1 for auto-detection");
|
||||
Arc::new(config)
|
||||
}).clone()
|
||||
}
|
||||
|
||||
/// Connect to a backend with TLS (for terminate-and-reencrypt mode).
|
||||
/// Uses the shared backend TLS config for session resumption.
|
||||
pub async fn connect_tls(
|
||||
|
||||
@@ -39,7 +39,7 @@ hyper = { workspace = true }
|
||||
hyper-util = { workspace = true }
|
||||
http-body-util = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
tikv-jemallocator = { workspace = true }
|
||||
mimalloc = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
rcgen = { workspace = true }
|
||||
|
||||
@@ -603,6 +603,31 @@ impl RustProxy {
|
||||
.collect();
|
||||
self.metrics.retain_routes(&active_route_ids);
|
||||
|
||||
// Prune per-backend metrics for backends no longer in any route target.
|
||||
// For PortSpec::Preserve routes, expand across all listening ports since
|
||||
// the actual runtime port depends on the incoming connection.
|
||||
let listening_ports = self.get_listening_ports();
|
||||
let active_backends: HashSet<String> = routes.iter()
|
||||
.filter_map(|r| r.action.targets.as_ref())
|
||||
.flat_map(|targets| targets.iter())
|
||||
.flat_map(|target| {
|
||||
let hosts: Vec<String> = target.host.to_vec().into_iter().map(|s| s.to_string()).collect();
|
||||
match &target.port {
|
||||
rustproxy_config::PortSpec::Fixed(p) => {
|
||||
hosts.into_iter().map(|h| format!("{}:{}", h, p)).collect::<Vec<_>>()
|
||||
}
|
||||
_ => {
|
||||
// Preserve/special: expand across all listening ports
|
||||
let lp = &listening_ports;
|
||||
hosts.into_iter()
|
||||
.flat_map(|h| lp.iter().map(move |p| format!("{}:{}", h, *p)))
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
self.metrics.retain_backends(&active_backends);
|
||||
|
||||
// Atomically swap the route table
|
||||
let new_manager = Arc::new(new_manager);
|
||||
self.route_table.store(Arc::clone(&new_manager));
|
||||
@@ -610,6 +635,8 @@ impl RustProxy {
|
||||
// Update listener manager
|
||||
if let Some(ref mut listener) = self.listener_manager {
|
||||
listener.update_route_manager(Arc::clone(&new_manager));
|
||||
// Cancel connections on routes that were removed or disabled
|
||||
listener.invalidate_removed_routes(&active_route_ids);
|
||||
// Prune HTTP proxy caches (rate limiters, regex cache, round-robin counters)
|
||||
listener.prune_http_proxy_caches(&active_route_ids);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
@@ -195,7 +195,10 @@ pub async fn start_tls_http_backend(
|
||||
) -> JoinHandle<()> {
|
||||
use std::sync::Arc;
|
||||
|
||||
let acceptor = rustproxy_passthrough::build_tls_acceptor(cert_pem, key_pem)
|
||||
// Use h1-only acceptor: test backends speak raw HTTP/1.1 text,
|
||||
// so they must NOT advertise h2 via ALPN (which would cause
|
||||
// auto-detect to attempt h2 binary framing and fail).
|
||||
let acceptor = rustproxy_passthrough::build_tls_acceptor_h1_only(cert_pem, key_pem)
|
||||
.expect("Failed to build TLS acceptor");
|
||||
let acceptor = Arc::new(acceptor);
|
||||
let name = backend_name.to_string();
|
||||
|
||||
@@ -7,10 +7,15 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import { SmartProxy } from '../ts/proxies/smart-proxy/index.js';
|
||||
import type { IRouteConfig } from '../ts/proxies/smart-proxy/models/route-types.js';
|
||||
import { findFreePorts } from './helpers/port-allocator.js';
|
||||
|
||||
// Use unique high ports for each test to avoid conflicts
|
||||
let testPort = 20000;
|
||||
const getNextPort = () => testPort++;
|
||||
let testPorts: number[];
|
||||
let portIndex = 0;
|
||||
const getNextPort = () => testPorts[portIndex++];
|
||||
|
||||
tap.test('setup - allocate ports', async () => {
|
||||
testPorts = await findFreePorts(16);
|
||||
});
|
||||
|
||||
// --------------------------------- Single Route, No Domain Restriction ---------------------------------
|
||||
|
||||
|
||||
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@push.rocks/smartproxy',
|
||||
version: '25.8.0',
|
||||
version: '25.10.2',
|
||||
description: 'A powerful proxy package with unified route-based configuration for high traffic management. Features include SSL/TLS support, flexible routing patterns, WebSocket handling, advanced security options, and automatic ACME certificate management.'
|
||||
}
|
||||
|
||||
@@ -354,17 +354,17 @@ export class LogDeduplicator {
|
||||
// Global instance for connection-related log deduplication
|
||||
export const connectionLogDeduplicator = new LogDeduplicator(5000); // 5 second batches
|
||||
|
||||
// Ensure logs are flushed on process exit
|
||||
// Ensure logs are flushed on process exit.
|
||||
// Only use beforeExit — do NOT call process.exit() from SIGINT/SIGTERM handlers
|
||||
// as that kills the host process's graceful shutdown (e.g., dcrouter connection draining).
|
||||
process.on('beforeExit', () => {
|
||||
connectionLogDeduplicator.flushAll();
|
||||
});
|
||||
|
||||
process.on('SIGINT', () => {
|
||||
connectionLogDeduplicator.cleanup();
|
||||
process.exit(0);
|
||||
});
|
||||
|
||||
process.on('SIGTERM', () => {
|
||||
connectionLogDeduplicator.cleanup();
|
||||
process.exit(0);
|
||||
});
|
||||
@@ -18,8 +18,8 @@ export class ProtocolDetector {
|
||||
private fragmentManager: DetectionFragmentManager;
|
||||
private tlsDetector: TlsDetector;
|
||||
private httpDetector: HttpDetector;
|
||||
private connectionProtocols: Map<string, 'tls' | 'http'> = new Map();
|
||||
|
||||
private connectionProtocols: Map<string, { protocol: 'tls' | 'http'; createdAt: number }> = new Map();
|
||||
|
||||
constructor() {
|
||||
this.fragmentManager = new DetectionFragmentManager();
|
||||
this.tlsDetector = new TlsDetector();
|
||||
@@ -124,8 +124,9 @@ export class ProtocolDetector {
|
||||
const connectionId = DetectionFragmentManager.createConnectionId(context);
|
||||
|
||||
// Check if we already know the protocol for this connection
|
||||
const knownProtocol = this.connectionProtocols.get(connectionId);
|
||||
|
||||
const knownEntry = this.connectionProtocols.get(connectionId);
|
||||
const knownProtocol = knownEntry?.protocol;
|
||||
|
||||
if (knownProtocol === 'http') {
|
||||
const result = this.httpDetector.detectWithContext(buffer, context, options);
|
||||
if (result) {
|
||||
@@ -163,7 +164,7 @@ export class ProtocolDetector {
|
||||
if (!knownProtocol) {
|
||||
// First peek to determine protocol type
|
||||
if (this.tlsDetector.canHandle(buffer)) {
|
||||
this.connectionProtocols.set(connectionId, 'tls');
|
||||
this.connectionProtocols.set(connectionId, { protocol: 'tls', createdAt: Date.now() });
|
||||
// Handle TLS with fragment accumulation
|
||||
const handler = this.fragmentManager.getHandler('tls');
|
||||
const fragmentResult = handler.addFragment(connectionId, buffer);
|
||||
@@ -189,7 +190,7 @@ export class ProtocolDetector {
|
||||
}
|
||||
|
||||
if (this.httpDetector.canHandle(buffer)) {
|
||||
this.connectionProtocols.set(connectionId, 'http');
|
||||
this.connectionProtocols.set(connectionId, { protocol: 'http', createdAt: Date.now() });
|
||||
const result = this.httpDetector.detectWithContext(buffer, context, options);
|
||||
if (result) {
|
||||
if (result.isComplete) {
|
||||
@@ -221,6 +222,14 @@ export class ProtocolDetector {
|
||||
|
||||
private cleanupInstance(): void {
|
||||
this.fragmentManager.cleanup();
|
||||
// Remove stale connectionProtocols entries (abandoned handshakes, port scanners)
|
||||
const maxAge = 30_000; // 30 seconds
|
||||
const now = Date.now();
|
||||
for (const [id, entry] of this.connectionProtocols) {
|
||||
if (now - entry.createdAt > maxAge) {
|
||||
this.connectionProtocols.delete(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -242,8 +251,7 @@ export class ProtocolDetector {
|
||||
* @param _maxAge Maximum age in milliseconds (default: 30000, i.e. 30 seconds). Currently unused: the value is not passed on to the cleanup call.
|
||||
*/
|
||||
static cleanupConnections(_maxAge: number = 30000): void {
|
||||
// Cleanup is now handled internally by the fragment manager
|
||||
this.getInstance().fragmentManager.cleanup();
|
||||
this.getInstance().cleanupInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -112,12 +112,12 @@ export interface ISmartProxyOptions {
|
||||
maxVersion?: string;
|
||||
|
||||
// Timeout settings
|
||||
connectionTimeout?: number; // Timeout for establishing connection to backend (ms), default: 30000 (30s)
|
||||
connectionTimeout?: number; // Timeout for establishing connection to backend (ms), default: 60000 (60s)
|
||||
initialDataTimeout?: number; // Timeout for initial data/SNI (ms), default: 60000 (60s)
|
||||
socketTimeout?: number; // Socket inactivity timeout (ms), default: 3600000 (1h)
|
||||
socketTimeout?: number; // Socket inactivity timeout (ms), default: 60000 (60s)
|
||||
inactivityCheckInterval?: number; // How often to check for inactive connections (ms), default: 60000 (60s)
|
||||
maxConnectionLifetime?: number; // Default max connection lifetime (ms), default: 86400000 (24h)
|
||||
inactivityTimeout?: number; // Inactivity timeout (ms), default: 14400000 (4h)
|
||||
maxConnectionLifetime?: number; // Max connection lifetime (ms), default: 3600000 (1h)
|
||||
inactivityTimeout?: number; // Inactivity timeout (ms), default: 75000 (75s)
|
||||
|
||||
gracefulShutdownTimeout?: number; // (ms) maximum time to wait for connections to close during shutdown
|
||||
|
||||
|
||||
@@ -67,6 +67,13 @@ export interface IMetrics {
|
||||
connections(): number;
|
||||
};
|
||||
|
||||
// Backend metrics
|
||||
backends: {
|
||||
byBackend(): Map<string, IBackendMetrics>;
|
||||
protocols(): Map<string, string>;
|
||||
topByErrors(limit?: number): Array<{ backend: string; errors: number }>;
|
||||
};
|
||||
|
||||
// Performance metrics
|
||||
percentiles: {
|
||||
connectionDuration(): { p50: number; p95: number; p99: number };
|
||||
@@ -98,6 +105,21 @@ export interface IMetricsConfig {
|
||||
prometheusPrefix: string; // Default: smartproxy_
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-backend metrics: one entry per backend, as returned by IMetrics.backends.byBackend()
|
||||
*/
|
||||
export interface IBackendMetrics {
|
||||
protocol: string;
|
||||
activeConnections: number;
|
||||
totalConnections: number;
|
||||
connectErrors: number;
|
||||
handshakeErrors: number;
|
||||
requestErrors: number;
|
||||
avgConnectTimeMs: number;
|
||||
poolHitRate: number;
|
||||
h2Failures: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal interface for connection byte tracking
|
||||
*/
|
||||
|
||||
@@ -262,7 +262,7 @@ export interface IRouteAction {
|
||||
|
||||
// Additional options for backend-specific settings
|
||||
options?: {
|
||||
backendProtocol?: 'http1' | 'http2';
|
||||
backendProtocol?: 'http1' | 'http2' | 'auto';
|
||||
[key: string]: any;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { IMetrics, IThroughputData, IThroughputHistoryPoint } from './models/metrics-types.js';
|
||||
import type { IMetrics, IBackendMetrics, IThroughputData, IThroughputHistoryPoint } from './models/metrics-types.js';
|
||||
import type { RustProxyBridge } from './rust-proxy-bridge.js';
|
||||
|
||||
/**
|
||||
@@ -169,6 +169,55 @@ export class RustMetricsAdapter implements IMetrics {
|
||||
},
|
||||
};
|
||||
|
||||
public backends = {
|
||||
byBackend: (): Map<string, IBackendMetrics> => {
|
||||
const result = new Map<string, IBackendMetrics>();
|
||||
if (this.cache?.backends) {
|
||||
for (const [key, bm] of Object.entries(this.cache.backends)) {
|
||||
const m = bm as any;
|
||||
const totalTimeUs = m.totalConnectTimeUs ?? 0;
|
||||
const count = m.connectCount ?? 0;
|
||||
const poolHits = m.poolHits ?? 0;
|
||||
const poolMisses = m.poolMisses ?? 0;
|
||||
const poolTotal = poolHits + poolMisses;
|
||||
result.set(key, {
|
||||
protocol: m.protocol ?? 'unknown',
|
||||
activeConnections: m.activeConnections ?? 0,
|
||||
totalConnections: m.totalConnections ?? 0,
|
||||
connectErrors: m.connectErrors ?? 0,
|
||||
handshakeErrors: m.handshakeErrors ?? 0,
|
||||
requestErrors: m.requestErrors ?? 0,
|
||||
avgConnectTimeMs: count > 0 ? (totalTimeUs / count) / 1000 : 0,
|
||||
poolHitRate: poolTotal > 0 ? poolHits / poolTotal : 0,
|
||||
h2Failures: m.h2Failures ?? 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
},
|
||||
protocols: (): Map<string, string> => {
|
||||
const result = new Map<string, string>();
|
||||
if (this.cache?.backends) {
|
||||
for (const [key, bm] of Object.entries(this.cache.backends)) {
|
||||
result.set(key, (bm as any).protocol ?? 'unknown');
|
||||
}
|
||||
}
|
||||
return result;
|
||||
},
|
||||
topByErrors: (limit: number = 10): Array<{ backend: string; errors: number }> => {
|
||||
const result: Array<{ backend: string; errors: number }> = [];
|
||||
if (this.cache?.backends) {
|
||||
for (const [key, bm] of Object.entries(this.cache.backends)) {
|
||||
const m = bm as any;
|
||||
const errors = (m.connectErrors ?? 0) + (m.handshakeErrors ?? 0) + (m.requestErrors ?? 0);
|
||||
if (errors > 0) result.push({ backend: key, errors });
|
||||
}
|
||||
}
|
||||
result.sort((a, b) => b.errors - a.errors);
|
||||
return result.slice(0, limit);
|
||||
},
|
||||
};
|
||||
|
||||
public percentiles = {
|
||||
connectionDuration: (): { p50: number; p95: number; p99: number } => {
|
||||
return { p50: 0, p95: 0, p99: 0 };
|
||||
|
||||
@@ -47,16 +47,16 @@ export class SmartProxy extends plugins.EventEmitter {
|
||||
// Apply defaults
|
||||
this.settings = {
|
||||
...settingsArg,
|
||||
initialDataTimeout: settingsArg.initialDataTimeout || 120000,
|
||||
socketTimeout: settingsArg.socketTimeout || 3600000,
|
||||
maxConnectionLifetime: settingsArg.maxConnectionLifetime || 86400000,
|
||||
inactivityTimeout: settingsArg.inactivityTimeout || 14400000,
|
||||
gracefulShutdownTimeout: settingsArg.gracefulShutdownTimeout || 30000,
|
||||
initialDataTimeout: settingsArg.initialDataTimeout || 60_000,
|
||||
socketTimeout: settingsArg.socketTimeout || 60_000,
|
||||
maxConnectionLifetime: settingsArg.maxConnectionLifetime || 3_600_000,
|
||||
inactivityTimeout: settingsArg.inactivityTimeout || 75_000,
|
||||
gracefulShutdownTimeout: settingsArg.gracefulShutdownTimeout || 30_000,
|
||||
maxConnectionsPerIP: settingsArg.maxConnectionsPerIP || 100,
|
||||
connectionRateLimitPerMinute: settingsArg.connectionRateLimitPerMinute || 300,
|
||||
keepAliveTreatment: settingsArg.keepAliveTreatment || 'extended',
|
||||
keepAliveInactivityMultiplier: settingsArg.keepAliveInactivityMultiplier || 6,
|
||||
extendedKeepAliveLifetime: settingsArg.extendedKeepAliveLifetime || 7 * 24 * 60 * 60 * 1000,
|
||||
keepAliveTreatment: settingsArg.keepAliveTreatment || 'standard',
|
||||
keepAliveInactivityMultiplier: settingsArg.keepAliveInactivityMultiplier || 4,
|
||||
extendedKeepAliveLifetime: settingsArg.extendedKeepAliveLifetime || 3_600_000,
|
||||
};
|
||||
|
||||
// Normalize ACME options
|
||||
|
||||
@@ -92,6 +92,16 @@ export class SocketHandlerServer {
|
||||
let metadataBuffer = '';
|
||||
let metadataParsed = false;
|
||||
|
||||
// 10s timeout for metadata parsing phase — if Rust connects but never
|
||||
// sends the JSON metadata line, don't hold the socket open indefinitely.
|
||||
socket.setTimeout(10_000);
|
||||
socket.on('timeout', () => {
|
||||
if (!metadataParsed) {
|
||||
logger.log('warn', 'Socket handler metadata timeout, closing', { component: 'socket-handler-server' });
|
||||
socket.destroy();
|
||||
}
|
||||
});
|
||||
|
||||
const onData = (chunk: Buffer) => {
|
||||
if (metadataParsed) return;
|
||||
|
||||
@@ -108,6 +118,7 @@ export class SocketHandlerServer {
|
||||
}
|
||||
|
||||
metadataParsed = true;
|
||||
socket.setTimeout(0); // Clear metadata timeout
|
||||
socket.removeListener('data', onData);
|
||||
socket.pause(); // Prevent data loss between handler removal and pipe setup
|
||||
|
||||
@@ -254,11 +265,30 @@ export class SocketHandlerServer {
|
||||
|
||||
// Connect to the resolved target
|
||||
const backend = plugins.net.connect(port, host, () => {
|
||||
// Connection established — set idle timeout on both sides (5 min)
|
||||
socket.setTimeout(300_000);
|
||||
backend.setTimeout(300_000);
|
||||
|
||||
// Pipe bidirectionally
|
||||
socket.pipe(backend);
|
||||
backend.pipe(socket);
|
||||
});
|
||||
|
||||
// Connect timeout: if backend doesn't connect within 30s, destroy both
|
||||
backend.setTimeout(30_000);
|
||||
|
||||
backend.on('timeout', () => {
|
||||
logger.log('warn', `Dynamic forward timeout to ${host}:${port}`, { component: 'socket-handler-server' });
|
||||
backend.destroy();
|
||||
socket.destroy();
|
||||
});
|
||||
|
||||
socket.on('timeout', () => {
|
||||
logger.log('debug', `Dynamic forward client idle timeout`, { component: 'socket-handler-server' });
|
||||
socket.destroy();
|
||||
backend.destroy();
|
||||
});
|
||||
|
||||
backend.on('error', (err) => {
|
||||
logger.log('error', `Dynamic forward backend error: ${err.message}`, { component: 'socket-handler-server' });
|
||||
socket.destroy();
|
||||
|
||||
Reference in New Issue
Block a user