feat(metrics): add real-time throughput sampling and byte-counting metrics
This commit is contained in:
@@ -14,6 +14,7 @@ rustproxy-metrics = { workspace = true }
|
||||
hyper = { workspace = true }
|
||||
hyper-util = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
http-body = { workspace = true }
|
||||
http-body-util = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
|
||||
122
rust/crates/rustproxy-http/src/counting_body.rs
Normal file
122
rust/crates/rustproxy-http/src/counting_body.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
//! A body wrapper that counts bytes flowing through and reports them to MetricsCollector.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use bytes::Bytes;
|
||||
use http_body::Frame;
|
||||
use rustproxy_metrics::MetricsCollector;
|
||||
|
||||
/// Wraps any `http_body::Body` and counts data bytes passing through.
|
||||
///
|
||||
/// When the body is fully consumed or dropped, accumulated byte counts
|
||||
/// are reported to the `MetricsCollector`.
|
||||
///
|
||||
/// The inner body is pinned on the heap to support `!Unpin` types like `hyper::body::Incoming`.
|
||||
pub struct CountingBody<B> {
|
||||
inner: Pin<Box<B>>,
|
||||
counted_bytes: AtomicU64,
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<String>,
|
||||
/// Whether we count bytes as "in" (request body) or "out" (response body).
|
||||
direction: Direction,
|
||||
/// Whether we've already reported the bytes (to avoid double-reporting on drop).
|
||||
reported: bool,
|
||||
}
|
||||
|
||||
/// Direction of the bytes a `CountingBody` is measuring.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone`/`Copy` so the
/// value can be logged and compared in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// Request body: bytes flowing from client → upstream (counted as bytes_in)
    In,
    /// Response body: bytes flowing from upstream → client (counted as bytes_out)
    Out,
}
|
||||
|
||||
impl<B> CountingBody<B> {
|
||||
/// Create a new CountingBody wrapping an inner body.
|
||||
pub fn new(
|
||||
inner: B,
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<String>,
|
||||
direction: Direction,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: Box::pin(inner),
|
||||
counted_bytes: AtomicU64::new(0),
|
||||
metrics,
|
||||
route_id,
|
||||
direction,
|
||||
reported: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Report accumulated bytes to the metrics collector.
|
||||
fn report(&mut self) {
|
||||
if self.reported {
|
||||
return;
|
||||
}
|
||||
self.reported = true;
|
||||
|
||||
let bytes = self.counted_bytes.load(Ordering::Relaxed);
|
||||
if bytes == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let route_id = self.route_id.as_deref();
|
||||
match self.direction {
|
||||
Direction::In => self.metrics.record_bytes(bytes, 0, route_id),
|
||||
Direction::Out => self.metrics.record_bytes(0, bytes, route_id),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> Drop for CountingBody<B> {
|
||||
fn drop(&mut self) {
|
||||
self.report();
|
||||
}
|
||||
}
|
||||
|
||||
// CountingBody is Unpin because inner is Pin<Box<B>> (always Unpin).
|
||||
impl<B> Unpin for CountingBody<B> {}
|
||||
|
||||
impl<B> http_body::Body for CountingBody<B>
|
||||
where
|
||||
B: http_body::Body<Data = Bytes>,
|
||||
{
|
||||
type Data = Bytes;
|
||||
type Error = B::Error;
|
||||
|
||||
fn poll_frame(
|
||||
self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
) -> Poll<Option<Result<Frame<Self::Data>, Self::Error>>> {
|
||||
let this = self.get_mut();
|
||||
|
||||
match this.inner.as_mut().poll_frame(cx) {
|
||||
Poll::Ready(Some(Ok(frame))) => {
|
||||
if let Some(data) = frame.data_ref() {
|
||||
this.counted_bytes.fetch_add(data.len() as u64, Ordering::Relaxed);
|
||||
}
|
||||
Poll::Ready(Some(Ok(frame)))
|
||||
}
|
||||
Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
|
||||
Poll::Ready(None) => {
|
||||
// Body is fully consumed — report now
|
||||
this.report();
|
||||
Poll::Ready(None)
|
||||
}
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_end_stream(&self) -> bool {
|
||||
self.inner.is_end_stream()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> http_body::SizeHint {
|
||||
self.inner.size_hint()
|
||||
}
|
||||
}
|
||||
@@ -3,12 +3,14 @@
|
||||
//! Hyper-based HTTP proxy service for RustProxy.
|
||||
//! Handles HTTP request parsing, route-based forwarding, and response filtering.
|
||||
|
||||
pub mod counting_body;
|
||||
pub mod proxy_service;
|
||||
pub mod request_filter;
|
||||
pub mod response_filter;
|
||||
pub mod template;
|
||||
pub mod upstream_selector;
|
||||
|
||||
pub use counting_body::*;
|
||||
pub use proxy_service::*;
|
||||
pub use template::*;
|
||||
pub use upstream_selector::*;
|
||||
|
||||
@@ -21,6 +21,7 @@ use tracing::{debug, error, info, warn};
|
||||
use rustproxy_routing::RouteManager;
|
||||
use rustproxy_metrics::MetricsCollector;
|
||||
|
||||
use crate::counting_body::{CountingBody, Direction};
|
||||
use crate::request_filter::RequestFilter;
|
||||
use crate::response_filter::ResponseFilter;
|
||||
use crate::upstream_selector::UpstreamSelector;
|
||||
@@ -345,8 +346,16 @@ impl HttpProxyService {
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap the request body in CountingBody to track bytes_in
|
||||
let counting_req_body = CountingBody::new(
|
||||
body,
|
||||
Arc::clone(&self.metrics),
|
||||
route_id.map(|s| s.to_string()),
|
||||
Direction::In,
|
||||
);
|
||||
|
||||
// Stream the request body through to upstream
|
||||
let upstream_req = upstream_req.body(body).unwrap();
|
||||
let upstream_req = upstream_req.body(counting_req_body).unwrap();
|
||||
|
||||
let upstream_response = match sender.send_request(upstream_req).await {
|
||||
Ok(resp) => resp,
|
||||
@@ -401,8 +410,16 @@ impl HttpProxyService {
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap the request body in CountingBody to track bytes_in
|
||||
let counting_req_body = CountingBody::new(
|
||||
body,
|
||||
Arc::clone(&self.metrics),
|
||||
route_id.map(|s| s.to_string()),
|
||||
Direction::In,
|
||||
);
|
||||
|
||||
// Stream the request body through to upstream
|
||||
let upstream_req = upstream_req.body(body).unwrap();
|
||||
let upstream_req = upstream_req.body(counting_req_body).unwrap();
|
||||
|
||||
let upstream_response = match sender.send_request(upstream_req).await {
|
||||
Ok(resp) => resp,
|
||||
@@ -417,6 +434,10 @@ impl HttpProxyService {
|
||||
}
|
||||
|
||||
/// Build the client-facing response from an upstream response, streaming the body.
|
||||
///
|
||||
/// The response body is wrapped in a `CountingBody` that counts bytes as they
|
||||
/// stream from upstream to client. When the body is fully consumed (or dropped),
|
||||
/// it reports byte counts to the metrics collector; `connection_closed` is invoked separately, before the body starts streaming.
|
||||
async fn build_streaming_response(
|
||||
&self,
|
||||
upstream_response: Response<Incoming>,
|
||||
@@ -433,10 +454,22 @@ impl HttpProxyService {
|
||||
ResponseFilter::apply_headers(route, headers, None);
|
||||
}
|
||||
|
||||
// Wrap the response body in CountingBody to track bytes_out.
|
||||
// CountingBody reports the accumulated byte count once the body stream
|
||||
// completes or is dropped; the connection metric is closed separately below.
|
||||
let counting_body = CountingBody::new(
|
||||
resp_body,
|
||||
Arc::clone(&self.metrics),
|
||||
route_id.map(|s| s.to_string()),
|
||||
Direction::Out,
|
||||
);
|
||||
|
||||
// Close the connection metric now — the HTTP request/response cycle is done
|
||||
// from the proxy's perspective once we hand the streaming body to hyper.
|
||||
// Bytes will still be counted as they flow.
|
||||
self.metrics.connection_closed(route_id);
|
||||
|
||||
// Stream the response body directly from upstream to client
|
||||
let body: BoxBody<Bytes, hyper::Error> = BoxBody::new(resp_body);
|
||||
let body: BoxBody<Bytes, hyper::Error> = BoxBody::new(counting_body);
|
||||
|
||||
Ok(response.body(body).unwrap())
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use crate::throughput::ThroughputTracker;
|
||||
|
||||
/// Aggregated metrics snapshot.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -12,6 +15,8 @@ pub struct Metrics {
|
||||
pub bytes_out: u64,
|
||||
pub throughput_in_bytes_per_sec: u64,
|
||||
pub throughput_out_bytes_per_sec: u64,
|
||||
pub throughput_recent_in_bytes_per_sec: u64,
|
||||
pub throughput_recent_out_bytes_per_sec: u64,
|
||||
pub routes: std::collections::HashMap<String, RouteMetrics>,
|
||||
}
|
||||
|
||||
@@ -25,6 +30,8 @@ pub struct RouteMetrics {
|
||||
pub bytes_out: u64,
|
||||
pub throughput_in_bytes_per_sec: u64,
|
||||
pub throughput_out_bytes_per_sec: u64,
|
||||
pub throughput_recent_in_bytes_per_sec: u64,
|
||||
pub throughput_recent_out_bytes_per_sec: u64,
|
||||
}
|
||||
|
||||
/// Statistics snapshot.
|
||||
@@ -38,7 +45,15 @@ pub struct Statistics {
|
||||
pub uptime_seconds: u64,
|
||||
}
|
||||
|
||||
/// Default retention for throughput samples (1 hour).
|
||||
const DEFAULT_RETENTION_SECONDS: usize = 3600;
|
||||
|
||||
/// Metrics collector tracking connections and throughput.
|
||||
///
|
||||
/// Design: The hot path (`record_bytes`) never takes a `Mutex` — it only bumps
|
||||
/// `AtomicU64` counters (per-route counters are reached through `DashMap::entry`,
|
||||
/// which briefly locks one shard). The cold path (`sample_all`, called at 1Hz)
|
||||
/// drains those atomics and feeds the throughput trackers under a Mutex. This
|
||||
/// avoids Mutex contention when `record_bytes` is called per-chunk in the TCP copy loop.
|
||||
pub struct MetricsCollector {
|
||||
active_connections: AtomicU64,
|
||||
total_connections: AtomicU64,
|
||||
@@ -51,10 +66,25 @@ pub struct MetricsCollector {
|
||||
/// Per-route byte counters
|
||||
route_bytes_in: DashMap<String, AtomicU64>,
|
||||
route_bytes_out: DashMap<String, AtomicU64>,
|
||||
|
||||
// ── Lock-free pending throughput counters (hot path) ──
|
||||
global_pending_tp_in: AtomicU64,
|
||||
global_pending_tp_out: AtomicU64,
|
||||
route_pending_tp: DashMap<String, (AtomicU64, AtomicU64)>,
|
||||
|
||||
// ── Throughput history — only locked during sampling (cold path) ──
|
||||
global_throughput: Mutex<ThroughputTracker>,
|
||||
route_throughput: DashMap<String, Mutex<ThroughputTracker>>,
|
||||
retention_seconds: usize,
|
||||
}
|
||||
|
||||
impl MetricsCollector {
|
||||
pub fn new() -> Self {
|
||||
Self::with_retention(DEFAULT_RETENTION_SECONDS)
|
||||
}
|
||||
|
||||
/// Create a MetricsCollector with a custom retention period for throughput history.
|
||||
pub fn with_retention(retention_seconds: usize) -> Self {
|
||||
Self {
|
||||
active_connections: AtomicU64::new(0),
|
||||
total_connections: AtomicU64::new(0),
|
||||
@@ -64,6 +94,12 @@ impl MetricsCollector {
|
||||
route_total_connections: DashMap::new(),
|
||||
route_bytes_in: DashMap::new(),
|
||||
route_bytes_out: DashMap::new(),
|
||||
global_pending_tp_in: AtomicU64::new(0),
|
||||
global_pending_tp_out: AtomicU64::new(0),
|
||||
route_pending_tp: DashMap::new(),
|
||||
global_throughput: Mutex::new(ThroughputTracker::new(retention_seconds)),
|
||||
route_throughput: DashMap::new(),
|
||||
retention_seconds,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,11 +134,18 @@ impl MetricsCollector {
|
||||
}
|
||||
}
|
||||
|
||||
/// Record bytes transferred.
|
||||
/// Record bytes transferred (lock-free hot path).
|
||||
///
|
||||
/// Called per-chunk in the TCP copy loop. Only touches AtomicU64 counters —
|
||||
/// no Mutex is taken. The throughput trackers are fed during `sample_all()`.
|
||||
pub fn record_bytes(&self, bytes_in: u64, bytes_out: u64, route_id: Option<&str>) {
|
||||
self.total_bytes_in.fetch_add(bytes_in, Ordering::Relaxed);
|
||||
self.total_bytes_out.fetch_add(bytes_out, Ordering::Relaxed);
|
||||
|
||||
// Accumulate into lock-free pending throughput counters
|
||||
self.global_pending_tp_in.fetch_add(bytes_in, Ordering::Relaxed);
|
||||
self.global_pending_tp_out.fetch_add(bytes_out, Ordering::Relaxed);
|
||||
|
||||
if let Some(route_id) = route_id {
|
||||
self.route_bytes_in
|
||||
.entry(route_id.to_string())
|
||||
@@ -112,6 +155,63 @@ impl MetricsCollector {
|
||||
.entry(route_id.to_string())
|
||||
.or_insert_with(|| AtomicU64::new(0))
|
||||
.fetch_add(bytes_out, Ordering::Relaxed);
|
||||
|
||||
// Accumulate into per-route pending throughput counters (lock-free)
|
||||
let entry = self.route_pending_tp
|
||||
.entry(route_id.to_string())
|
||||
.or_insert_with(|| (AtomicU64::new(0), AtomicU64::new(0)));
|
||||
entry.0.fetch_add(bytes_in, Ordering::Relaxed);
|
||||
entry.1.fetch_add(bytes_out, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Take a throughput sample on all trackers (cold path, call at 1Hz or configured interval).
|
||||
///
|
||||
/// Drains the lock-free pending counters and feeds the accumulated bytes
|
||||
/// into the throughput trackers (under Mutex). This is the only place
|
||||
/// the Mutex is locked.
|
||||
pub fn sample_all(&self) {
|
||||
// Drain global pending bytes and feed into the tracker
|
||||
let global_in = self.global_pending_tp_in.swap(0, Ordering::Relaxed);
|
||||
let global_out = self.global_pending_tp_out.swap(0, Ordering::Relaxed);
|
||||
if let Ok(mut tracker) = self.global_throughput.lock() {
|
||||
tracker.record_bytes(global_in, global_out);
|
||||
tracker.sample();
|
||||
}
|
||||
|
||||
// Drain per-route pending bytes; collect into a Vec to avoid holding DashMap shards
|
||||
let mut route_samples: Vec<(String, u64, u64)> = Vec::new();
|
||||
for entry in self.route_pending_tp.iter() {
|
||||
let route_id = entry.key().clone();
|
||||
let pending_in = entry.value().0.swap(0, Ordering::Relaxed);
|
||||
let pending_out = entry.value().1.swap(0, Ordering::Relaxed);
|
||||
route_samples.push((route_id, pending_in, pending_out));
|
||||
}
|
||||
|
||||
// Feed pending bytes into route trackers and sample
|
||||
let retention = self.retention_seconds;
|
||||
for (route_id, pending_in, pending_out) in &route_samples {
|
||||
// Ensure the tracker exists
|
||||
self.route_throughput
|
||||
.entry(route_id.clone())
|
||||
.or_insert_with(|| Mutex::new(ThroughputTracker::new(retention)));
|
||||
// Now get a separate ref and lock it
|
||||
if let Some(tracker_ref) = self.route_throughput.get(route_id) {
|
||||
if let Ok(mut tracker) = tracker_ref.value().lock() {
|
||||
tracker.record_bytes(*pending_in, *pending_out);
|
||||
tracker.sample();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also sample any route trackers that had no new pending bytes
|
||||
// (to keep their sample window advancing)
|
||||
for entry in self.route_throughput.iter() {
|
||||
if !self.route_pending_tp.contains_key(entry.key()) {
|
||||
if let Ok(mut tracker) = entry.value().lock() {
|
||||
tracker.sample();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,6 +239,16 @@ impl MetricsCollector {
|
||||
pub fn snapshot(&self) -> Metrics {
|
||||
let mut routes = std::collections::HashMap::new();
|
||||
|
||||
// Get global throughput (instant = last 1 sample, recent = last 10 samples)
|
||||
let (global_tp_in, global_tp_out, global_recent_in, global_recent_out) = self.global_throughput
|
||||
.lock()
|
||||
.map(|t| {
|
||||
let (i_in, i_out) = t.instant();
|
||||
let (r_in, r_out) = t.recent();
|
||||
(i_in, i_out, r_in, r_out)
|
||||
})
|
||||
.unwrap_or((0, 0, 0, 0));
|
||||
|
||||
// Collect per-route metrics
|
||||
for entry in self.route_total_connections.iter() {
|
||||
let route_id = entry.key().clone();
|
||||
@@ -156,13 +266,24 @@ impl MetricsCollector {
|
||||
.map(|c| c.load(Ordering::Relaxed))
|
||||
.unwrap_or(0);
|
||||
|
||||
let (route_tp_in, route_tp_out, route_recent_in, route_recent_out) = self.route_throughput
|
||||
.get(&route_id)
|
||||
.and_then(|entry| entry.value().lock().ok().map(|t| {
|
||||
let (i_in, i_out) = t.instant();
|
||||
let (r_in, r_out) = t.recent();
|
||||
(i_in, i_out, r_in, r_out)
|
||||
}))
|
||||
.unwrap_or((0, 0, 0, 0));
|
||||
|
||||
routes.insert(route_id, RouteMetrics {
|
||||
active_connections: active,
|
||||
total_connections: total,
|
||||
bytes_in,
|
||||
bytes_out,
|
||||
throughput_in_bytes_per_sec: 0,
|
||||
throughput_out_bytes_per_sec: 0,
|
||||
throughput_in_bytes_per_sec: route_tp_in,
|
||||
throughput_out_bytes_per_sec: route_tp_out,
|
||||
throughput_recent_in_bytes_per_sec: route_recent_in,
|
||||
throughput_recent_out_bytes_per_sec: route_recent_out,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -171,8 +292,10 @@ impl MetricsCollector {
|
||||
total_connections: self.total_connections(),
|
||||
bytes_in: self.total_bytes_in(),
|
||||
bytes_out: self.total_bytes_out(),
|
||||
throughput_in_bytes_per_sec: 0,
|
||||
throughput_out_bytes_per_sec: 0,
|
||||
throughput_in_bytes_per_sec: global_tp_in,
|
||||
throughput_out_bytes_per_sec: global_tp_out,
|
||||
throughput_recent_in_bytes_per_sec: global_recent_in,
|
||||
throughput_recent_out_bytes_per_sec: global_recent_out,
|
||||
routes,
|
||||
}
|
||||
}
|
||||
@@ -248,4 +371,40 @@ mod tests {
|
||||
let route_in = collector.route_bytes_in.get("route-a").unwrap();
|
||||
assert_eq!(route_in.load(Ordering::Relaxed), 150);
|
||||
}
|
||||
|
||||
/// After one `sample_all`, the snapshot exposes the bytes recorded since the
/// previous sample as throughput, both globally and for the route.
#[test]
fn test_throughput_tracking() {
    const ROUTE: &str = "route-a";
    let metrics = MetricsCollector::with_retention(60);

    // The route only shows up in a snapshot once a connection was opened on it.
    metrics.connection_opened(Some(ROUTE));

    // One routed and one unrouted transfer; both count towards the global totals.
    metrics.record_bytes(1000, 2000, Some(ROUTE));
    metrics.record_bytes(500, 750, None);

    // Stand-in for the periodic 1Hz tick.
    metrics.sample_all();

    let snap = metrics.snapshot();

    // Global throughput covers both transfers.
    assert_eq!(snap.throughput_in_bytes_per_sec, 1500);
    assert_eq!(snap.throughput_out_bytes_per_sec, 2750);

    // Route throughput covers only the routed transfer.
    let per_route = snap.routes.get(ROUTE).unwrap();
    assert_eq!(per_route.throughput_in_bytes_per_sec, 1000);
    assert_eq!(per_route.throughput_out_bytes_per_sec, 2000);
}
|
||||
|
||||
/// Recorded bytes must not appear as throughput until `sample_all` has run.
#[test]
fn test_throughput_zero_before_sampling() {
    let metrics = MetricsCollector::with_retention(60);
    metrics.record_bytes(1000, 2000, None);

    // No sample has been taken, so both rates are still zero.
    let snap = metrics.snapshot();
    assert_eq!(snap.throughput_in_bytes_per_sec, 0);
    assert_eq!(snap.throughput_out_bytes_per_sec, 0);
}
|
||||
}
|
||||
|
||||
@@ -5,14 +5,7 @@ use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use tracing::debug;
|
||||
|
||||
use super::connection_record::ConnectionRecord;
|
||||
|
||||
/// Byte totals accumulated for forwarded connections.
///
/// Both counters are atomics, so they can be updated through a shared
/// reference (for example an `Arc<ForwardStats>`); `Default` starts them at zero.
#[derive(Debug, Default)]
pub struct ForwardStats {
    /// Running total of inbound bytes.
    pub bytes_in: AtomicU64,
    /// Running total of outbound bytes.
    pub bytes_out: AtomicU64,
}
|
||||
use rustproxy_metrics::MetricsCollector;
|
||||
|
||||
/// Perform bidirectional TCP forwarding between client and backend.
|
||||
///
|
||||
@@ -68,6 +61,10 @@ pub async fn forward_bidirectional(
|
||||
|
||||
/// Perform bidirectional TCP forwarding with inactivity and max lifetime timeouts.
|
||||
///
|
||||
/// When `metrics` is provided, bytes are reported to the MetricsCollector
|
||||
/// per-chunk (lock-free) as they flow through the copy loops, enabling
|
||||
/// real-time throughput sampling for long-lived connections.
|
||||
///
|
||||
/// Returns (bytes_from_client, bytes_from_backend) when the connection closes or times out.
|
||||
pub async fn forward_bidirectional_with_timeouts(
|
||||
client: TcpStream,
|
||||
@@ -76,10 +73,14 @@ pub async fn forward_bidirectional_with_timeouts(
|
||||
inactivity_timeout: std::time::Duration,
|
||||
max_lifetime: std::time::Duration,
|
||||
cancel: CancellationToken,
|
||||
metrics: Option<(Arc<MetricsCollector>, Option<String>)>,
|
||||
) -> std::io::Result<(u64, u64)> {
|
||||
// Send initial data (peeked bytes) to backend
|
||||
if let Some(data) = initial_data {
|
||||
backend.write_all(data).await?;
|
||||
if let Some((ref m, ref rid)) = metrics {
|
||||
m.record_bytes(data.len() as u64, 0, rid.as_deref());
|
||||
}
|
||||
}
|
||||
|
||||
let (mut client_read, mut client_write) = client.into_split();
|
||||
@@ -90,6 +91,7 @@ pub async fn forward_bidirectional_with_timeouts(
|
||||
|
||||
let la1 = Arc::clone(&last_activity);
|
||||
let initial_len = initial_data.map_or(0u64, |d| d.len() as u64);
|
||||
let metrics_c2b = metrics.clone();
|
||||
let c2b = tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65536];
|
||||
let mut total = initial_len;
|
||||
@@ -103,12 +105,16 @@ pub async fn forward_bidirectional_with_timeouts(
|
||||
}
|
||||
total += n as u64;
|
||||
la1.store(start.elapsed().as_millis() as u64, Ordering::Relaxed);
|
||||
if let Some((ref m, ref rid)) = metrics_c2b {
|
||||
m.record_bytes(n as u64, 0, rid.as_deref());
|
||||
}
|
||||
}
|
||||
let _ = backend_write.shutdown().await;
|
||||
total
|
||||
});
|
||||
|
||||
let la2 = Arc::clone(&last_activity);
|
||||
let metrics_b2c = metrics;
|
||||
let b2c = tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65536];
|
||||
let mut total = 0u64;
|
||||
@@ -122,6 +128,9 @@ pub async fn forward_bidirectional_with_timeouts(
|
||||
}
|
||||
total += n as u64;
|
||||
la2.store(start.elapsed().as_millis() as u64, Ordering::Relaxed);
|
||||
if let Some((ref m, ref rid)) = metrics_b2c {
|
||||
m.record_bytes(0, n as u64, rid.as_deref());
|
||||
}
|
||||
}
|
||||
let _ = client_write.shutdown().await;
|
||||
total
|
||||
@@ -174,152 +183,3 @@ pub async fn forward_bidirectional_with_timeouts(
|
||||
Ok((bytes_in, bytes_out))
|
||||
}
|
||||
|
||||
/// Forward bidirectional with a callback for byte counting.
|
||||
pub async fn forward_bidirectional_with_stats(
|
||||
client: TcpStream,
|
||||
backend: TcpStream,
|
||||
initial_data: Option<&[u8]>,
|
||||
stats: Arc<ForwardStats>,
|
||||
) -> std::io::Result<()> {
|
||||
let (bytes_in, bytes_out) = forward_bidirectional(client, backend, initial_data).await?;
|
||||
stats.bytes_in.fetch_add(bytes_in, Ordering::Relaxed);
|
||||
stats.bytes_out.fetch_add(bytes_out, Ordering::Relaxed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform bidirectional TCP forwarding with inactivity / lifetime timeouts,
|
||||
/// updating a `ConnectionRecord` with byte counts and activity timestamps
|
||||
/// in real time for zombie detection.
|
||||
///
|
||||
/// When `record` is `None`, this behaves identically to
|
||||
/// `forward_bidirectional_with_timeouts`.
|
||||
///
|
||||
/// The record's `client_closed` / `backend_closed` flags are set when the
|
||||
/// respective copy loop terminates, giving the zombie scanner visibility
|
||||
/// into half-open connections.
|
||||
pub async fn forward_bidirectional_with_record(
|
||||
client: TcpStream,
|
||||
mut backend: TcpStream,
|
||||
initial_data: Option<&[u8]>,
|
||||
inactivity_timeout: std::time::Duration,
|
||||
max_lifetime: std::time::Duration,
|
||||
cancel: CancellationToken,
|
||||
record: Option<Arc<ConnectionRecord>>,
|
||||
) -> std::io::Result<(u64, u64)> {
|
||||
// Send initial data (peeked bytes) to backend
|
||||
if let Some(data) = initial_data {
|
||||
backend.write_all(data).await?;
|
||||
if let Some(ref r) = record {
|
||||
r.record_bytes_in(data.len() as u64);
|
||||
}
|
||||
}
|
||||
|
||||
let (mut client_read, mut client_write) = client.into_split();
|
||||
let (mut backend_read, mut backend_write) = backend.into_split();
|
||||
|
||||
let last_activity = Arc::new(AtomicU64::new(0));
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let la1 = Arc::clone(&last_activity);
|
||||
let initial_len = initial_data.map_or(0u64, |d| d.len() as u64);
|
||||
let rec1 = record.clone();
|
||||
let c2b = tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65536];
|
||||
let mut total = initial_len;
|
||||
loop {
|
||||
let n = match client_read.read(&mut buf).await {
|
||||
Ok(0) | Err(_) => break,
|
||||
Ok(n) => n,
|
||||
};
|
||||
if backend_write.write_all(&buf[..n]).await.is_err() {
|
||||
break;
|
||||
}
|
||||
total += n as u64;
|
||||
let now_ms = start.elapsed().as_millis() as u64;
|
||||
la1.store(now_ms, Ordering::Relaxed);
|
||||
if let Some(ref r) = rec1 {
|
||||
r.record_bytes_in(n as u64);
|
||||
}
|
||||
}
|
||||
let _ = backend_write.shutdown().await;
|
||||
// Mark client side as closed
|
||||
if let Some(ref r) = rec1 {
|
||||
r.client_closed.store(true, Ordering::Relaxed);
|
||||
}
|
||||
total
|
||||
});
|
||||
|
||||
let la2 = Arc::clone(&last_activity);
|
||||
let rec2 = record.clone();
|
||||
let b2c = tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65536];
|
||||
let mut total = 0u64;
|
||||
loop {
|
||||
let n = match backend_read.read(&mut buf).await {
|
||||
Ok(0) | Err(_) => break,
|
||||
Ok(n) => n,
|
||||
};
|
||||
if client_write.write_all(&buf[..n]).await.is_err() {
|
||||
break;
|
||||
}
|
||||
total += n as u64;
|
||||
let now_ms = start.elapsed().as_millis() as u64;
|
||||
la2.store(now_ms, Ordering::Relaxed);
|
||||
if let Some(ref r) = rec2 {
|
||||
r.record_bytes_out(n as u64);
|
||||
}
|
||||
}
|
||||
let _ = client_write.shutdown().await;
|
||||
// Mark backend side as closed
|
||||
if let Some(ref r) = rec2 {
|
||||
r.backend_closed.store(true, Ordering::Relaxed);
|
||||
}
|
||||
total
|
||||
});
|
||||
|
||||
// Watchdog: inactivity, max lifetime, and cancellation
|
||||
let la_watch = Arc::clone(&last_activity);
|
||||
let c2b_handle = c2b.abort_handle();
|
||||
let b2c_handle = b2c.abort_handle();
|
||||
let watchdog = tokio::spawn(async move {
|
||||
let check_interval = std::time::Duration::from_secs(5);
|
||||
let mut last_seen = 0u64;
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
debug!("Connection cancelled by shutdown");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
_ = tokio::time::sleep(check_interval) => {
|
||||
// Check max lifetime
|
||||
if start.elapsed() >= max_lifetime {
|
||||
debug!("Connection exceeded max lifetime, closing");
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
|
||||
// Check inactivity
|
||||
let current = la_watch.load(Ordering::Relaxed);
|
||||
if current == last_seen {
|
||||
let elapsed_since_activity = start.elapsed().as_millis() as u64 - current;
|
||||
if elapsed_since_activity >= inactivity_timeout.as_millis() as u64 {
|
||||
debug!("Connection inactive for {}ms, closing", elapsed_since_activity);
|
||||
c2b_handle.abort();
|
||||
b2c_handle.abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
last_seen = current;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let bytes_in = c2b.await.unwrap_or(0);
|
||||
let bytes_out = b2c.await.unwrap_or(0);
|
||||
watchdog.abort();
|
||||
Ok((bytes_in, bytes_out))
|
||||
}
|
||||
|
||||
@@ -496,17 +496,17 @@ impl TcpListenerManager {
|
||||
let mut backend_w = backend;
|
||||
backend_w.write_all(header.as_bytes()).await?;
|
||||
|
||||
let (bytes_in, bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
stream, backend_w, None,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some((Arc::clone(&metrics), route_id.map(|s| s.to_string()))),
|
||||
).await?;
|
||||
metrics.record_bytes(bytes_in, bytes_out, route_id);
|
||||
} else {
|
||||
let (bytes_in, bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
stream, backend, None,
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some((Arc::clone(&metrics), route_id.map(|s| s.to_string()))),
|
||||
).await?;
|
||||
metrics.record_bytes(bytes_in, bytes_out, route_id);
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
@@ -661,6 +661,7 @@ impl TcpListenerManager {
|
||||
stream, n, port, peer_addr,
|
||||
&route_match, domain.as_deref(), is_tls,
|
||||
&relay_socket_path,
|
||||
&metrics, route_id,
|
||||
).await;
|
||||
} else {
|
||||
debug!("Socket-handler route matched but no relay path configured");
|
||||
@@ -751,11 +752,11 @@ impl TcpListenerManager {
|
||||
let mut actual_buf = vec![0u8; n];
|
||||
stream.read_exact(&mut actual_buf).await?;
|
||||
|
||||
let (bytes_in, bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
stream, backend, Some(&actual_buf),
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some((Arc::clone(&metrics), route_id.map(|s| s.to_string()))),
|
||||
).await?;
|
||||
metrics.record_bytes(bytes_in, bytes_out, route_id);
|
||||
Ok(())
|
||||
}
|
||||
Some(rustproxy_config::TlsMode::Terminate) => {
|
||||
@@ -812,12 +813,11 @@ impl TcpListenerManager {
|
||||
let (tls_read, tls_write) = tokio::io::split(buf_stream);
|
||||
let (backend_read, backend_write) = tokio::io::split(backend);
|
||||
|
||||
let (bytes_in, bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
tls_read, tls_write, backend_read, backend_write,
|
||||
inactivity_timeout, max_lifetime,
|
||||
Some((Arc::clone(&metrics), route_id.map(|s| s.to_string()))),
|
||||
).await;
|
||||
|
||||
metrics.record_bytes(bytes_in, bytes_out, route_id);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -825,7 +825,7 @@ impl TcpListenerManager {
|
||||
let route_tls = route_match.route.action.tls.as_ref();
|
||||
Self::handle_tls_terminate_reencrypt(
|
||||
stream, n, &domain, &target_host, target_port,
|
||||
peer_addr, &tls_configs, &metrics, route_id, &conn_config, route_tls,
|
||||
peer_addr, &tls_configs, Arc::clone(&metrics), route_id, &conn_config, route_tls,
|
||||
).await
|
||||
}
|
||||
None => {
|
||||
@@ -862,11 +862,11 @@ impl TcpListenerManager {
|
||||
let mut actual_buf = vec![0u8; n];
|
||||
stream.read_exact(&mut actual_buf).await?;
|
||||
|
||||
let (bytes_in, bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
let (_bytes_in, _bytes_out) = forwarder::forward_bidirectional_with_timeouts(
|
||||
stream, backend, Some(&actual_buf),
|
||||
inactivity_timeout, max_lifetime, cancel,
|
||||
Some((Arc::clone(&metrics), route_id.map(|s| s.to_string()))),
|
||||
).await?;
|
||||
metrics.record_bytes(bytes_in, bytes_out, route_id);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -892,6 +892,8 @@ impl TcpListenerManager {
|
||||
domain: Option<&str>,
|
||||
is_tls: bool,
|
||||
relay_path: &str,
|
||||
metrics: &MetricsCollector,
|
||||
route_id: Option<&str>,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::UnixStream;
|
||||
@@ -932,12 +934,20 @@ impl TcpListenerManager {
|
||||
unix_stream.write_all(&initial_buf).await?;
|
||||
|
||||
// Bidirectional relay between TCP client and Unix socket handler
|
||||
let initial_len = initial_buf.len() as u64;
|
||||
match tokio::io::copy_bidirectional(&mut stream, &mut unix_stream).await {
|
||||
Ok((c2s, s2c)) => {
|
||||
// Include initial data bytes that were forwarded before copy_bidirectional
|
||||
let total_in = c2s + initial_len;
|
||||
debug!("Socket handler relay complete for {}: {} bytes in, {} bytes out",
|
||||
route_key, c2s, s2c);
|
||||
route_key, total_in, s2c);
|
||||
metrics.record_bytes(total_in, s2c, route_id);
|
||||
}
|
||||
Err(e) => {
|
||||
// Still record the initial data even on error
|
||||
if initial_len > 0 {
|
||||
metrics.record_bytes(initial_len, 0, route_id);
|
||||
}
|
||||
debug!("Socket handler relay ended for {}: {}", route_key, e);
|
||||
}
|
||||
}
|
||||
@@ -954,7 +964,7 @@ impl TcpListenerManager {
|
||||
target_port: u16,
|
||||
peer_addr: std::net::SocketAddr,
|
||||
tls_configs: &HashMap<String, TlsCertConfig>,
|
||||
metrics: &MetricsCollector,
|
||||
metrics: Arc<MetricsCollector>,
|
||||
route_id: Option<&str>,
|
||||
conn_config: &ConnectionConfig,
|
||||
route_tls: Option<&rustproxy_config::RouteTls>,
|
||||
@@ -1019,12 +1029,12 @@ impl TcpListenerManager {
|
||||
}
|
||||
};
|
||||
|
||||
let (bytes_in, bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
let (_bytes_in, _bytes_out) = Self::forward_bidirectional_split_with_timeouts(
|
||||
client_read, client_write, backend_read, backend_write,
|
||||
inactivity_timeout, max_lifetime,
|
||||
Some((metrics, route_id.map(|s| s.to_string()))),
|
||||
).await;
|
||||
|
||||
metrics.record_bytes(bytes_in, bytes_out, route_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1058,6 +1068,9 @@ impl TcpListenerManager {
|
||||
}
|
||||
|
||||
/// Forward bidirectional between two split streams with inactivity and lifetime timeouts.
|
||||
///
|
||||
/// When `metrics` is provided, bytes are reported per-chunk (lock-free) for
|
||||
/// real-time throughput measurement.
|
||||
async fn forward_bidirectional_split_with_timeouts<R1, W1, R2, W2>(
|
||||
mut client_read: R1,
|
||||
mut client_write: W1,
|
||||
@@ -1065,6 +1078,7 @@ impl TcpListenerManager {
|
||||
mut backend_write: W2,
|
||||
inactivity_timeout: std::time::Duration,
|
||||
max_lifetime: std::time::Duration,
|
||||
metrics: Option<(Arc<MetricsCollector>, Option<String>)>,
|
||||
) -> (u64, u64)
|
||||
where
|
||||
R1: tokio::io::AsyncRead + Unpin + Send + 'static,
|
||||
@@ -1080,6 +1094,7 @@ impl TcpListenerManager {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let la1 = Arc::clone(&last_activity);
|
||||
let metrics_c2b = metrics.clone();
|
||||
let c2b = tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65536];
|
||||
let mut total = 0u64;
|
||||
@@ -1096,12 +1111,16 @@ impl TcpListenerManager {
|
||||
start.elapsed().as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
if let Some((ref m, ref rid)) = metrics_c2b {
|
||||
m.record_bytes(n as u64, 0, rid.as_deref());
|
||||
}
|
||||
}
|
||||
let _ = backend_write.shutdown().await;
|
||||
total
|
||||
});
|
||||
|
||||
let la2 = Arc::clone(&last_activity);
|
||||
let metrics_b2c = metrics;
|
||||
let b2c = tokio::spawn(async move {
|
||||
let mut buf = vec![0u8; 65536];
|
||||
let mut total = 0u64;
|
||||
@@ -1118,6 +1137,9 @@ impl TcpListenerManager {
|
||||
start.elapsed().as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
if let Some((ref m, ref rid)) = metrics_b2c {
|
||||
m.record_bytes(0, n as u64, rid.as_deref());
|
||||
}
|
||||
}
|
||||
let _ = client_write.shutdown().await;
|
||||
total
|
||||
|
||||
@@ -71,6 +71,7 @@ pub struct RustProxy {
|
||||
cert_manager: Option<Arc<tokio::sync::Mutex<CertManager>>>,
|
||||
challenge_server: Option<challenge_server::ChallengeServer>,
|
||||
renewal_handle: Option<tokio::task::JoinHandle<()>>,
|
||||
sampling_handle: Option<tokio::task::JoinHandle<()>>,
|
||||
nft_manager: Option<NftManager>,
|
||||
started: bool,
|
||||
started_at: Option<Instant>,
|
||||
@@ -100,14 +101,19 @@ impl RustProxy {
|
||||
let cert_manager = Self::build_cert_manager(&options)
|
||||
.map(|cm| Arc::new(tokio::sync::Mutex::new(cm)));
|
||||
|
||||
let retention = options.metrics.as_ref()
|
||||
.and_then(|m| m.retention_seconds)
|
||||
.unwrap_or(3600) as usize;
|
||||
|
||||
Ok(Self {
|
||||
options,
|
||||
route_table: ArcSwap::from(Arc::new(route_manager)),
|
||||
listener_manager: None,
|
||||
metrics: Arc::new(MetricsCollector::new()),
|
||||
metrics: Arc::new(MetricsCollector::with_retention(retention)),
|
||||
cert_manager,
|
||||
challenge_server: None,
|
||||
renewal_handle: None,
|
||||
sampling_handle: None,
|
||||
nft_manager: None,
|
||||
started: false,
|
||||
started_at: None,
|
||||
@@ -276,6 +282,21 @@ impl RustProxy {
|
||||
self.started = true;
|
||||
self.started_at = Some(Instant::now());
|
||||
|
||||
// Start the throughput sampling task
|
||||
let metrics = Arc::clone(&self.metrics);
|
||||
let interval_ms = self.options.metrics.as_ref()
|
||||
.and_then(|m| m.sample_interval_ms)
|
||||
.unwrap_or(1000);
|
||||
self.sampling_handle = Some(tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(
|
||||
std::time::Duration::from_millis(interval_ms)
|
||||
);
|
||||
loop {
|
||||
interval.tick().await;
|
||||
metrics.sample_all();
|
||||
}
|
||||
}));
|
||||
|
||||
// Apply NFTables rules for routes using nftables forwarding engine
|
||||
self.apply_nftables_rules(&self.options.routes.clone()).await;
|
||||
|
||||
@@ -478,6 +499,11 @@ impl RustProxy {
|
||||
|
||||
info!("Stopping RustProxy...");
|
||||
|
||||
// Stop sampling task
|
||||
if let Some(handle) = self.sampling_handle.take() {
|
||||
handle.abort();
|
||||
}
|
||||
|
||||
// Stop renewal timer
|
||||
if let Some(handle) = self.renewal_handle.take() {
|
||||
handle.abort();
|
||||
|
||||
Reference in New Issue
Block a user