fix(rustproxy-metrics): preserve high-throughput IPs in metrics snapshots when active-connection rankings are saturated

This commit is contained in:
2026-04-26 09:17:11 +00:00
parent cb71f32b90
commit 1ea290a085
3 changed files with 82 additions and 7 deletions
+75 -6
View File
@@ -144,7 +144,7 @@ pub struct Statistics {
/// Default retention for throughput samples (1 hour).
const DEFAULT_RETENTION_SECONDS: usize = 3600;
/// Maximum number of IPs to include in a snapshot (top by active connections).
/// Maximum number of IPs to include in a snapshot.
///
/// When more IPs than this are tracked, up to half of the slots go to the IPs
/// ranked highest by active connections and the remaining slots are filled in
/// throughput order.
const MAX_IPS_IN_SNAPSHOT: usize = 100;
/// Maximum number of backends to include in a snapshot (top by total connections).
@@ -1064,8 +1064,8 @@ impl MetricsCollector {
);
}
// Collect per-IP metrics — only IPs with active connections or total > 0,
// capped at top MAX_IPS_IN_SNAPSHOT sorted by active count
// Collect per-IP metrics — capped to the IPs most relevant for either
// active connection visibility or bandwidth attribution.
let mut ip_entries: Vec<(String, u64, u64, u64, u64, u64, u64, HashMap<String, u64>)> =
Vec::new();
for entry in self.ip_total_connections.iter() {
@@ -1113,9 +1113,54 @@ impl MetricsCollector {
domain_requests,
));
}
// Sort by active connections descending, then cap
ip_entries.sort_by(|a, b| b.1.cmp(&a.1));
ip_entries.truncate(MAX_IPS_IN_SNAPSHOT);
// Trim the per-IP list only when it exceeds the snapshot cap; smaller lists
// are passed through untouched.
//
// Tuple layout of each entry (see the destructuring below):
// 0 = ip, 1 = active, 2 = total, 3 = bytes_in, 4 = bytes_out,
// 5 = tp_in, 6 = tp_out, 7 = domain_requests.
if ip_entries.len() > MAX_IPS_IN_SNAPSHOT {
// selected[i] marks whether ip_entries[i] is kept; selected_count is the
// number of marked entries.
let mut selected = vec![false; ip_entries.len()];
let mut selected_count = 0usize;
// Entry indices ordered by active connections (desc), then total
// connections (desc), then IP string (asc) as the final tie-breaker.
let mut active_rank: Vec<usize> = (0..ip_entries.len()).collect();
active_rank.sort_by(|&a, &b| {
ip_entries[b]
.1
.cmp(&ip_entries[a].1)
.then_with(|| ip_entries[b].2.cmp(&ip_entries[a].2))
.then_with(|| ip_entries[a].0.cmp(&ip_entries[b].0))
});
// Entry indices ordered by combined in+out throughput (desc), then combined
// in+out byte totals (desc), then active connections (desc), then IP string
// (asc). saturating_add keeps the u64 sums from overflowing.
let mut throughput_rank: Vec<usize> = (0..ip_entries.len()).collect();
throughput_rank.sort_by(|&a, &b| {
let a_tp = ip_entries[a].5.saturating_add(ip_entries[a].6);
let b_tp = ip_entries[b].5.saturating_add(ip_entries[b].6);
let a_bytes = ip_entries[a].3.saturating_add(ip_entries[a].4);
let b_bytes = ip_entries[b].3.saturating_add(ip_entries[b].4);
b_tp.cmp(&a_tp)
.then_with(|| b_bytes.cmp(&a_bytes))
.then_with(|| ip_entries[b].1.cmp(&ip_entries[a].1))
.then_with(|| ip_entries[a].0.cmp(&ip_entries[b].0))
});
// Phase 1: reserve up to half of the cap for the top entries by active rank.
for idx in active_rank.into_iter().take(MAX_IPS_IN_SNAPSHOT / 2) {
if !selected[idx] {
selected[idx] = true;
selected_count += 1;
}
}
// Phase 2: fill the remaining slots in throughput order, skipping entries
// already picked in phase 1, and stop once the cap is reached.
for idx in throughput_rank {
if selected_count >= MAX_IPS_IN_SNAPSHOT {
break;
}
if !selected[idx] {
selected[idx] = true;
selected_count += 1;
}
}
// Keep only the marked entries; they stay in their original relative order.
ip_entries = ip_entries
.into_iter()
.enumerate()
.filter_map(|(idx, entry)| selected[idx].then_some(entry))
.collect();
}
let mut ips = std::collections::HashMap::new();
for (ip, active, total, bytes_in, bytes_out, tp_in, tp_out, domain_requests) in ip_entries {
@@ -1452,6 +1497,30 @@ mod tests {
assert!(collector.ip_connections.get("1.2.3.4").is_none());
}
/// Regression test: an IP with a single connection but a large outbound
/// transfer must still appear in the snapshot when more than
/// `MAX_IPS_IN_SNAPSHOT` other IPs each hold more open connections.
#[test]
fn test_snapshot_retains_high_throughput_ip_over_many_active_ips() {
    let metrics = MetricsCollector::with_retention(60);

    // Saturate the active-connection ranking: more IPs than the cap, each
    // opened twice on the same route.
    let scanner_total = MAX_IPS_IN_SNAPSHOT + 20;
    for host in 1..=scanner_total {
        let scanner_ip = format!("10.0.0.{}", host);
        for _ in 0..2 {
            metrics.connection_opened(Some("scanner-route"), Some(&scanner_ip));
        }
    }

    // One additional IP: a single connection carrying 900_000 outbound bytes.
    let heavy_ip = "203.0.113.10";
    let heavy_route = Some("download-route");
    metrics.connection_opened(heavy_route, Some(heavy_ip));
    metrics.record_bytes(0, 900_000, heavy_route, Some(heavy_ip));
    metrics.sample_all();

    let snap = metrics.snapshot();
    let heavy = snap.ips.get(heavy_ip).unwrap();

    // The snapshot is capped, and the heavy IP survived with its counters intact.
    assert_eq!(snap.ips.len(), MAX_IPS_IN_SNAPSHOT);
    assert_eq!(heavy.active_connections, 1);
    assert_eq!(heavy.bytes_out, 900_000);
    assert_eq!(heavy.throughput_out_bytes_per_sec, 900_000);
}
#[test]
fn test_per_ip_full_eviction_on_last_close() {
let collector = MetricsCollector::with_retention(60);