fix(rustproxy): Use cooperative cancellation for background tasks, prune stale caches and metric entries, and switch tests to dynamic port allocation to avoid port conflicts

This commit is contained in:
2026-02-24 20:56:37 +00:00
parent 755c81c042
commit 33cd5330c4
24 changed files with 535 additions and 560 deletions

View File

@@ -239,21 +239,26 @@ impl MetricsCollector {
}
if let Some(ip) = source_ip {
self.ip_bytes_in
.entry(ip.to_string())
.or_insert_with(|| AtomicU64::new(0))
.fetch_add(bytes_in, Ordering::Relaxed);
self.ip_bytes_out
.entry(ip.to_string())
.or_insert_with(|| AtomicU64::new(0))
.fetch_add(bytes_out, Ordering::Relaxed);
// Only record per-IP stats if the IP still has active connections.
// This prevents orphaned entries when record_bytes races with
// connection_closed (which evicts all per-IP data on last close).
if self.ip_connections.contains_key(ip) {
self.ip_bytes_in
.entry(ip.to_string())
.or_insert_with(|| AtomicU64::new(0))
.fetch_add(bytes_in, Ordering::Relaxed);
self.ip_bytes_out
.entry(ip.to_string())
.or_insert_with(|| AtomicU64::new(0))
.fetch_add(bytes_out, Ordering::Relaxed);
// Accumulate into per-IP pending throughput counters (lock-free)
let entry = self.ip_pending_tp
.entry(ip.to_string())
.or_insert_with(|| (AtomicU64::new(0), AtomicU64::new(0)));
entry.0.fetch_add(bytes_in, Ordering::Relaxed);
entry.1.fetch_add(bytes_out, Ordering::Relaxed);
// Accumulate into per-IP pending throughput counters (lock-free)
let entry = self.ip_pending_tp
.entry(ip.to_string())
.or_insert_with(|| (AtomicU64::new(0), AtomicU64::new(0)));
entry.0.fetch_add(bytes_in, Ordering::Relaxed);
entry.1.fetch_add(bytes_out, Ordering::Relaxed);
}
}
}
@@ -347,6 +352,15 @@ impl MetricsCollector {
tracker.record_bytes(pending_reqs, 0);
tracker.sample();
}
// Safety-net: prune orphaned per-IP entries that have no corresponding
// ip_connections entry. This catches any entries created by a race between
// record_bytes and connection_closed.
self.ip_bytes_in.retain(|k, _| self.ip_connections.contains_key(k));
self.ip_bytes_out.retain(|k, _| self.ip_connections.contains_key(k));
self.ip_pending_tp.retain(|k, _| self.ip_connections.contains_key(k));
self.ip_throughput.retain(|k, _| self.ip_connections.contains_key(k));
self.ip_total_connections.retain(|k, _| self.ip_connections.contains_key(k));
}
/// Remove per-route metrics for route IDs that are no longer active.
@@ -733,6 +747,49 @@ mod tests {
assert!(collector.route_total_connections.get("route-c").is_some());
}
#[test]
fn test_record_bytes_after_close_no_orphan() {
let collector = MetricsCollector::with_retention(60);
// Open a connection, record bytes, then close
collector.connection_opened(Some("route-a"), Some("10.0.0.1"));
collector.record_bytes(100, 200, Some("route-a"), Some("10.0.0.1"));
collector.connection_closed(Some("route-a"), Some("10.0.0.1"));
// IP should be fully evicted
assert!(collector.ip_connections.get("10.0.0.1").is_none());
// Now record_bytes arrives late (simulates race) — should NOT re-create entries
collector.record_bytes(50, 75, Some("route-a"), Some("10.0.0.1"));
assert!(collector.ip_bytes_in.get("10.0.0.1").is_none());
assert!(collector.ip_bytes_out.get("10.0.0.1").is_none());
assert!(collector.ip_pending_tp.get("10.0.0.1").is_none());
// Global bytes should still be counted
assert_eq!(collector.total_bytes_in.load(Ordering::Relaxed), 150);
assert_eq!(collector.total_bytes_out.load(Ordering::Relaxed), 275);
}
#[test]
fn test_sample_all_prunes_orphaned_ip_entries() {
let collector = MetricsCollector::with_retention(60);
// Manually insert orphaned entries (simulates the race before the guard)
collector.ip_bytes_in.insert("orphan-ip".to_string(), AtomicU64::new(100));
collector.ip_bytes_out.insert("orphan-ip".to_string(), AtomicU64::new(200));
collector.ip_pending_tp.insert("orphan-ip".to_string(), (AtomicU64::new(0), AtomicU64::new(0)));
// No ip_connections entry for "orphan-ip"
assert!(collector.ip_connections.get("orphan-ip").is_none());
// sample_all should prune the orphans
collector.sample_all();
assert!(collector.ip_bytes_in.get("orphan-ip").is_none());
assert!(collector.ip_bytes_out.get("orphan-ip").is_none());
assert!(collector.ip_pending_tp.get("orphan-ip").is_none());
}
#[test]
fn test_throughput_history_in_snapshot() {
let collector = MetricsCollector::with_retention(60);