fix(metrics): fix metrics

This commit is contained in:
Juergen Kunz
2025-06-23 13:07:30 +00:00
parent cc9e76fade
commit caa15e539e
3 changed files with 110 additions and 42 deletions

View File

@ -2,6 +2,20 @@
## Byte Tracking and Metrics ## Byte Tracking and Metrics
### Throughput Drift Issue (Fixed)
**Problem**: Throughput numbers were gradually increasing over time for long-lived connections.
**Root Cause**: The `byRoute()` and `byIP()` methods were dividing cumulative total bytes (since connection start) by the window duration, causing rates to appear higher as connections aged:
- Hour 1: 1GB total / 60s = 17 MB/s ✓
- Hour 2: 2GB total / 60s = 34 MB/s ✗ (appears doubled!)
- Hour 3: 3GB total / 60s = 50 MB/s ✗ (keeps rising!)
**Solution**: Implemented snapshot-based byte tracking that calculates actual bytes transferred within each time window:
- Store periodic snapshots of byte counts with timestamps
- Calculate delta between window start and end snapshots
- Divide delta by window duration for accurate throughput
### What Gets Counted (Network Interface Throughput) ### What Gets Counted (Network Interface Throughput)
The byte tracking is designed to match network interface throughput (what Unifi/network monitoring tools show): The byte tracking is designed to match network interface throughput (what Unifi/network monitoring tools show):
@ -41,10 +55,13 @@ The byte tracking is designed to match network interface throughput (what Unifi/
The metrics system has three layers: The metrics system has three layers:
1. **Connection Records** (`record.bytesReceived/bytesSent`): Track total bytes per connection 1. **Connection Records** (`record.bytesReceived/bytesSent`): Track total bytes per connection
2. **ThroughputTracker**: Accumulates bytes between samples for rate calculations (bytes/second) 2. **ThroughputTracker**: Accumulates bytes between samples for global rate calculations (resets each second)
3. **connectionByteTrackers**: Track bytes per connection with timestamps for per-route/IP metrics 3. **connectionByteTrackers**: Track bytes per connection with snapshots for accurate windowed per-route/IP metrics
Total byte counts come from connection records only, preventing double counting. Key features:
- Global throughput uses sampling with accumulator reset (accurate)
- Per-route/IP throughput uses snapshots to calculate window-specific deltas (accurate)
- All byte counting happens exactly once at the data flow point
### Understanding "High" Byte Counts ### Understanding "High" Byte Counts

View File

@ -124,36 +124,50 @@ export class MetricsCollector implements IMetrics {
const now = Date.now(); const now = Date.now();
const windowStart = now - (windowSeconds * 1000); const windowStart = now - (windowSeconds * 1000);
// Aggregate bytes by route with proper time calculation // Aggregate bytes by route - calculate actual bytes transferred in window
const routeData = new Map<string, { bytesIn: number; bytesOut: number; totalDuration: number }>(); const routeData = new Map<string, { bytesIn: number; bytesOut: number }>();
for (const [_, tracker] of this.connectionByteTrackers) { for (const [_, tracker] of this.connectionByteTrackers) {
// Only include connections that were active within the window // Only include connections that were active within the window
if (tracker.lastUpdate > windowStart || tracker.startTime > windowStart) { if (tracker.lastUpdate > windowStart) {
// Calculate the actual duration this connection was active within the window let windowBytesIn = 0;
const connectionStart = Math.max(tracker.startTime, windowStart); let windowBytesOut = 0;
const connectionEnd = tracker.lastUpdate;
const durationInWindow = (connectionEnd - connectionStart) / 1000; // Convert to seconds
if (durationInWindow > 0) { if (tracker.windowSnapshots && tracker.windowSnapshots.length > 0) {
const current = routeData.get(tracker.routeName) || { bytesIn: 0, bytesOut: 0, totalDuration: 0 }; // Find the earliest snapshot within or just before the window
current.bytesIn += tracker.bytesIn; let startSnapshot = { timestamp: tracker.startTime, bytesIn: 0, bytesOut: 0 };
current.bytesOut += tracker.bytesOut; for (const snapshot of tracker.windowSnapshots) {
current.totalDuration += durationInWindow; if (snapshot.timestamp <= windowStart) {
routeData.set(tracker.routeName, current); startSnapshot = snapshot;
} else {
break;
} }
} }
// Calculate bytes transferred since window start
windowBytesIn = tracker.bytesIn - startSnapshot.bytesIn;
windowBytesOut = tracker.bytesOut - startSnapshot.bytesOut;
} else if (tracker.startTime > windowStart) {
// Connection started within window, use all its bytes
windowBytesIn = tracker.bytesIn;
windowBytesOut = tracker.bytesOut;
}
// Add to route totals
const current = routeData.get(tracker.routeName) || { bytesIn: 0, bytesOut: 0 };
current.bytesIn += windowBytesIn;
current.bytesOut += windowBytesOut;
routeData.set(tracker.routeName, current);
}
} }
// Convert to rates (bytes per second) // Convert to rates (bytes per second)
for (const [route, data] of routeData) { for (const [route, data] of routeData) {
if (data.totalDuration > 0) {
routeThroughput.set(route, { routeThroughput.set(route, {
in: Math.round(data.bytesIn / data.totalDuration), in: Math.round(data.bytesIn / windowSeconds),
out: Math.round(data.bytesOut / data.totalDuration) out: Math.round(data.bytesOut / windowSeconds)
}); });
} }
}
return routeThroughput; return routeThroughput;
}, },
@ -163,36 +177,50 @@ export class MetricsCollector implements IMetrics {
const now = Date.now(); const now = Date.now();
const windowStart = now - (windowSeconds * 1000); const windowStart = now - (windowSeconds * 1000);
// Aggregate bytes by IP with proper time calculation // Aggregate bytes by IP - calculate actual bytes transferred in window
const ipData = new Map<string, { bytesIn: number; bytesOut: number; totalDuration: number }>(); const ipData = new Map<string, { bytesIn: number; bytesOut: number }>();
for (const [_, tracker] of this.connectionByteTrackers) { for (const [_, tracker] of this.connectionByteTrackers) {
// Only include connections that were active within the window // Only include connections that were active within the window
if (tracker.lastUpdate > windowStart || tracker.startTime > windowStart) { if (tracker.lastUpdate > windowStart) {
// Calculate the actual duration this connection was active within the window let windowBytesIn = 0;
const connectionStart = Math.max(tracker.startTime, windowStart); let windowBytesOut = 0;
const connectionEnd = tracker.lastUpdate;
const durationInWindow = (connectionEnd - connectionStart) / 1000; // Convert to seconds
if (durationInWindow > 0) { if (tracker.windowSnapshots && tracker.windowSnapshots.length > 0) {
const current = ipData.get(tracker.remoteIP) || { bytesIn: 0, bytesOut: 0, totalDuration: 0 }; // Find the earliest snapshot within or just before the window
current.bytesIn += tracker.bytesIn; let startSnapshot = { timestamp: tracker.startTime, bytesIn: 0, bytesOut: 0 };
current.bytesOut += tracker.bytesOut; for (const snapshot of tracker.windowSnapshots) {
current.totalDuration += durationInWindow; if (snapshot.timestamp <= windowStart) {
ipData.set(tracker.remoteIP, current); startSnapshot = snapshot;
} else {
break;
} }
} }
// Calculate bytes transferred since window start
windowBytesIn = tracker.bytesIn - startSnapshot.bytesIn;
windowBytesOut = tracker.bytesOut - startSnapshot.bytesOut;
} else if (tracker.startTime > windowStart) {
// Connection started within window, use all its bytes
windowBytesIn = tracker.bytesIn;
windowBytesOut = tracker.bytesOut;
}
// Add to IP totals
const current = ipData.get(tracker.remoteIP) || { bytesIn: 0, bytesOut: 0 };
current.bytesIn += windowBytesIn;
current.bytesOut += windowBytesOut;
ipData.set(tracker.remoteIP, current);
}
} }
// Convert to rates (bytes per second) // Convert to rates (bytes per second)
for (const [ip, data] of ipData) { for (const [ip, data] of ipData) {
if (data.totalDuration > 0) {
ipThroughput.set(ip, { ipThroughput.set(ip, {
in: Math.round(data.bytesIn / data.totalDuration), in: Math.round(data.bytesIn / windowSeconds),
out: Math.round(data.bytesOut / data.totalDuration) out: Math.round(data.bytesOut / windowSeconds)
}); });
} }
}
return ipThroughput; return ipThroughput;
} }
@ -294,7 +322,8 @@ export class MetricsCollector implements IMetrics {
bytesIn: 0, bytesIn: 0,
bytesOut: 0, bytesOut: 0,
startTime: now, startTime: now,
lastUpdate: now lastUpdate: now,
windowSnapshots: [] // Initialize empty snapshots array
}); });
// Cleanup old request timestamps // Cleanup old request timestamps
@ -323,6 +352,22 @@ export class MetricsCollector implements IMetrics {
tracker.bytesIn += bytesIn; tracker.bytesIn += bytesIn;
tracker.bytesOut += bytesOut; tracker.bytesOut += bytesOut;
tracker.lastUpdate = Date.now(); tracker.lastUpdate = Date.now();
// Initialize snapshots array if not present
if (!tracker.windowSnapshots) {
tracker.windowSnapshots = [];
}
// Add current snapshot - we'll use these for accurate windowed calculations
tracker.windowSnapshots.push({
timestamp: Date.now(),
bytesIn: tracker.bytesIn,
bytesOut: tracker.bytesOut
});
// Keep only snapshots from last 5 minutes to prevent memory growth
const fiveMinutesAgo = Date.now() - 300000;
tracker.windowSnapshots = tracker.windowSnapshots.filter(s => s.timestamp > fiveMinutesAgo);
} }
} }

View File

@ -109,4 +109,10 @@ export interface IByteTracker {
bytesOut: number; bytesOut: number;
startTime: number; startTime: number;
lastUpdate: number; lastUpdate: number;
// Track bytes at window boundaries for rate calculation
windowSnapshots?: Array<{
timestamp: number;
bytesIn: number;
bytesOut: number;
}>;
} }