fix(metrics): fix metrics
This commit is contained in:
@ -2,6 +2,20 @@
|
|||||||
|
|
||||||
## Byte Tracking and Metrics
|
## Byte Tracking and Metrics
|
||||||
|
|
||||||
|
### Throughput Drift Issue (Fixed)
|
||||||
|
|
||||||
|
**Problem**: Throughput numbers were gradually increasing over time for long-lived connections.
|
||||||
|
|
||||||
|
**Root Cause**: The `byRoute()` and `byIP()` methods were dividing cumulative total bytes (since connection start) by the window duration, causing rates to appear higher as connections aged:
|
||||||
|
- Hour 1: 1GB total / 60s = 17 MB/s ✓
|
||||||
|
- Hour 2: 2GB total / 60s = 34 MB/s ✗ (appears doubled!)
|
||||||
|
- Hour 3: 3GB total / 60s = 51 MB/s ✗ (keeps rising!)
|
||||||
|
|
||||||
|
**Solution**: Implemented snapshot-based byte tracking that calculates actual bytes transferred within each time window:
|
||||||
|
- Store a timestamped snapshot of the cumulative byte counts each time bytes are recorded
|
||||||
|
- Calculate delta between window start and end snapshots
|
||||||
|
- Divide delta by window duration for accurate throughput
|
||||||
|
|
||||||
### What Gets Counted (Network Interface Throughput)
|
### What Gets Counted (Network Interface Throughput)
|
||||||
|
|
||||||
The byte tracking is designed to match network interface throughput (what Unifi/network monitoring tools show):
|
The byte tracking is designed to match network interface throughput (what Unifi/network monitoring tools show):
|
||||||
@ -41,10 +55,13 @@ The byte tracking is designed to match network interface throughput (what Unifi/
|
|||||||
|
|
||||||
The metrics system has three layers:
|
The metrics system has three layers:
|
||||||
1. **Connection Records** (`record.bytesReceived/bytesSent`): Track total bytes per connection
|
1. **Connection Records** (`record.bytesReceived/bytesSent`): Track total bytes per connection
|
||||||
2. **ThroughputTracker**: Accumulates bytes between samples for rate calculations (bytes/second)
|
2. **ThroughputTracker**: Accumulates bytes between samples for global rate calculations (resets each second)
|
||||||
3. **connectionByteTrackers**: Track bytes per connection with timestamps for per-route/IP metrics
|
3. **connectionByteTrackers**: Track bytes per connection with snapshots for accurate windowed per-route/IP metrics
|
||||||
|
|
||||||
Total byte counts come from connection records only, preventing double counting.
|
Key features:
|
||||||
|
- Global throughput uses sampling with accumulator reset (accurate)
|
||||||
|
- Per-route/IP throughput uses snapshots to calculate window-specific deltas (accurate)
|
||||||
|
- All byte counting happens exactly once at the data flow point
|
||||||
|
|
||||||
### Understanding "High" Byte Counts
|
### Understanding "High" Byte Counts
|
||||||
|
|
||||||
|
@ -124,36 +124,50 @@ export class MetricsCollector implements IMetrics {
|
|||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const windowStart = now - (windowSeconds * 1000);
|
const windowStart = now - (windowSeconds * 1000);
|
||||||
|
|
||||||
// Aggregate bytes by route with proper time calculation
|
// Aggregate bytes by route - calculate actual bytes transferred in window
|
||||||
const routeData = new Map<string, { bytesIn: number; bytesOut: number; totalDuration: number }>();
|
const routeData = new Map<string, { bytesIn: number; bytesOut: number }>();
|
||||||
|
|
||||||
for (const [_, tracker] of this.connectionByteTrackers) {
|
for (const [_, tracker] of this.connectionByteTrackers) {
|
||||||
// Only include connections that were active within the window
|
// Only include connections that were active within the window
|
||||||
if (tracker.lastUpdate > windowStart || tracker.startTime > windowStart) {
|
if (tracker.lastUpdate > windowStart) {
|
||||||
// Calculate the actual duration this connection was active within the window
|
let windowBytesIn = 0;
|
||||||
const connectionStart = Math.max(tracker.startTime, windowStart);
|
let windowBytesOut = 0;
|
||||||
const connectionEnd = tracker.lastUpdate;
|
|
||||||
const durationInWindow = (connectionEnd - connectionStart) / 1000; // Convert to seconds
|
|
||||||
|
|
||||||
if (durationInWindow > 0) {
|
if (tracker.windowSnapshots && tracker.windowSnapshots.length > 0) {
|
||||||
const current = routeData.get(tracker.routeName) || { bytesIn: 0, bytesOut: 0, totalDuration: 0 };
|
// Find the latest snapshot taken at or before the window start (baseline for the delta)
|
||||||
current.bytesIn += tracker.bytesIn;
|
let startSnapshot = { timestamp: tracker.startTime, bytesIn: 0, bytesOut: 0 };
|
||||||
current.bytesOut += tracker.bytesOut;
|
for (const snapshot of tracker.windowSnapshots) {
|
||||||
current.totalDuration += durationInWindow;
|
if (snapshot.timestamp <= windowStart) {
|
||||||
routeData.set(tracker.routeName, current);
|
startSnapshot = snapshot;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate bytes transferred since window start
|
||||||
|
windowBytesIn = tracker.bytesIn - startSnapshot.bytesIn;
|
||||||
|
windowBytesOut = tracker.bytesOut - startSnapshot.bytesOut;
|
||||||
|
} else if (tracker.startTime > windowStart) {
|
||||||
|
// Connection started within window, use all its bytes
|
||||||
|
windowBytesIn = tracker.bytesIn;
|
||||||
|
windowBytesOut = tracker.bytesOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add to route totals
|
||||||
|
const current = routeData.get(tracker.routeName) || { bytesIn: 0, bytesOut: 0 };
|
||||||
|
current.bytesIn += windowBytesIn;
|
||||||
|
current.bytesOut += windowBytesOut;
|
||||||
|
routeData.set(tracker.routeName, current);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert to rates (bytes per second)
|
// Convert to rates (bytes per second)
|
||||||
for (const [route, data] of routeData) {
|
for (const [route, data] of routeData) {
|
||||||
if (data.totalDuration > 0) {
|
|
||||||
routeThroughput.set(route, {
|
routeThroughput.set(route, {
|
||||||
in: Math.round(data.bytesIn / data.totalDuration),
|
in: Math.round(data.bytesIn / windowSeconds),
|
||||||
out: Math.round(data.bytesOut / data.totalDuration)
|
out: Math.round(data.bytesOut / windowSeconds)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return routeThroughput;
|
return routeThroughput;
|
||||||
},
|
},
|
||||||
@ -163,36 +177,50 @@ export class MetricsCollector implements IMetrics {
|
|||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const windowStart = now - (windowSeconds * 1000);
|
const windowStart = now - (windowSeconds * 1000);
|
||||||
|
|
||||||
// Aggregate bytes by IP with proper time calculation
|
// Aggregate bytes by IP - calculate actual bytes transferred in window
|
||||||
const ipData = new Map<string, { bytesIn: number; bytesOut: number; totalDuration: number }>();
|
const ipData = new Map<string, { bytesIn: number; bytesOut: number }>();
|
||||||
|
|
||||||
for (const [_, tracker] of this.connectionByteTrackers) {
|
for (const [_, tracker] of this.connectionByteTrackers) {
|
||||||
// Only include connections that were active within the window
|
// Only include connections that were active within the window
|
||||||
if (tracker.lastUpdate > windowStart || tracker.startTime > windowStart) {
|
if (tracker.lastUpdate > windowStart) {
|
||||||
// Calculate the actual duration this connection was active within the window
|
let windowBytesIn = 0;
|
||||||
const connectionStart = Math.max(tracker.startTime, windowStart);
|
let windowBytesOut = 0;
|
||||||
const connectionEnd = tracker.lastUpdate;
|
|
||||||
const durationInWindow = (connectionEnd - connectionStart) / 1000; // Convert to seconds
|
|
||||||
|
|
||||||
if (durationInWindow > 0) {
|
if (tracker.windowSnapshots && tracker.windowSnapshots.length > 0) {
|
||||||
const current = ipData.get(tracker.remoteIP) || { bytesIn: 0, bytesOut: 0, totalDuration: 0 };
|
// Find the latest snapshot taken at or before the window start (baseline for the delta)
|
||||||
current.bytesIn += tracker.bytesIn;
|
let startSnapshot = { timestamp: tracker.startTime, bytesIn: 0, bytesOut: 0 };
|
||||||
current.bytesOut += tracker.bytesOut;
|
for (const snapshot of tracker.windowSnapshots) {
|
||||||
current.totalDuration += durationInWindow;
|
if (snapshot.timestamp <= windowStart) {
|
||||||
ipData.set(tracker.remoteIP, current);
|
startSnapshot = snapshot;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate bytes transferred since window start
|
||||||
|
windowBytesIn = tracker.bytesIn - startSnapshot.bytesIn;
|
||||||
|
windowBytesOut = tracker.bytesOut - startSnapshot.bytesOut;
|
||||||
|
} else if (tracker.startTime > windowStart) {
|
||||||
|
// Connection started within window, use all its bytes
|
||||||
|
windowBytesIn = tracker.bytesIn;
|
||||||
|
windowBytesOut = tracker.bytesOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add to IP totals
|
||||||
|
const current = ipData.get(tracker.remoteIP) || { bytesIn: 0, bytesOut: 0 };
|
||||||
|
current.bytesIn += windowBytesIn;
|
||||||
|
current.bytesOut += windowBytesOut;
|
||||||
|
ipData.set(tracker.remoteIP, current);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert to rates (bytes per second)
|
// Convert to rates (bytes per second)
|
||||||
for (const [ip, data] of ipData) {
|
for (const [ip, data] of ipData) {
|
||||||
if (data.totalDuration > 0) {
|
|
||||||
ipThroughput.set(ip, {
|
ipThroughput.set(ip, {
|
||||||
in: Math.round(data.bytesIn / data.totalDuration),
|
in: Math.round(data.bytesIn / windowSeconds),
|
||||||
out: Math.round(data.bytesOut / data.totalDuration)
|
out: Math.round(data.bytesOut / windowSeconds)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return ipThroughput;
|
return ipThroughput;
|
||||||
}
|
}
|
||||||
@ -294,7 +322,8 @@ export class MetricsCollector implements IMetrics {
|
|||||||
bytesIn: 0,
|
bytesIn: 0,
|
||||||
bytesOut: 0,
|
bytesOut: 0,
|
||||||
startTime: now,
|
startTime: now,
|
||||||
lastUpdate: now
|
lastUpdate: now,
|
||||||
|
windowSnapshots: [] // Initialize empty snapshots array
|
||||||
});
|
});
|
||||||
|
|
||||||
// Cleanup old request timestamps
|
// Cleanup old request timestamps
|
||||||
@ -323,6 +352,22 @@ export class MetricsCollector implements IMetrics {
|
|||||||
tracker.bytesIn += bytesIn;
|
tracker.bytesIn += bytesIn;
|
||||||
tracker.bytesOut += bytesOut;
|
tracker.bytesOut += bytesOut;
|
||||||
tracker.lastUpdate = Date.now();
|
tracker.lastUpdate = Date.now();
|
||||||
|
|
||||||
|
// Initialize snapshots array if not present
|
||||||
|
if (!tracker.windowSnapshots) {
|
||||||
|
tracker.windowSnapshots = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add current snapshot - we'll use these for accurate windowed calculations
|
||||||
|
tracker.windowSnapshots.push({
|
||||||
|
timestamp: Date.now(),
|
||||||
|
bytesIn: tracker.bytesIn,
|
||||||
|
bytesOut: tracker.bytesOut
|
||||||
|
});
|
||||||
|
|
||||||
|
// Keep only snapshots from last 5 minutes to prevent memory growth
|
||||||
|
const fiveMinutesAgo = Date.now() - 300000;
|
||||||
|
tracker.windowSnapshots = tracker.windowSnapshots.filter(s => s.timestamp > fiveMinutesAgo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,4 +109,10 @@ export interface IByteTracker {
|
|||||||
bytesOut: number;
|
bytesOut: number;
|
||||||
startTime: number;
|
startTime: number;
|
||||||
lastUpdate: number;
|
lastUpdate: number;
|
||||||
|
// Timestamped snapshots of the cumulative byte counts, used to compute per-window deltas for rate calculation
|
||||||
|
windowSnapshots?: Array<{
|
||||||
|
timestamp: number;
|
||||||
|
bytesIn: number;
|
||||||
|
bytesOut: number;
|
||||||
|
}>;
|
||||||
}
|
}
|
Reference in New Issue
Block a user