fix(monitoring): use a per-second ring buffer for DNS query metrics, improve DNS logging rate limiting and security event aggregation, and bump smartmta dependency
This commit is contained in:
@@ -35,7 +35,9 @@ export class MetricsManager {
|
||||
queryTypes: {} as Record<string, number>,
|
||||
topDomains: new Map<string, number>(),
|
||||
lastResetDate: new Date().toDateString(),
|
||||
queryTimestamps: [] as number[], // Track query timestamps for rate calculation
|
||||
// Per-second query count ring buffer (300 entries = 5 minutes)
|
||||
queryRing: new Int32Array(300),
|
||||
queryRingLastSecond: 0, // last epoch second that was written
|
||||
responseTimes: [] as number[], // Track response times in ms
|
||||
recentQueries: [] as Array<{ timestamp: number; domain: string; type: string; answered: boolean; responseTimeMs: number }>,
|
||||
};
|
||||
@@ -95,12 +97,13 @@ export class MetricsManager {
|
||||
this.dnsMetrics.cacheMisses = 0;
|
||||
this.dnsMetrics.queryTypes = {};
|
||||
this.dnsMetrics.topDomains.clear();
|
||||
this.dnsMetrics.queryTimestamps = [];
|
||||
this.dnsMetrics.queryRing.fill(0);
|
||||
this.dnsMetrics.queryRingLastSecond = 0;
|
||||
this.dnsMetrics.responseTimes = [];
|
||||
this.dnsMetrics.recentQueries = [];
|
||||
this.dnsMetrics.lastResetDate = currentDate;
|
||||
}
|
||||
|
||||
|
||||
if (currentDate !== this.securityMetrics.lastResetDate) {
|
||||
this.securityMetrics.blockedIPs = 0;
|
||||
this.securityMetrics.authFailures = 0;
|
||||
@@ -111,15 +114,6 @@ export class MetricsManager {
|
||||
this.securityMetrics.lastResetDate = currentDate;
|
||||
}
|
||||
|
||||
// Prune old query timestamps (keep last 5 minutes)
|
||||
const fiveMinutesAgo = Date.now() - 300000;
|
||||
const idx = this.dnsMetrics.queryTimestamps.findIndex(ts => ts >= fiveMinutesAgo);
|
||||
if (idx > 0) {
|
||||
this.dnsMetrics.queryTimestamps = this.dnsMetrics.queryTimestamps.slice(idx);
|
||||
} else if (idx === -1) {
|
||||
this.dnsMetrics.queryTimestamps = [];
|
||||
}
|
||||
|
||||
// Prune old time-series buckets every minute (don't wait for lazy query)
|
||||
this.pruneOldBuckets();
|
||||
}, 60000); // Check every minute
|
||||
@@ -150,16 +144,16 @@ export class MetricsManager {
|
||||
const smartMetricsData = await this.smartMetrics.getMetrics();
|
||||
const proxyMetrics = this.dcRouter.smartProxy ? this.dcRouter.smartProxy.getMetrics() : null;
|
||||
const proxyStats = this.dcRouter.smartProxy ? await this.dcRouter.smartProxy.getStatistics() : null;
|
||||
const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
|
||||
|
||||
return {
|
||||
uptime: process.uptime(),
|
||||
startTime: Date.now() - (process.uptime() * 1000),
|
||||
memoryUsage: {
|
||||
heapUsed: process.memoryUsage().heapUsed,
|
||||
heapTotal: process.memoryUsage().heapTotal,
|
||||
external: process.memoryUsage().external,
|
||||
rss: process.memoryUsage().rss,
|
||||
// Add SmartMetrics memory data
|
||||
heapUsed,
|
||||
heapTotal,
|
||||
external,
|
||||
rss,
|
||||
maxMemoryMB: this.smartMetrics.maxMemoryMB,
|
||||
actualUsageBytes: smartMetricsData.memoryUsageBytes,
|
||||
actualUsagePercentage: smartMetricsData.memoryPercentage,
|
||||
@@ -228,11 +222,8 @@ export class MetricsManager {
|
||||
.slice(0, 10)
|
||||
.map(([domain, count]) => ({ domain, count }));
|
||||
|
||||
// Calculate queries per second from recent timestamps
|
||||
const now = Date.now();
|
||||
const oneMinuteAgo = now - 60000;
|
||||
const recentQueries = this.dnsMetrics.queryTimestamps.filter(ts => ts >= oneMinuteAgo);
|
||||
const queriesPerSecond = recentQueries.length / 60;
|
||||
// Calculate queries per second from ring buffer (sum last 60 seconds)
|
||||
const queriesPerSecond = this.getQueryRingSum(60) / 60;
|
||||
|
||||
// Calculate average response time
|
||||
const avgResponseTime = this.dnsMetrics.responseTimes.length > 0
|
||||
@@ -436,8 +427,8 @@ export class MetricsManager {
|
||||
this.dnsMetrics.cacheMisses++;
|
||||
}
|
||||
|
||||
// Track query timestamp (pruning moved to resetInterval to avoid O(n) per query)
|
||||
this.dnsMetrics.queryTimestamps.push(Date.now());
|
||||
// Increment per-second query counter in ring buffer
|
||||
this.incrementQueryRing();
|
||||
|
||||
// Track response time if provided
|
||||
if (responseTimeMs) {
|
||||
@@ -609,7 +600,7 @@ export class MetricsManager {
|
||||
requestsPerSecond,
|
||||
requestsTotal,
|
||||
};
|
||||
}, 200); // Use 200ms cache for more frequent updates
|
||||
}, 1000); // 1s cache — matches typical dashboard poll interval
|
||||
}
|
||||
|
||||
// --- Time-series helpers ---
|
||||
@@ -638,6 +629,63 @@ export class MetricsManager {
|
||||
bucket.queries++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment the per-second query counter in the ring buffer.
|
||||
* Zeros any stale slots between the last write and the current second.
|
||||
*/
|
||||
private incrementQueryRing(): void {
|
||||
const currentSecond = Math.floor(Date.now() / 1000);
|
||||
const ring = this.dnsMetrics.queryRing;
|
||||
const last = this.dnsMetrics.queryRingLastSecond;
|
||||
|
||||
if (last === 0) {
|
||||
// First call — zero and anchor
|
||||
ring.fill(0);
|
||||
this.dnsMetrics.queryRingLastSecond = currentSecond;
|
||||
ring[currentSecond % ring.length] = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
const gap = currentSecond - last;
|
||||
if (gap >= ring.length) {
|
||||
// Entire ring is stale — clear all
|
||||
ring.fill(0);
|
||||
} else if (gap > 0) {
|
||||
// Zero slots from (last+1) to currentSecond (inclusive)
|
||||
for (let s = last + 1; s <= currentSecond; s++) {
|
||||
ring[s % ring.length] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
this.dnsMetrics.queryRingLastSecond = currentSecond;
|
||||
ring[currentSecond % ring.length]++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sum query counts from the ring buffer for the last N seconds.
|
||||
*/
|
||||
private getQueryRingSum(seconds: number): number {
|
||||
const currentSecond = Math.floor(Date.now() / 1000);
|
||||
const ring = this.dnsMetrics.queryRing;
|
||||
const last = this.dnsMetrics.queryRingLastSecond;
|
||||
|
||||
if (last === 0) return 0;
|
||||
|
||||
// First, zero stale slots so reads are accurate even without writes
|
||||
const gap = currentSecond - last;
|
||||
if (gap >= ring.length) return 0; // all data is stale
|
||||
|
||||
let sum = 0;
|
||||
const limit = Math.min(seconds, ring.length);
|
||||
for (let i = 0; i < limit; i++) {
|
||||
const sec = currentSecond - i;
|
||||
if (sec < last - (ring.length - 1)) break; // slot is from older cycle
|
||||
if (sec > last) continue; // no writes yet for this second
|
||||
sum += ring[sec % ring.length];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
private pruneOldBuckets(): void {
|
||||
const cutoff = Date.now() - 86400000; // 24h
|
||||
for (const key of this.emailMinuteBuckets.keys()) {
|
||||
|
||||
Reference in New Issue
Block a user