fix(monitoring): use a per-second ring buffer for DNS query metrics, improve DNS logging rate limiting and security event aggregation, and bump smartmta dependency

This commit is contained in:
2026-03-02 15:06:26 +00:00
parent 17deb481e0
commit 94fa0f04d8
8 changed files with 128 additions and 76 deletions

View File

@@ -35,7 +35,9 @@ export class MetricsManager {
queryTypes: {} as Record<string, number>,
topDomains: new Map<string, number>(),
lastResetDate: new Date().toDateString(),
queryTimestamps: [] as number[], // Track query timestamps for rate calculation
// Per-second query count ring buffer (300 entries = 5 minutes)
queryRing: new Int32Array(300),
queryRingLastSecond: 0, // last epoch second that was written
responseTimes: [] as number[], // Track response times in ms
recentQueries: [] as Array<{ timestamp: number; domain: string; type: string; answered: boolean; responseTimeMs: number }>,
};
@@ -95,12 +97,13 @@ export class MetricsManager {
this.dnsMetrics.cacheMisses = 0;
this.dnsMetrics.queryTypes = {};
this.dnsMetrics.topDomains.clear();
this.dnsMetrics.queryTimestamps = [];
this.dnsMetrics.queryRing.fill(0);
this.dnsMetrics.queryRingLastSecond = 0;
this.dnsMetrics.responseTimes = [];
this.dnsMetrics.recentQueries = [];
this.dnsMetrics.lastResetDate = currentDate;
}
if (currentDate !== this.securityMetrics.lastResetDate) {
this.securityMetrics.blockedIPs = 0;
this.securityMetrics.authFailures = 0;
@@ -111,15 +114,6 @@ export class MetricsManager {
this.securityMetrics.lastResetDate = currentDate;
}
// Prune old query timestamps (keep last 5 minutes)
const fiveMinutesAgo = Date.now() - 300000;
const idx = this.dnsMetrics.queryTimestamps.findIndex(ts => ts >= fiveMinutesAgo);
if (idx > 0) {
this.dnsMetrics.queryTimestamps = this.dnsMetrics.queryTimestamps.slice(idx);
} else if (idx === -1) {
this.dnsMetrics.queryTimestamps = [];
}
// Prune old time-series buckets every minute (don't wait for lazy query)
this.pruneOldBuckets();
}, 60000); // Check every minute
@@ -150,16 +144,16 @@ export class MetricsManager {
const smartMetricsData = await this.smartMetrics.getMetrics();
const proxyMetrics = this.dcRouter.smartProxy ? this.dcRouter.smartProxy.getMetrics() : null;
const proxyStats = this.dcRouter.smartProxy ? await this.dcRouter.smartProxy.getStatistics() : null;
const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
return {
uptime: process.uptime(),
startTime: Date.now() - (process.uptime() * 1000),
memoryUsage: {
heapUsed: process.memoryUsage().heapUsed,
heapTotal: process.memoryUsage().heapTotal,
external: process.memoryUsage().external,
rss: process.memoryUsage().rss,
// Add SmartMetrics memory data
heapUsed,
heapTotal,
external,
rss,
maxMemoryMB: this.smartMetrics.maxMemoryMB,
actualUsageBytes: smartMetricsData.memoryUsageBytes,
actualUsagePercentage: smartMetricsData.memoryPercentage,
@@ -228,11 +222,8 @@ export class MetricsManager {
.slice(0, 10)
.map(([domain, count]) => ({ domain, count }));
// Calculate queries per second from recent timestamps
const now = Date.now();
const oneMinuteAgo = now - 60000;
const recentQueries = this.dnsMetrics.queryTimestamps.filter(ts => ts >= oneMinuteAgo);
const queriesPerSecond = recentQueries.length / 60;
// Calculate queries per second from ring buffer (sum last 60 seconds)
const queriesPerSecond = this.getQueryRingSum(60) / 60;
// Calculate average response time
const avgResponseTime = this.dnsMetrics.responseTimes.length > 0
@@ -436,8 +427,8 @@ export class MetricsManager {
this.dnsMetrics.cacheMisses++;
}
// Track query timestamp (pruning moved to resetInterval to avoid O(n) per query)
this.dnsMetrics.queryTimestamps.push(Date.now());
// Increment per-second query counter in ring buffer
this.incrementQueryRing();
// Track response time if provided
if (responseTimeMs) {
@@ -609,7 +600,7 @@ export class MetricsManager {
requestsPerSecond,
requestsTotal,
};
}, 200); // Use 200ms cache for more frequent updates
}, 1000); // 1s cache — matches typical dashboard poll interval
}
// --- Time-series helpers ---
@@ -638,6 +629,63 @@ export class MetricsManager {
bucket.queries++;
}
/**
* Increment the per-second query counter in the ring buffer.
* Zeros any stale slots between the last write and the current second.
*/
private incrementQueryRing(): void {
const currentSecond = Math.floor(Date.now() / 1000);
const ring = this.dnsMetrics.queryRing;
const last = this.dnsMetrics.queryRingLastSecond;
if (last === 0) {
// First call — zero and anchor
ring.fill(0);
this.dnsMetrics.queryRingLastSecond = currentSecond;
ring[currentSecond % ring.length] = 1;
return;
}
const gap = currentSecond - last;
if (gap >= ring.length) {
// Entire ring is stale — clear all
ring.fill(0);
} else if (gap > 0) {
// Zero slots from (last+1) to currentSecond (inclusive)
for (let s = last + 1; s <= currentSecond; s++) {
ring[s % ring.length] = 0;
}
}
this.dnsMetrics.queryRingLastSecond = currentSecond;
ring[currentSecond % ring.length]++;
}
/**
* Sum query counts from the ring buffer for the last N seconds.
*/
private getQueryRingSum(seconds: number): number {
const currentSecond = Math.floor(Date.now() / 1000);
const ring = this.dnsMetrics.queryRing;
const last = this.dnsMetrics.queryRingLastSecond;
if (last === 0) return 0;
// First, zero stale slots so reads are accurate even without writes
const gap = currentSecond - last;
if (gap >= ring.length) return 0; // all data is stale
let sum = 0;
const limit = Math.min(seconds, ring.length);
for (let i = 0; i < limit; i++) {
const sec = currentSecond - i;
if (sec < last - (ring.length - 1)) break; // slot is from older cycle
if (sec > last) continue; // no writes yet for this second
sum += ring[sec % ring.length];
}
return sum;
}
private pruneOldBuckets(): void {
const cutoff = Date.now() - 86400000; // 24h
for (const key of this.emailMinuteBuckets.keys()) {