fix(monitoring): use a per-second ring buffer for DNS query metrics, improve DNS logging rate limiting and security event aggregation, and bump smartmta dependency

This commit is contained in:
2026-03-02 15:06:26 +00:00
parent 17deb481e0
commit 94fa0f04d8
8 changed files with 128 additions and 76 deletions

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@serve.zone/dcrouter',
version: '10.1.4',
version: '10.1.5',
description: 'A multifaceted routing service handling mail and SMS delivery functions.'
}

View File

@@ -222,7 +222,8 @@ export class DcRouter {
public detectedPublicIp: string | null = null;
// DNS query logging rate limiter state
private dnsLogWindow: number[] = [];
private dnsLogWindowSecond: number = 0; // epoch second of current window
private dnsLogWindowCount: number = 0; // queries logged this second
private dnsBatchCount: number = 0;
private dnsBatchTimer: ReturnType<typeof setTimeout> | null = null;
@@ -901,7 +902,8 @@ export class DcRouter {
}
this.dnsBatchTimer = null;
this.dnsBatchCount = 0;
this.dnsLogWindow = [];
this.dnsLogWindowSecond = 0;
this.dnsLogWindowCount = 0;
}
await this.opsServer.stop();
@@ -1312,11 +1314,14 @@ export class DcRouter {
}
// Adaptive logging: individual logs up to 2/sec, then batch
const now = Date.now();
this.dnsLogWindow = this.dnsLogWindow.filter(t => now - t < 1000);
const nowSec = Math.floor(Date.now() / 1000);
if (nowSec !== this.dnsLogWindowSecond) {
this.dnsLogWindowSecond = nowSec;
this.dnsLogWindowCount = 0;
}
if (this.dnsLogWindow.length < 2) {
this.dnsLogWindow.push(now);
if (this.dnsLogWindowCount < 2) {
this.dnsLogWindowCount++;
const summary = event.questions.map(q => `${q.type} ${q.name}`).join(', ');
logger.log('info', `DNS query: ${summary} (${event.responseTimeMs}ms, ${event.answered ? 'answered' : 'unanswered'})`, { zone: 'dns' });
} else {

View File

@@ -35,7 +35,9 @@ export class MetricsManager {
queryTypes: {} as Record<string, number>,
topDomains: new Map<string, number>(),
lastResetDate: new Date().toDateString(),
queryTimestamps: [] as number[], // Track query timestamps for rate calculation
// Per-second query count ring buffer (300 entries = 5 minutes)
queryRing: new Int32Array(300),
queryRingLastSecond: 0, // last epoch second that was written
responseTimes: [] as number[], // Track response times in ms
recentQueries: [] as Array<{ timestamp: number; domain: string; type: string; answered: boolean; responseTimeMs: number }>,
};
@@ -95,12 +97,13 @@ export class MetricsManager {
this.dnsMetrics.cacheMisses = 0;
this.dnsMetrics.queryTypes = {};
this.dnsMetrics.topDomains.clear();
this.dnsMetrics.queryTimestamps = [];
this.dnsMetrics.queryRing.fill(0);
this.dnsMetrics.queryRingLastSecond = 0;
this.dnsMetrics.responseTimes = [];
this.dnsMetrics.recentQueries = [];
this.dnsMetrics.lastResetDate = currentDate;
}
if (currentDate !== this.securityMetrics.lastResetDate) {
this.securityMetrics.blockedIPs = 0;
this.securityMetrics.authFailures = 0;
@@ -111,15 +114,6 @@ export class MetricsManager {
this.securityMetrics.lastResetDate = currentDate;
}
// Prune old query timestamps (keep last 5 minutes)
const fiveMinutesAgo = Date.now() - 300000;
const idx = this.dnsMetrics.queryTimestamps.findIndex(ts => ts >= fiveMinutesAgo);
if (idx > 0) {
this.dnsMetrics.queryTimestamps = this.dnsMetrics.queryTimestamps.slice(idx);
} else if (idx === -1) {
this.dnsMetrics.queryTimestamps = [];
}
// Prune old time-series buckets every minute (don't wait for lazy query)
this.pruneOldBuckets();
}, 60000); // Check every minute
@@ -150,16 +144,16 @@ export class MetricsManager {
const smartMetricsData = await this.smartMetrics.getMetrics();
const proxyMetrics = this.dcRouter.smartProxy ? this.dcRouter.smartProxy.getMetrics() : null;
const proxyStats = this.dcRouter.smartProxy ? await this.dcRouter.smartProxy.getStatistics() : null;
const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
return {
uptime: process.uptime(),
startTime: Date.now() - (process.uptime() * 1000),
memoryUsage: {
heapUsed: process.memoryUsage().heapUsed,
heapTotal: process.memoryUsage().heapTotal,
external: process.memoryUsage().external,
rss: process.memoryUsage().rss,
// Add SmartMetrics memory data
heapUsed,
heapTotal,
external,
rss,
maxMemoryMB: this.smartMetrics.maxMemoryMB,
actualUsageBytes: smartMetricsData.memoryUsageBytes,
actualUsagePercentage: smartMetricsData.memoryPercentage,
@@ -228,11 +222,8 @@ export class MetricsManager {
.slice(0, 10)
.map(([domain, count]) => ({ domain, count }));
// Calculate queries per second from recent timestamps
const now = Date.now();
const oneMinuteAgo = now - 60000;
const recentQueries = this.dnsMetrics.queryTimestamps.filter(ts => ts >= oneMinuteAgo);
const queriesPerSecond = recentQueries.length / 60;
// Calculate queries per second from ring buffer (sum last 60 seconds)
const queriesPerSecond = this.getQueryRingSum(60) / 60;
// Calculate average response time
const avgResponseTime = this.dnsMetrics.responseTimes.length > 0
@@ -436,8 +427,8 @@ export class MetricsManager {
this.dnsMetrics.cacheMisses++;
}
// Track query timestamp (pruning moved to resetInterval to avoid O(n) per query)
this.dnsMetrics.queryTimestamps.push(Date.now());
// Increment per-second query counter in ring buffer
this.incrementQueryRing();
// Track response time if provided
if (responseTimeMs) {
@@ -609,7 +600,7 @@ export class MetricsManager {
requestsPerSecond,
requestsTotal,
};
}, 200); // Use 200ms cache for more frequent updates
}, 1000); // 1s cache — matches typical dashboard poll interval
}
// --- Time-series helpers ---
@@ -638,6 +629,63 @@ export class MetricsManager {
bucket.queries++;
}
/**
 * Record one DNS query in the per-second ring buffer.
 *
 * Every slot holds the query count of a single epoch second; the slot index
 * is that second modulo the ring length. Before counting, the slots of all
 * seconds that have elapsed since the previous write are cleared, so counts
 * left over from an earlier trip around the ring are never carried forward.
 */
private incrementQueryRing(): void {
  const nowSec = Math.floor(Date.now() / 1000);
  const buffer = this.dnsMetrics.queryRing;
  const size = buffer.length;
  const previousSec = this.dnsMetrics.queryRingLastSecond;

  if (previousSec !== 0) {
    const elapsed = nowSec - previousSec;
    if (elapsed >= size) {
      // The last write is at least one full ring length old: nothing is reusable.
      buffer.fill(0);
    } else {
      // Clear each second after the previous write, up to and including the
      // current one. The loop body never runs when no whole second has elapsed.
      let sec = previousSec + 1;
      while (sec <= nowSec) {
        buffer[sec % size] = 0;
        sec++;
      }
    }

    this.dnsMetrics.queryRingLastSecond = nowSec;
    buffer[nowSec % size]++;
    return;
  }

  // A stored last-second of 0 is treated as "nothing written yet":
  // wipe the ring, anchor it at the current second and count this query.
  buffer.fill(0);
  this.dnsMetrics.queryRingLastSecond = nowSec;
  buffer[nowSec % size] = 1;
}
/**
 * Total the per-second query counts for the most recent `seconds` seconds,
 * counting backwards from the current second.
 *
 * Reading never modifies the ring. Seconds later than the last write are
 * skipped (they contribute nothing), and the scan stops once it reaches a
 * second older than the range the ring can still describe.
 *
 * @param seconds Length of the window in seconds; capped at the ring length.
 * @returns The summed query count, or 0 when nothing has been recorded or the
 *          last write is at least one full ring length in the past.
 */
private getQueryRingSum(seconds: number): number {
  const nowSec = Math.floor(Date.now() / 1000);
  const buffer = this.dnsMetrics.queryRing;
  const lastWritten = this.dnsMetrics.queryRingLastSecond;
  const size = buffer.length;

  // Nothing recorded yet, or everything in the ring has aged out.
  if (lastWritten === 0 || nowSec - lastWritten >= size) {
    return 0;
  }

  // Oldest second whose slot still belongs to the current trip around the ring.
  const oldestValid = lastWritten - (size - 1);
  const window = Math.min(seconds, size);

  let total = 0;
  let offset = 0;
  while (offset < window) {
    const sec = nowSec - offset;
    if (sec < oldestValid) {
      break; // slot content would come from an older cycle
    }
    if (sec <= lastWritten) {
      total += buffer[sec % size];
    }
    // sec > lastWritten: no write has happened for that second, so skip it
    offset++;
  }
  return total;
}
private pruneOldBuckets(): void {
const cutoff = Date.now() - 86400000; // 24h
for (const key of this.emailMinuteBuckets.keys()) {

View File

@@ -162,8 +162,9 @@ export class SecurityLogger {
}
}
// Return most recent events up to limit
// Return most recent events up to limit (slice first to avoid mutating source)
return filteredEvents
.slice()
.sort((a, b) => b.timestamp - a.timestamp)
.slice(0, limit);
}
@@ -249,58 +250,46 @@ export class SecurityLogger {
topIPs: Array<{ ip: string; count: number }>;
topDomains: Array<{ domain: string; count: number }>;
} {
// Filter by time window if provided
let events = this.securityEvents;
if (timeWindow) {
const cutoff = Date.now() - timeWindow;
events = events.filter(e => e.timestamp >= cutoff);
const cutoff = timeWindow ? Date.now() - timeWindow : 0;
// Initialize counters
const byLevel = {} as Record<SecurityLogLevel, number>;
for (const level of Object.values(SecurityLogLevel)) {
byLevel[level] = 0;
}
const byType = {} as Record<SecurityEventType, number>;
for (const type of Object.values(SecurityEventType)) {
byType[type] = 0;
}
// Count by level
const byLevel = Object.values(SecurityLogLevel).reduce((acc, level) => {
acc[level] = events.filter(e => e.level === level).length;
return acc;
}, {} as Record<SecurityLogLevel, number>);
// Count by type
const byType = Object.values(SecurityEventType).reduce((acc, type) => {
acc[type] = events.filter(e => e.type === type).length;
return acc;
}, {} as Record<SecurityEventType, number>);
// Count by IP
const ipCounts = new Map<string, number>();
events.forEach(e => {
const domainCounts = new Map<string, number>();
// Single pass over all events
let total = 0;
for (const e of this.securityEvents) {
if (cutoff && e.timestamp < cutoff) continue;
total++;
byLevel[e.level]++;
byType[e.type]++;
if (e.ipAddress) {
ipCounts.set(e.ipAddress, (ipCounts.get(e.ipAddress) || 0) + 1);
}
});
// Count by domain
const domainCounts = new Map<string, number>();
events.forEach(e => {
if (e.domain) {
domainCounts.set(e.domain, (domainCounts.get(e.domain) || 0) + 1);
}
});
}
// Sort and limit top entries
const topIPs = Array.from(ipCounts.entries())
.map(([ip, count]) => ({ ip, count }))
.sort((a, b) => b.count - a.count)
.slice(0, 10);
const topDomains = Array.from(domainCounts.entries())
.map(([domain, count]) => ({ domain, count }))
.sort((a, b) => b.count - a.count)
.slice(0, 10);
return {
total: events.length,
byLevel,
byType,
topIPs,
topDomains
};
return { total, byLevel, byType, topIPs, topDomains };
}
}