2022-07-27 12:00:38 +02:00
|
|
|
import * as plugins from './smartmetrics.plugins.js';
|
|
|
|
|
import * as interfaces from './smartmetrics.interfaces.js';
|
2021-08-12 23:19:39 +02:00
|
|
|
|
|
|
|
|
/**
 * Collects CPU / memory metrics for the current process tree and for the
 * host system, publishes them as gauges on a metrics registry, can emit them
 * periodically as heartbeat log entries, and can expose them on an HTTP
 * `/metrics` endpoint in Prometheus text format.
 */
export class SmartMetrics {
  /** True while the heartbeat loop launched by {@link start} should keep running. */
  public started = false;
  /** Name of the metrics source; included in heartbeat log messages. */
  public sourceNameArg: string;
  /** Logger used for heartbeats, warnings and errors. */
  public logger: plugins.smartlog.Smartlog;
  /** Registry that owns all gauges; assigned in {@link setup}. */
  public registry!: plugins.prom.Registry;
  /** Effective memory ceiling in MiB; assigned in `checkMemoryLimits()`. */
  public maxMemoryMB!: number;

  // Prometheus gauges for custom metrics (all created in setup()).
  private cpuPercentageGauge!: plugins.prom.Gauge;
  private memoryPercentageGauge!: plugins.prom.Gauge;
  private memoryUsageBytesGauge!: plugins.prom.Gauge;
  private systemCpuPercentGauge!: plugins.prom.Gauge;
  private systemMemUsedPercentGauge!: plugins.prom.Gauge;
  private systemMemUsedBytesGauge!: plugins.prom.Gauge;
  private systemLoadAvg1Gauge!: plugins.prom.Gauge;
  private systemLoadAvg5Gauge!: plugins.prom.Gauge;
  private systemLoadAvg15Gauge!: plugins.prom.Gauge;

  // HTTP server for Prometheus endpoint
  private prometheusServer?: plugins.http.Server;
  private prometheusPort?: number;

  /**
   * Incremented on every successful {@link start}. A heartbeat loop only
   * keeps running while its own id is still the current one, so a loop that
   * is sleeping across a stop()/start() cycle terminates instead of running
   * in parallel with the new loop.
   */
  private heartbeatRunId = 0;

  /**
   * Creates a fresh registry, registers the default metrics on it and
   * creates all custom gauges. Called from the constructor.
   */
  public setup(): void {
    this.registry = new plugins.prom.Registry();
    plugins.prom.collectDefaultMetrics(this.registry);

    // Initialize custom gauges
    this.cpuPercentageGauge = new plugins.prom.Gauge({
      name: 'smartmetrics_cpu_percentage',
      help: 'Current CPU usage percentage',
      registers: [this.registry],
    });

    this.memoryPercentageGauge = new plugins.prom.Gauge({
      name: 'smartmetrics_memory_percentage',
      help: 'Current memory usage percentage',
      registers: [this.registry],
    });

    this.memoryUsageBytesGauge = new plugins.prom.Gauge({
      name: 'smartmetrics_memory_usage_bytes',
      help: 'Current memory usage in bytes',
      registers: [this.registry],
    });

    this.systemCpuPercentGauge = new plugins.prom.Gauge({
      name: 'smartmetrics_system_cpu_percent',
      help: 'System-wide CPU usage percentage',
      registers: [this.registry],
    });

    this.systemMemUsedPercentGauge = new plugins.prom.Gauge({
      name: 'smartmetrics_system_memory_used_percent',
      help: 'System-wide memory usage percentage',
      registers: [this.registry],
    });

    this.systemMemUsedBytesGauge = new plugins.prom.Gauge({
      name: 'smartmetrics_system_memory_used_bytes',
      help: 'System-wide memory used in bytes',
      registers: [this.registry],
    });

    this.systemLoadAvg1Gauge = new plugins.prom.Gauge({
      name: 'smartmetrics_system_load_avg_1',
      help: 'System 1-minute load average',
      registers: [this.registry],
    });

    this.systemLoadAvg5Gauge = new plugins.prom.Gauge({
      name: 'smartmetrics_system_load_avg_5',
      help: 'System 5-minute load average',
      registers: [this.registry],
    });

    this.systemLoadAvg15Gauge = new plugins.prom.Gauge({
      name: 'smartmetrics_system_load_avg_15',
      help: 'System 15-minute load average',
      registers: [this.registry],
    });
  }

  /**
   * @param loggerArg logger that receives heartbeats, warnings and errors
   * @param sourceNameArg name used to identify this source in log messages
   */
  constructor(loggerArg: plugins.smartlog.Smartlog, sourceNameArg: string) {
    this.logger = loggerArg;
    this.sourceNameArg = sourceNameArg;
    this.setup();
    this.checkMemoryLimits();
  }

  /**
   * Determines {@link maxMemoryMB} as the smallest of: total system memory,
   * the cgroup memory limit (if one can be read) and the V8 heap size limit.
   *
   * A cgroup value is only used when it parses to a finite, positive number;
   * otherwise total system memory stays the fallback, so `maxMemoryMB` can
   * never become `NaN` because of an unexpected file content.
   */
  private checkMemoryLimits(): void {
    const bytesPerMB = 1024 * 1024;
    const maxHeapSizeMB = plugins.v8.getHeapStatistics().heap_size_limit / bytesPerMB;
    const totalSystemMemoryMB = plugins.os.totalmem() / bytesPerMB;

    let dockerMemoryLimitMB = totalSystemMemoryMB;
    const applyCgroupLimit = (rawValue: string): void => {
      const limitBytes = parseInt(rawValue, 10);
      if (Number.isFinite(limitBytes) && limitBytes > 0) {
        dockerMemoryLimitMB = limitBytes / bytesPerMB;
      }
    };

    // Try cgroup v2 first, then fall back to cgroup v1
    try {
      const cgroupV2 = plugins.fs.readFileSync('/sys/fs/cgroup/memory.max', 'utf8').trim();
      // The literal value 'max' means that no limit is configured.
      if (cgroupV2 !== 'max') {
        applyCgroupLimit(cgroupV2);
      }
    } catch {
      try {
        const cgroupV1 = plugins.fs
          .readFileSync('/sys/fs/cgroup/memory/memory.limit_in_bytes', 'utf8')
          .trim();
        applyCgroupLimit(cgroupV1);
      } catch {
        // Not running in a container — use system memory
      }
    }

    // Pick the most restrictive limit
    this.maxMemoryMB = Math.min(totalSystemMemoryMB, dockerMemoryLimitMB, maxHeapSizeMB);
  }

  /**
   * Starts the background heartbeat loop. Does nothing when already started.
   *
   * Each iteration logs the current metrics and then waits 20 seconds.
   * A failing metrics collection is logged and the loop continues with the
   * next iteration instead of dying with an unhandled rejection.
   */
  public start(): void {
    if (this.started) {
      return;
    }
    this.started = true;
    const runId = ++this.heartbeatRunId;

    const unattendedStart = async (): Promise<void> => {
      while (this.started && runId === this.heartbeatRunId) {
        try {
          const metrics = await this.getMetrics();
          this.logger.log('info', `sending heartbeat for ${this.sourceNameArg} with metrics`, {
            eventType: 'heartbeat',
            metrics,
          });
        } catch (error) {
          this.logger.log(
            'error',
            `failed to send heartbeat for ${this.sourceNameArg}`,
            error
          );
        }
        // NOTE(review): arguments kept as in the original call; the third
        // argument presumably requests an unref'd timer — confirm against smartdelay.
        await plugins.smartdelay.delayFor(20000, null, true);
      }
    };

    unattendedStart().catch((error: unknown) => {
      // Allow a later start() to launch a new loop if this one was still current.
      if (runId === this.heartbeatRunId) {
        this.started = false;
      }
      this.logger.log(
        'error',
        `heartbeat loop for ${this.sourceNameArg} stopped unexpectedly`,
        error
      );
    });
  }

  /**
   * Formats a byte count using 1024-based units (`Bytes`, `KB`, ... `YB`).
   *
   * The unit index is clamped to the available units, so fractional values
   * below one byte are reported in `Bytes`, values beyond the largest unit
   * are reported in `YB`, and negative values keep their sign.
   *
   * @param bytes number of bytes to format
   * @param decimals maximum number of decimals; negative values are treated as 0
   * @returns the formatted text, e.g. `'1.5 KB'`
   */
  public formatBytes(bytes: number, decimals = 2): string {
    if (bytes === 0) return '0 Bytes';
    // NaN / ±Infinity have no meaningful unit; report them as-is.
    if (!Number.isFinite(bytes)) return `${bytes} Bytes`;

    const k = 1024;
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];

    // Use the magnitude for the logarithm (log of a negative number is NaN)
    // and clamp so the index is always a valid position in `sizes`.
    const rawIndex = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k));
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

    return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + ' ' + sizes[i];
  }

  /**
   * Collects a metrics snapshot for this process and its child processes
   * plus system-wide usage, updates all gauges and returns the snapshot.
   *
   * @returns the collected metrics
   */
  public async getMetrics(): Promise<interfaces.IMetricsSnapshot> {
    let pids: number[] = [];
    try {
      pids = await plugins.pidtree.getChildPids(process.pid);
    } catch {
      // pidtree can fail if process tree cannot be read
    }
    const stats = await plugins.pidusage.getPidUsage([process.pid, ...pids]);

    // Aggregate normalized CPU (0-100% of total machine) and memory across process tree
    let cpuPercentage = 0;
    let memoryUsageBytes = 0;
    for (const stat of Object.values(stats)) {
      if (!stat) continue;
      cpuPercentage += stat.cpuNormalizedPercent;
      memoryUsageBytes += stat.memory;
    }

    const cpuUsageText = `${Math.round(cpuPercentage * 100) / 100} %`;

    // Memory usage relative to the effective limit, rounded to two decimals.
    const maxMemoryBytes = this.maxMemoryMB * 1024 * 1024;
    const memoryPercentage = Math.round((memoryUsageBytes / maxMemoryBytes) * 100 * 100) / 100;
    const memoryUsageText = `${memoryPercentage}% | ${this.formatBytes(
      memoryUsageBytes
    )} / ${this.formatBytes(maxMemoryBytes)}`;

    // Get system-wide metrics
    const systemUsage = await plugins.sysusage.getSystemUsage();

    // Update Prometheus gauges with current values
    this.cpuPercentageGauge?.set(cpuPercentage);
    this.memoryPercentageGauge?.set(memoryPercentage);
    this.memoryUsageBytesGauge?.set(memoryUsageBytes);
    this.systemCpuPercentGauge?.set(systemUsage.cpuPercent);
    this.systemMemUsedPercentGauge?.set(systemUsage.memUsedPercent);
    this.systemMemUsedBytesGauge?.set(systemUsage.memUsedBytes);
    this.systemLoadAvg1Gauge?.set(systemUsage.loadAvg1);
    this.systemLoadAvg5Gauge?.set(systemUsage.loadAvg5);
    this.systemLoadAvg15Gauge?.set(systemUsage.loadAvg15);

    // Calculate Node.js metrics directly
    const cpuUsage = process.cpuUsage();
    // process.cpuUsage() reports microseconds; convert to seconds.
    const process_cpu_seconds_total = (cpuUsage.user + cpuUsage.system) / 1000000;

    const heapStats = plugins.v8.getHeapStatistics();
    const nodejs_heap_size_total_bytes = heapStats.total_heap_size;

    // These two values are not measured here and are always reported as 0.
    const nodejs_active_handles_total = 0;
    const nodejs_active_requests_total = 0;

    const returnMetrics: interfaces.IMetricsSnapshot = {
      process_cpu_seconds_total,
      nodejs_active_handles_total,
      nodejs_active_requests_total,
      nodejs_heap_size_total_bytes,
      cpuPercentage,
      cpuUsageText,
      memoryPercentage,
      memoryUsageBytes,
      memoryUsageText,
      systemCpuPercent: systemUsage.cpuPercent,
      systemMemTotalBytes: systemUsage.memTotalBytes,
      systemMemAvailableBytes: systemUsage.memAvailableBytes,
      systemMemUsedBytes: systemUsage.memUsedBytes,
      systemMemUsedPercent: systemUsage.memUsedPercent,
      systemLoadAvg1: systemUsage.loadAvg1,
      systemLoadAvg5: systemUsage.loadAvg5,
      systemLoadAvg15: systemUsage.loadAvg15,
    };
    return returnMetrics;
  }

  /**
   * Refreshes all gauges and returns the registry content.
   *
   * @returns the metrics in Prometheus text exposition format
   */
  public async getPrometheusFormattedMetrics(): Promise<string> {
    // Update metrics to ensure gauges have latest values
    await this.getMetrics();

    // Return Prometheus text exposition format
    return await this.registry.metrics();
  }

  /**
   * Starts an HTTP server that answers `GET /metrics` (with or without a
   * query string) with the current metrics and everything else with 404.
   * Logs a warning and does nothing when an endpoint is already running.
   *
   * Server errors such as a port that is already in use are logged instead
   * of being thrown as uncaught exceptions; when the server is not listening
   * after such an error, the endpoint state is reset so it can be enabled again.
   *
   * @param port TCP port to listen on
   */
  public enablePrometheusEndpoint(port: number = 9090): void {
    if (this.prometheusServer) {
      this.logger.log('warn', 'Prometheus endpoint is already running');
      return;
    }

    const server = plugins.http.createServer(async (req, res) => {
      // Ignore an optional query string when matching the path.
      const requestPath = (req.url ?? '').split('?')[0];
      if (requestPath !== '/metrics' || req.method !== 'GET') {
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('Not Found');
        return;
      }

      try {
        const metrics = await this.getPrometheusFormattedMetrics();
        res.writeHead(200, { 'Content-Type': 'text/plain; version=0.0.4' });
        res.end(metrics);
      } catch (error) {
        // Only send the error response parts that can still be sent.
        if (!res.headersSent) {
          res.writeHead(500, { 'Content-Type': 'text/plain' });
        }
        if (!res.writableEnded) {
          res.end('Error generating metrics');
        }
        this.logger.log('error', 'Error generating Prometheus metrics', error);
      }
    });

    server.on('error', (error: Error) => {
      this.logger.log('error', `Prometheus metrics endpoint on port ${port} failed`, error);
      // A server that never started listening must not block a retry.
      if (!server.listening && this.prometheusServer === server) {
        this.prometheusServer = undefined;
        this.prometheusPort = undefined;
      }
    });

    this.prometheusServer = server;
    this.prometheusPort = port;
    server.listen(port, () => {
      this.logger.log('info', `Prometheus metrics endpoint available at http://localhost:${port}/metrics`);
    });
  }

  /**
   * Closes the HTTP endpoint if one is running and clears its state.
   */
  public disablePrometheusEndpoint(): void {
    const server = this.prometheusServer;
    if (!server) {
      return;
    }

    const port = this.prometheusPort;
    server.close(() => {
      this.logger.log('info', `Prometheus metrics endpoint on port ${port} has been shut down`);
    });

    this.prometheusServer = undefined;
    this.prometheusPort = undefined;
  }

  /**
   * Stops the heartbeat loop and shuts down the Prometheus endpoint.
   */
  public stop(): void {
    this.started = false;
    this.disablePrometheusEndpoint();
  }
}
|