BREAKING CHANGE(smartmetrics): add system-wide metrics collection, Prometheus gauges, and normalized CPU reporting
This commit is contained in:
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
// Package identity record: npm name, released version and short description.
// Only one `version` key is kept (an object literal cannot declare it twice);
// 3.0.0 is the major bump that goes with the breaking metrics changes.
export const commitinfo = {
  name: '@push.rocks/smartmetrics',
  version: '3.0.0',
  description: 'A package for easy collection and reporting of system and process metrics.'
}
|
||||
|
||||
@@ -12,6 +12,12 @@ export class SmartMetrics {
|
||||
private cpuPercentageGauge: plugins.prom.Gauge;
|
||||
private memoryPercentageGauge: plugins.prom.Gauge;
|
||||
private memoryUsageBytesGauge: plugins.prom.Gauge;
|
||||
private systemCpuPercentGauge: plugins.prom.Gauge;
|
||||
private systemMemUsedPercentGauge: plugins.prom.Gauge;
|
||||
private systemMemUsedBytesGauge: plugins.prom.Gauge;
|
||||
private systemLoadAvg1Gauge: plugins.prom.Gauge;
|
||||
private systemLoadAvg5Gauge: plugins.prom.Gauge;
|
||||
private systemLoadAvg15Gauge: plugins.prom.Gauge;
|
||||
|
||||
// HTTP server for Prometheus endpoint
|
||||
private prometheusServer?: plugins.http.Server;
|
||||
@@ -39,6 +45,42 @@ export class SmartMetrics {
|
||||
help: 'Current memory usage in bytes',
|
||||
registers: [this.registry]
|
||||
});
|
||||
|
||||
this.systemCpuPercentGauge = new plugins.prom.Gauge({
|
||||
name: 'smartmetrics_system_cpu_percent',
|
||||
help: 'System-wide CPU usage percentage',
|
||||
registers: [this.registry]
|
||||
});
|
||||
|
||||
this.systemMemUsedPercentGauge = new plugins.prom.Gauge({
|
||||
name: 'smartmetrics_system_memory_used_percent',
|
||||
help: 'System-wide memory usage percentage',
|
||||
registers: [this.registry]
|
||||
});
|
||||
|
||||
this.systemMemUsedBytesGauge = new plugins.prom.Gauge({
|
||||
name: 'smartmetrics_system_memory_used_bytes',
|
||||
help: 'System-wide memory used in bytes',
|
||||
registers: [this.registry]
|
||||
});
|
||||
|
||||
this.systemLoadAvg1Gauge = new plugins.prom.Gauge({
|
||||
name: 'smartmetrics_system_load_avg_1',
|
||||
help: 'System 1-minute load average',
|
||||
registers: [this.registry]
|
||||
});
|
||||
|
||||
this.systemLoadAvg5Gauge = new plugins.prom.Gauge({
|
||||
name: 'smartmetrics_system_load_avg_5',
|
||||
help: 'System 5-minute load average',
|
||||
registers: [this.registry]
|
||||
});
|
||||
|
||||
this.systemLoadAvg15Gauge = new plugins.prom.Gauge({
|
||||
name: 'smartmetrics_system_load_avg_15',
|
||||
help: 'System 15-minute load average',
|
||||
registers: [this.registry]
|
||||
});
|
||||
}
|
||||
|
||||
constructor(loggerArg: plugins.smartlog.Smartlog, sourceNameArg: string) {
|
||||
@@ -115,17 +157,18 @@ export class SmartMetrics {
|
||||
}
|
||||
const stats = await plugins.pidusage.getPidUsage([process.pid, ...pids]);
|
||||
|
||||
// Aggregate normalized CPU (0-100% of total machine) across process tree
|
||||
let cpuPercentage = 0;
|
||||
for (const stat of Object.keys(stats)) {
|
||||
if (!stats[stat as any]) continue;
|
||||
cpuPercentage += stats[stat as any].cpu;
|
||||
for (const stat of Object.values(stats)) {
|
||||
if (!stat) continue;
|
||||
cpuPercentage += stat.cpuNormalizedPercent;
|
||||
}
|
||||
let cpuUsageText = `${Math.round(cpuPercentage * 100) / 100} %`;
|
||||
|
||||
let memoryUsageBytes = 0;
|
||||
for (const stat of Object.keys(stats)) {
|
||||
if (!stats[stat as any]) continue;
|
||||
memoryUsageBytes += stats[stat as any].memory;
|
||||
for (const stat of Object.values(stats)) {
|
||||
if (!stat) continue;
|
||||
memoryUsageBytes += stat.memory;
|
||||
}
|
||||
|
||||
let memoryPercentage =
|
||||
@@ -134,6 +177,9 @@ export class SmartMetrics {
|
||||
memoryUsageBytes
|
||||
)} / ${this.formatBytes(this.maxMemoryMB * 1024 * 1024)}`;
|
||||
|
||||
// Get system-wide metrics
|
||||
const systemUsage = await plugins.sysusage.getSystemUsage();
|
||||
|
||||
// Update Prometheus gauges with current values
|
||||
if (this.cpuPercentageGauge) {
|
||||
this.cpuPercentageGauge.set(cpuPercentage);
|
||||
@@ -144,16 +190,32 @@ export class SmartMetrics {
|
||||
if (this.memoryUsageBytesGauge) {
|
||||
this.memoryUsageBytesGauge.set(memoryUsageBytes);
|
||||
}
|
||||
if (this.systemCpuPercentGauge) {
|
||||
this.systemCpuPercentGauge.set(systemUsage.cpuPercent);
|
||||
}
|
||||
if (this.systemMemUsedPercentGauge) {
|
||||
this.systemMemUsedPercentGauge.set(systemUsage.memUsedPercent);
|
||||
}
|
||||
if (this.systemMemUsedBytesGauge) {
|
||||
this.systemMemUsedBytesGauge.set(systemUsage.memUsedBytes);
|
||||
}
|
||||
if (this.systemLoadAvg1Gauge) {
|
||||
this.systemLoadAvg1Gauge.set(systemUsage.loadAvg1);
|
||||
}
|
||||
if (this.systemLoadAvg5Gauge) {
|
||||
this.systemLoadAvg5Gauge.set(systemUsage.loadAvg5);
|
||||
}
|
||||
if (this.systemLoadAvg15Gauge) {
|
||||
this.systemLoadAvg15Gauge.set(systemUsage.loadAvg15);
|
||||
}
|
||||
|
||||
// Calculate Node.js metrics directly
|
||||
const cpuUsage = process.cpuUsage();
|
||||
const process_cpu_seconds_total = (cpuUsage.user + cpuUsage.system) / 1000000; // Convert from microseconds to seconds
|
||||
const process_cpu_seconds_total = (cpuUsage.user + cpuUsage.system) / 1000000;
|
||||
|
||||
const heapStats = plugins.v8.getHeapStatistics();
|
||||
const nodejs_heap_size_total_bytes = heapStats.total_heap_size;
|
||||
|
||||
// Note: Active handles and requests are internal Node.js metrics that require deprecated APIs
|
||||
// We return 0 here, but the Prometheus default collectors will track the real values
|
||||
const nodejs_active_handles_total = 0;
|
||||
const nodejs_active_requests_total = 0;
|
||||
|
||||
@@ -167,6 +229,14 @@ export class SmartMetrics {
|
||||
memoryPercentage,
|
||||
memoryUsageBytes,
|
||||
memoryUsageText,
|
||||
systemCpuPercent: systemUsage.cpuPercent,
|
||||
systemMemTotalBytes: systemUsage.memTotalBytes,
|
||||
systemMemAvailableBytes: systemUsage.memAvailableBytes,
|
||||
systemMemUsedBytes: systemUsage.memUsedBytes,
|
||||
systemMemUsedPercent: systemUsage.memUsedPercent,
|
||||
systemLoadAvg1: systemUsage.loadAvg1,
|
||||
systemLoadAvg5: systemUsage.loadAvg5,
|
||||
systemLoadAvg15: systemUsage.loadAvg15,
|
||||
};
|
||||
return returnMetrics;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
/**
 * One metrics reading: process-level figures followed by machine-wide figures.
 *
 * Each member is declared exactly once; `cpuPercentage` keeps its name and type
 * but now carries the normalized (0-100% of the whole machine) meaning.
 */
export interface IMetricsSnapshot {
  // existing process/node fields
  /** CPU time (user + system) consumed by the process, in seconds. */
  process_cpu_seconds_total: number;
  nodejs_active_handles_total: number;
  nodejs_active_requests_total: number;
  /** Total V8 heap size in bytes. */
  nodejs_heap_size_total_bytes: number;
  cpuPercentage: number; // normalized to 0-100% of total machine
  /** Human-readable form of `cpuPercentage`, e.g. "12.5 %". */
  cpuUsageText: string;
  memoryPercentage: number;
  /** Memory summed over the tracked process tree, in bytes. */
  memoryUsageBytes: number;
  /** Human-readable "used / limit" memory text. */
  memoryUsageText: string;

  // system-wide fields
  /** Machine-wide CPU utilization in percent. */
  systemCpuPercent: number;
  /** Total physical memory in bytes. */
  systemMemTotalBytes: number;
  /** Memory still available machine-wide, in bytes. */
  systemMemAvailableBytes: number;
  /** Total minus available memory, in bytes. */
  systemMemUsedBytes: number;
  /** Used memory as a percentage of total. */
  systemMemUsedPercent: number;
  /** 1-minute load average. */
  systemLoadAvg1: number;
  /** 5-minute load average. */
  systemLoadAvg5: number;
  /** 15-minute load average. */
  systemLoadAvg15: number;
}
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
// CPU core count (cached at module load).
// Prefers os.availableParallelism() when the runtime provides it and otherwise
// counts the entries of os.cpus(). The value is used as a divisor when
// normalizing per-process CPU%, so it is clamped to at least 1: an empty
// os.cpus() result must not turn the normalized percentage into Infinity/NaN.
const cpuCoreCount = Math.max(
  1,
  typeof os.availableParallelism === 'function'
    ? os.availableParallelism()
    : os.cpus().length
);
|
||||
|
||||
// Cached system constants.
// Both start as null, i.e. "not determined yet"; the code that fills them in is
// outside this excerpt.
// NOTE(review): names suggest clock ticks per second and memory page size — confirm
// against the functions that assign them.
let clkTck: number | null = null;
let pageSize: number | null = null;
|
||||
@@ -61,8 +67,10 @@ function hrtimeSeconds(): number {
|
||||
}
|
||||
|
||||
/**
 * Usage figures reported for a single PID.
 *
 * Each member is declared exactly once. `cpu` keeps its original raw meaning so
 * existing readers are unaffected; `cpuNormalizedPercent` is the machine-relative value.
 */
export interface IPidUsageResult {
  /** Raw per-core CPU% (can exceed 100% on multi-core machines). */
  cpu: number;
  /** Number of CPU cores on the machine, as used for normalization. */
  cpuCoreCount: number;
  /** `cpu / cpuCoreCount` — 0-100% of the total machine. */
  cpuNormalizedPercent: number;
  /** Resident set size (RSS) in bytes. */
  memory: number;
}
|
||||
|
||||
/**
|
||||
@@ -95,12 +103,16 @@ export async function getPidUsage(
|
||||
|
||||
result[pid] = {
|
||||
cpu: cpuPercent,
|
||||
cpuCoreCount,
|
||||
cpuNormalizedPercent: cpuPercent / cpuCoreCount,
|
||||
memory: memoryBytes,
|
||||
};
|
||||
} else {
|
||||
// First call for this PID — no delta available, report 0% cpu
|
||||
result[pid] = {
|
||||
cpu: 0,
|
||||
cpuCoreCount,
|
||||
cpuNormalizedPercent: 0,
|
||||
memory: memoryBytes,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -16,5 +16,6 @@ export { smartdelay, smartlog };
|
||||
import * as pidtree from './smartmetrics.pidtree.js';
|
||||
import * as pidusage from './smartmetrics.pidusage.js';
|
||||
import * as prom from './smartmetrics.prom.js';
|
||||
import * as sysusage from './smartmetrics.sysusage.js';
|
||||
|
||||
// Re-export the in-repo helper modules. A name may only be exported once per
// module, so a single statement lists all four, including the sysusage module.
export { pidtree, pidusage, prom, sysusage };
|
||||
|
||||
94
ts/smartmetrics.sysusage.ts
Normal file
94
ts/smartmetrics.sysusage.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
|
||||
/**
 * Machine-wide usage figures returned by `getSystemUsage()`.
 */
export interface ISystemUsageResult {
  /** System-wide CPU utilization, 0-100%. */
  cpuPercent: number;
  /** Total physical RAM in bytes. */
  memTotalBytes: number;
  /** Available memory in bytes. */
  memAvailableBytes: number;
  /** `memTotalBytes - memAvailableBytes`. */
  memUsedBytes: number;
  /** Used memory as a share of total, 0-100%. */
  memUsedPercent: number;
  /** 1-minute load average. */
  loadAvg1: number;
  /** 5-minute load average. */
  loadAvg5: number;
  /** 15-minute load average. */
  loadAvg15: number;
}
|
||||
|
||||
// History for system CPU delta tracking

/**
 * One sample of the aggregate CPU time counters.
 * Utilization is derived from the difference between two samples, so a single
 * snapshot has no meaning on its own.
 */
interface ICpuSnapshot {
  /** Time counted as not busy (idle plus iowait). */
  idle: number;
  /** Sum of the sampled CPU time counters. */
  total: number;
}

// Last sample seen by getSystemUsage(); null until the first successful read.
let prevCpuSnapshot: ICpuSnapshot | null = null;
|
||||
|
||||
/**
 * Samples the aggregate CPU counters from the first line of `/proc/stat`.
 *
 * @returns `{ idle, total }` in the kernel's tick units, or `null` when the file
 *          cannot be read or its first line is not a well-formed aggregate
 *          `cpu` line (so callers never receive NaN counters).
 */
function readProcStat(): ICpuSnapshot | null {
  try {
    const content = fs.readFileSync('/proc/stat', 'utf8');
    const firstLine = content.split('\n', 1)[0]; // "cpu user nice system idle iowait irq softirq steal ..."
    const fields = firstLine.trim().split(/\s+/);

    // The aggregate line is labelled exactly "cpu" (per-core lines are cpu0, cpu1, ...).
    if (fields[0] !== 'cpu') {
      return null;
    }

    // Keep user..steal only. The trailing guest / guest_nice columns are already
    // accounted inside user / nice, so adding them would count that time twice.
    const counters = fields.slice(1, 9).map(Number);
    if (counters.length < 4 || counters.some((value) => !Number.isFinite(value))) {
      return null;
    }

    // counters: [user, nice, system, idle, iowait, irq, softirq, steal]
    const idle = counters[3] + (counters[4] || 0); // idle + iowait
    const total = counters.reduce((sum, value) => sum + value, 0);
    return { idle, total };
  } catch {
    // /proc/stat is unavailable (e.g. not a Linux host) — report "no sample".
    return null;
  }
}
|
||||
|
||||
/**
 * Reads total and available memory for the machine.
 *
 * Primary source is `/proc/meminfo` (`MemTotal:` / `MemAvailable:`, reported in kB).
 * When the file cannot be read, or either field is missing or unparseable, the
 * values come from `os.totalmem()` / `os.freemem()` instead.
 *
 * @returns total and available memory, both in bytes.
 */
function getMemoryInfo(): { totalBytes: number; availableBytes: number } {
  try {
    const content = fs.readFileSync('/proc/meminfo', 'utf8');
    // NaN marks "field not seen / not parseable", so a real 0 kB reading stays valid.
    let memTotal = Number.NaN;
    let memAvailable = Number.NaN;
    for (const line of content.split('\n')) {
      if (line.startsWith('MemTotal:')) {
        memTotal = parseInt(line.split(/\s+/)[1], 10) * 1024; // kB to bytes
      } else if (line.startsWith('MemAvailable:')) {
        memAvailable = parseInt(line.split(/\s+/)[1], 10) * 1024; // kB to bytes
      }
      // Both fields sit near the top of the file; stop scanning once they are known.
      if (!Number.isNaN(memTotal) && !Number.isNaN(memAvailable)) {
        break;
      }
    }
    // Comparisons with NaN are false, so missing fields fall through to the fallback.
    if (memTotal > 0 && memAvailable >= 0) {
      return { totalBytes: memTotal, availableBytes: memAvailable };
    }
  } catch {
    // fall through to os fallback
  }

  // Fallback using os module
  return { totalBytes: os.totalmem(), availableBytes: os.freemem() };
}
|
||||
|
||||
/**
 * Collects machine-wide CPU, memory and load-average figures.
 *
 * CPU utilization is computed from the difference between the current
 * `/proc/stat` sample and the one stored by the previous call, so the first call
 * (and any call without a usable sample) reports `cpuPercent: 0`. The stored
 * sample is module-level state and is replaced on every successful read.
 *
 * @returns a result whose percentage fields are kept inside 0-100.
 */
export async function getSystemUsage(): Promise<ISystemUsageResult> {
  // CPU
  let cpuPercent = 0;
  const currentSnapshot = readProcStat();
  if (currentSnapshot && prevCpuSnapshot) {
    const totalDelta = currentSnapshot.total - prevCpuSnapshot.total;
    const idleDelta = currentSnapshot.idle - prevCpuSnapshot.idle;
    if (totalDelta > 0 && Number.isFinite(idleDelta)) {
      const busyPercent = ((totalDelta - idleDelta) / totalDelta) * 100;
      // The idle side includes iowait, which is not guaranteed to be monotonic,
      // so the raw ratio can stray slightly outside the range; clamp it.
      cpuPercent = Math.min(100, Math.max(0, busyPercent));
    }
  }
  if (currentSnapshot) {
    prevCpuSnapshot = currentSnapshot;
  }

  // Memory
  const mem = getMemoryInfo();
  // Never report negative usage, even if "available" is reported above "total".
  const memUsedBytes = Math.max(0, mem.totalBytes - mem.availableBytes);
  const memUsedPercent =
    mem.totalBytes > 0 ? Math.min(100, (memUsedBytes / mem.totalBytes) * 100) : 0;

  // Load averages
  const [loadAvg1, loadAvg5, loadAvg15] = os.loadavg();

  return {
    cpuPercent,
    memTotalBytes: mem.totalBytes,
    memAvailableBytes: mem.availableBytes,
    memUsedBytes,
    memUsedPercent,
    loadAvg1,
    loadAvg5,
    loadAvg15,
  };
}
|
||||
Reference in New Issue
Block a user