feat: Implement Prometheus metrics exposure in SmartMetrics

- Added Prometheus gauges for CPU and memory metrics.
- Implemented HTTP server to expose metrics at /metrics endpoint.
- Created methods to enable and disable the Prometheus endpoint.
- Updated getMetrics() to set gauge values.
- Added tests for Prometheus metrics functionality.
- Updated documentation plan for Prometheus integration.
2025-06-09 10:31:25 +00:00
parent 9413e0323a
commit 34b09ed7a7
8 changed files with 7836 additions and 3777 deletions
--- a/.gitea/workflows/default_tags.yaml
+++ b/.gitea/workflows/default_tags.yaml
@@ -119,6 +119,6 @@ jobs:
        run: |
          npmci node install stable
          npmci npm install
-          pnpm install -g @gitzone/tsdoc
+          pnpm install -g @git.zone/tsdoc
          npmci command tsdoc
        continue-on-error: true
--- a/package.json
+++ b/package.json
@@ -8,17 +8,16 @@
  "author": "Lossless GmbH",
  "license": "MIT",
  "scripts": {
-    "test": "(tstest test/ --web)",
+    "test": "(tstest test/ --verbose)",
    "build": "(tsbuild --web)",
    "buildDocs": "tsdoc"
  },
  "devDependencies": {
-    "@gitzone/tsbuild": "^2.1.66",
-    "@gitzone/tsbundle": "^2.0.8",
-    "@gitzone/tsrun": "^1.2.44",
-    "@gitzone/tstest": "^1.0.77",
-    "@push.rocks/tapbundle": "^5.0.12",
-    "@types/node": "^20.4.8"
+    "@git.zone/tsbuild": "^2.6.4",
+    "@git.zone/tsbundle": "^2.0.8",
+    "@git.zone/tsrun": "^1.2.44",
+    "@git.zone/tstest": "^2.3.1",
+    "@types/node": "^22.15.30"
  },
  "browserslist": [
    "last 1 chrome versions"
@@ -40,8 +39,8 @@
    "@push.rocks/smartlog": "^3.0.2",
    "@types/pidusage": "^2.0.2",
    "pidtree": "^0.6.0",
-    "pidusage": "^3.0.2",
-    "prom-client": "^14.2.0"
+    "pidusage": "^4.0.1",
+    "prom-client": "^15.1.3"
  },
  "type": "module",
  "keywords": [
@@ -64,5 +63,6 @@
  "repository": {
    "type": "git",
    "url": "https://code.foss.global/push.rocks/smartmetrics.git"
-  }
-}
+  },
+  "packageManager": "pnpm@10.11.0+sha512.6540583f41cc5f628eb3d9773ecee802f4f9ef9923cc45b69890fb47991d4b092964694ec3a4f738a420c918a333062c8b925d312f42e4f0c263eb603551f977"
+}
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/readme.plan.md
+++ b/readme.plan.md
@@ -0,0 +1,81 @@
+# Prometheus Metrics Implementation Plan
+
+`cat /home/philkunz/.claude/CLAUDE.md`
+
+## Overview
+Add Prometheus metrics exposure functionality to SmartMetrics while maintaining backward compatibility with existing `getMetrics()` method.
+
+## Implementation Tasks
+
+### 1. Add HTTP Server Dependencies
+- [x] Check if we need to add any HTTP server dependency to package.json
+- [x] Import necessary modules in smartmetrics.plugins.ts
+
+### 2. Create Prometheus Gauges in SmartMetrics Class
+- [x] Add private properties for custom gauges:
+  - [x] `private cpuPercentageGauge: plugins.promClient.Gauge<string>`
+  - [x] `private memoryPercentageGauge: plugins.promClient.Gauge<string>`
+  - [x] `private memoryUsageBytesGauge: plugins.promClient.Gauge<string>`
+- [x] Initialize gauges in `setup()` method with appropriate names and help text:
+  - [x] `smartmetrics_cpu_percentage` - "Current CPU usage percentage"
+  - [x] `smartmetrics_memory_percentage` - "Current memory usage percentage"
+  - [x] `smartmetrics_memory_usage_bytes` - "Current memory usage in bytes"
+
+### 3. Update getMetrics() Method
+- [x] After calculating metrics, update the Prometheus gauges:
+  - [x] `this.cpuPercentageGauge.set(cpuPercentage)`
+  - [x] `this.memoryPercentageGauge.set(memoryPercentage)`
+  - [x] `this.memoryUsageBytesGauge.set(memoryUsageBytes)`
+- [x] Ensure gauges are only updated if they exist (defensive programming)
+
+### 4. Add getPrometheusFormattedMetrics() Method
+- [x] Create new public async method `getPrometheusFormattedMetrics(): Promise<string>`
+- [x] Call `this.getMetrics()` to ensure gauges are updated with latest values
+- [x] Return `await this.registry.metrics()` to get Prometheus text format
+
+### 5. Add HTTP Server Properties
+- [x] Add private property for HTTP server: `private prometheusServer?: plugins.http.Server`
+- [x] Add private property for server port: `private prometheusPort?: number`
+
+### 6. Implement enablePrometheusEndpoint() Method
+- [x] Create new public method `enablePrometheusEndpoint(port: number = 9090): void`
+- [x] Check if server is already running, if so, log warning and return
+- [x] Create minimal HTTP server using Node.js built-in `http` module:
+  - [x] Listen on specified port
+  - [x] Handle GET requests to `/metrics` endpoint
+  - [x] Return Prometheus-formatted metrics with correct Content-Type header
+  - [x] Handle other routes with 404
+- [x] Store server reference and port for later cleanup
+- [x] Log info message about endpoint availability
+
+### 7. Add disablePrometheusEndpoint() Method
+- [x] Create new public method `disablePrometheusEndpoint(): void`
+- [x] Check if server exists, if not, return
+- [x] Close the HTTP server
+- [x] Clear server reference and port
+- [x] Log info message about endpoint shutdown
+
+### 8. Update stop() Method
+- [x] Call `disablePrometheusEndpoint()` to ensure clean shutdown
+
+### 9. Add Tests
+- [x] Add test for `getPrometheusFormattedMetrics()`:
+  - [x] Verify it returns a string
+  - [x] Verify it contains expected metric names
+  - [x] Verify format matches Prometheus text exposition format
+- [x] Add test for `enablePrometheusEndpoint()`:
+  - [x] Start endpoint on test port (e.g., 19090)
+  - [x] Make HTTP request to `/metrics`
+  - [x] Verify response has correct Content-Type
+  - [x] Verify response contains metrics
+  - [x] Clean up by calling `disablePrometheusEndpoint()`
+
+### 10. Update Documentation
+- [ ] Add usage example in readme.md for Prometheus integration
+- [ ] Document the new methods in code comments
+
+## Notes
+- Using Node.js built-in `http` module to avoid adding unnecessary dependencies
+- Default port 9090 is also the default listen port of the Prometheus server itself; pass a different port when both run on the same host
+- Maintaining backward compatibility - existing functionality unchanged
+- Prometheus text format example: `metric_name{label="value"} 123.45`
--- a/test/test.ts
+++ b/test/test.ts
@@ -1,6 +1,6 @@
 import * as plugins from './plugins.js';

-import { expect, tap } from '@push.rocks/tapbundle';
+import { expect, tap } from '@git.zone/tstest/tapbundle';
 import * as smartmetrics from '../ts/index.js';

 let testSmartMetrics: smartmetrics.SmartMetrics;
@@ -24,4 +24,55 @@ tap.test('should produce valid metrics', async (tools) => {
  console.log(await testSmartMetrics.getMetrics());
 });

+// The text exposition output must be a string that carries the three custom
+// gauges as well as the HELP/TYPE metadata lines.
+tap.test('should return Prometheus formatted metrics', async () => {
+  const exposition = await testSmartMetrics.getPrometheusFormattedMetrics();
+  expect(exposition).toBeTypeofString();
+
+  const expectedFragments = [
+    'smartmetrics_cpu_percentage',
+    'smartmetrics_memory_percentage',
+    'smartmetrics_memory_usage_bytes',
+    '# HELP',
+    '# TYPE',
+  ];
+  for (const fragment of expectedFragments) {
+    expect(exposition).toContain(fragment);
+  }
+});
+
+// Starts the endpoint on a dedicated test port and checks status, content type
+// and body of a GET /metrics request. The endpoint is left running for the
+// tests that follow.
+tap.test('should enable Prometheus endpoint', async (tools) => {
+  const testPort = 19090;
+  testSmartMetrics.enablePrometheusEndpoint(testPort);
+
+  // enablePrometheusEndpoint() returns before the server is listening, so wait
+  await tools.delayFor(1000);
+
+  const scrape = await fetch(`http://localhost:${testPort}/metrics`);
+  expect(scrape.status).toEqual(200);
+  expect(scrape.headers.get('content-type')).toEqual('text/plain; version=0.0.4');
+
+  const body = await scrape.text();
+  const expectedGauges = [
+    'smartmetrics_cpu_percentage',
+    'smartmetrics_memory_percentage',
+    'smartmetrics_memory_usage_bytes',
+  ];
+  for (const gaugeName of expectedGauges) {
+    expect(body).toContain(gaugeName);
+  }
+});
+
+// Any path other than /metrics is answered with a plain-text 404.
+tap.test('should handle 404 for non-metrics endpoints', async () => {
+  const notFoundResponse = await fetch('http://localhost:19090/notfound');
+  expect(notFoundResponse.status).toEqual(404);
+
+  const body = await notFoundResponse.text();
+  expect(body).toEqual('Not Found');
+});
+
+// After disabling the endpoint a request to the former port must be refused.
+tap.test('should disable Prometheus endpoint', async () => {
+  testSmartMetrics.disablePrometheusEndpoint();
+
+  // Give the server time to shut down
+  await new Promise(resolve => setTimeout(resolve, 1000));
+
+  // Record the failure instead of throwing a sentinel inside the try block:
+  // a sentinel thrown there is caught by the same catch and reported as a
+  // misleading message mismatch.
+  let connectError: unknown;
+  try {
+    await fetch('http://localhost:19090/metrics');
+  } catch (error) {
+    connectError = error;
+  }
+
+  // Narrow before reading .message; catch variables are `unknown` under strict.
+  if (!(connectError instanceof Error)) {
+    throw new Error('Should have failed to connect');
+  }
+  expect(connectError.message).toContain('fetch failed');
+});
+
 tap.start();
--- a/ts/smartmetrics.classes.smartmetrics.ts
+++ b/ts/smartmetrics.classes.smartmetrics.ts
@@ -7,11 +7,39 @@ export class SmartMetrics {
  public logger: plugins.smartlog.Smartlog;
  public registry: plugins.promClient.Registry;
  public maxMemoryMB: number;
+  
+  // Prometheus gauges for custom metrics
+  private cpuPercentageGauge: plugins.promClient.Gauge<string>;
+  private memoryPercentageGauge: plugins.promClient.Gauge<string>;
+  private memoryUsageBytesGauge: plugins.promClient.Gauge<string>;
+  
+  // HTTP server for Prometheus endpoint
+  private prometheusServer?: plugins.http.Server;
+  private prometheusPort?: number;

  public async setup() {
    const collectDefaultMetrics = plugins.promClient.collectDefaultMetrics;
    this.registry = new plugins.promClient.Registry();
    collectDefaultMetrics({ register: this.registry });
+    
+    // Create the three custom gauges and register each one on this instance's registry (not prom-client's global one)
+    this.cpuPercentageGauge = new plugins.promClient.Gauge({
+      name: 'smartmetrics_cpu_percentage',
+      help: 'Current CPU usage percentage',
+      registers: [this.registry]
+    });
+    
+    this.memoryPercentageGauge = new plugins.promClient.Gauge({
+      name: 'smartmetrics_memory_percentage',
+      help: 'Current memory usage percentage',
+      registers: [this.registry]
+    });
+    
+    this.memoryUsageBytesGauge = new plugins.promClient.Gauge({
+      name: 'smartmetrics_memory_usage_bytes',
+      help: 'Current memory usage in bytes',
+      registers: [this.registry]
+    });
  }

  constructor(loggerArg: plugins.smartlog.Smartlog, sourceNameArg: string) {
@@ -100,6 +128,17 @@ export class SmartMetrics {
    )} / ${this.formatBytes(this.maxMemoryMB * 1024 * 1024)}`;

    console.log(`${cpuUsageText} ||| ${memoryUsageText} `);
+    
+    // Update Prometheus gauges with current values
+    if (this.cpuPercentageGauge) {
+      this.cpuPercentageGauge.set(cpuPercentage);
+    }
+    if (this.memoryPercentageGauge) {
+      this.memoryPercentageGauge.set(memoryPercentage);
+    }
+    if (this.memoryUsageBytesGauge) {
+      this.memoryUsageBytesGauge.set(memoryUsageBytes);
+    }

    const returnMetrics: interfaces.IMetricsSnapshot = {
      process_cpu_seconds_total: (
@@ -127,7 +166,58 @@ export class SmartMetrics {
    return returnMetrics;
  }

+  /**
+   * Produces the registry's metrics in Prometheus text exposition format.
+   *
+   * @returns the serialized metrics of `this.registry`
+   */
+  public async getPrometheusFormattedMetrics(): Promise<string> {
+    // getMetrics() is called for its side effect of refreshing the gauges;
+    // the snapshot it returns is not needed here.
+    await this.getMetrics();
+
+    const exposition = await this.registry.metrics();
+    return exposition;
+  }
+
+  /**
+   * Starts an HTTP server that answers `GET /metrics` with the metrics in
+   * Prometheus text format. Every other path or method gets a 404.
+   *
+   * Calling this while an endpoint is already running only logs a warning.
+   * The method returns before the server is listening; a failure to bind the
+   * port is logged and resets the endpoint state so it can be enabled again.
+   *
+   * @param port TCP port to listen on (defaults to 9090)
+   */
+  public enablePrometheusEndpoint(port: number = 9090): void {
+    if (this.prometheusServer) {
+      this.logger.log('warn', 'Prometheus endpoint is already running');
+      return;
+    }
+
+    const server = plugins.http.createServer(async (req, res) => {
+      // Match on the path only so a scrape URL with a query string still hits /metrics.
+      const pathname = (req.url ?? '').split('?')[0];
+      if (pathname === '/metrics' && req.method === 'GET') {
+        try {
+          const metrics = await this.getPrometheusFormattedMetrics();
+          res.writeHead(200, { 'Content-Type': 'text/plain; version=0.0.4' });
+          res.end(metrics);
+        } catch (error) {
+          res.writeHead(500, { 'Content-Type': 'text/plain' });
+          res.end('Error generating metrics');
+          this.logger.log('error', 'Error generating Prometheus metrics', error);
+        }
+      } else {
+        res.writeHead(404, { 'Content-Type': 'text/plain' });
+        res.end('Not Found');
+      }
+    });
+
+    // Without an 'error' listener a failed listen() (e.g. port already in use)
+    // is emitted as an unhandled 'error' event and takes the process down.
+    server.on('error', (error) => {
+      this.logger.log('error', `Prometheus metrics endpoint on port ${port} failed`, error);
+      // Only forget the server if it never started listening and is still the current one.
+      if (!server.listening && this.prometheusServer === server) {
+        this.prometheusServer = undefined;
+        this.prometheusPort = undefined;
+      }
+    });
+
+    this.prometheusServer = server;
+    this.prometheusPort = port;
+    server.listen(port, () => {
+      this.logger.log('info', `Prometheus metrics endpoint available at http://localhost:${port}/metrics`);
+    });
+  }
+
+  /**
+   * Stops the HTTP server started by `enablePrometheusEndpoint()`.
+   *
+   * Does nothing when no endpoint is running. The stored server and port are
+   * cleared immediately; the shutdown itself finishes asynchronously and is
+   * logged from the `close` callback.
+   */
+  public disablePrometheusEndpoint(): void {
+    const server = this.prometheusServer;
+    if (!server) {
+      return;
+    }
+
+    // Capture the port before resetting it: the close callback runs later,
+    // after the field is already undefined, and would log "port undefined".
+    const port = this.prometheusPort;
+    this.prometheusServer = undefined;
+    this.prometheusPort = undefined;
+
+    server.close(() => {
+      this.logger.log('info', `Prometheus metrics endpoint on port ${port} has been shut down`);
+    });
+  }
+
  public stop() {
    this.started = false;
+    this.disablePrometheusEndpoint(); // also shut down the Prometheus HTTP endpoint; a no-op when none was enabled
  }
 }
--- a/ts/smartmetrics.interfaces.ts
+++ b/ts/smartmetrics.interfaces.ts
@@ -1,4 +1,3 @@
-// this might be extracted into a package @pushrocks/smartmetrics-interfaces in the future
 export interface IMetricsSnapshot {
  process_cpu_seconds_total: number;
  nodejs_active_handles_total: number;
--- a/ts/smartmetrics.plugins.ts
+++ b/ts/smartmetrics.plugins.ts
@@ -2,8 +2,9 @@
 import * as v8 from 'v8';
 import * as os from 'os';
 import * as fs from 'fs';
+import * as http from 'http';

-export { v8, os, fs };
+export { v8, os, fs, http };

 // pushrocks scope
 import * as smartdelay from '@push.rocks/smartdelay';