feat(metrics): count API requests, auth failures, and 5xx responses
CI / Type Check & Lint (push) Successful in 6s
CI / Build Test (Current Platform) (push) Successful in 6s
CI / Build All Platforms (push) Successful in 39s

This commit is contained in:
2026-04-21 13:15:34 +00:00
parent 6541b2db1c
commit 3762fc661e
2 changed files with 132 additions and 0 deletions
+49
View File
@@ -31,6 +31,9 @@ export class ApiServer {
private clusterCoordinator: ClusterCoordinator;
private clusterHandler: ClusterHandler;
private startTime: number = 0;
private requestCounts = new Map<string, number>();
private authFailureCounts = new Map<string, number>();
private serverErrorCounts = new Map<string, number>();
constructor(
config: IApiConfig,
@@ -131,18 +134,21 @@ export class ApiServer {
if (path.startsWith('/_cluster')) {
await this.clusterHandler.handle(req, res, path, url);
this.recordRequest(path, res.statusCode);
return;
}
// Health check endpoint (no auth required)
if (path === '/health' || path === '/healthz') {
await this.handleHealthCheck(res);
this.recordRequest(path, res.statusCode);
return;
}
// Metrics endpoint (no auth required)
if (path === '/metrics') {
await this.handleMetrics(res);
this.recordRequest(path, res.statusCode);
return;
}
@@ -156,6 +162,7 @@ export class ApiServer {
// Log request
const duration = Date.now() - startTime;
this.recordRequest(path, res.statusCode);
logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
}
@@ -268,6 +275,28 @@ export class ApiServer {
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
// Per-path request counters. The Prometheus text exposition format permits
// only one HELP and one TYPE line per metric name, placed before that
// metric's samples, so the header is written once per family and the
// individual paths are emitted as label values underneath it.
const counterFamilies: Array<[string, string, Map<string, number>]> = [
  ['modelgrid_api_requests_total', 'Total API requests by path', this.requestCounts],
  [
    'modelgrid_api_auth_failures_total',
    'Total authentication failures by path',
    this.authFailureCounts,
  ],
  ['modelgrid_api_server_errors_total', 'Total 5xx responses by path', this.serverErrorCounts],
];
for (const [name, help, counts] of counterFamilies) {
  // Nothing recorded yet: emit no header and no samples for this family.
  if (counts.size === 0) {
    continue;
  }
  metrics.push(`# HELP ${name} ${help}`);
  metrics.push(`# TYPE ${name} counter`);
  for (const [path, count] of counts.entries()) {
    metrics.push(`${name}{path="${this.escapeMetricLabel(path)}"} ${count}`);
  }
}
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
res.end(metrics.join('\n') + '\n');
} catch (error) {
@@ -310,4 +339,24 @@ export class ApiServer {
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
};
}
/**
 * Records one handled request in the per-path counters.
 *
 * Every call bumps the request counter for `path`; a 401 status also bumps
 * the auth-failure counter, and any status of 500 or above also bumps the
 * server-error counter.
 *
 * @param path - Request path used as the counter key.
 * @param statusCode - HTTP status code of the response.
 */
private recordRequest(path: string, statusCode: number): void {
  // Collect the counters this response touches, then bump them in order.
  const touched: Array<Map<string, number>> = [this.requestCounts];
  if (statusCode === 401) {
    touched.push(this.authFailureCounts);
  }
  if (statusCode >= 500) {
    touched.push(this.serverErrorCounts);
  }
  for (const counter of touched) {
    this.incrementMetric(counter, path);
  }
}
/**
 * Adds one to the counter stored under `path`, starting from zero when the
 * key has no (or a falsy) value yet.
 *
 * @param metric - Counter map to update in place.
 * @param path - Key whose count is incremented.
 */
private incrementMetric(metric: Map<string, number>, path: string): void {
  const previous = metric.get(path) || 0;
  metric.set(path, previous + 1);
}
/**
 * Escapes a string for use inside a double-quoted Prometheus label value.
 *
 * The text exposition format requires backslash, double quote and line feed
 * to be written as `\\`, `\"` and `\n` respectively. Backslashes are
 * replaced first so the backslashes introduced by the later replacements
 * are not escaped a second time.
 *
 * @param value - Raw label value.
 * @returns The value with `\`, `"` and line feed escaped.
 */
private escapeMetricLabel(value: string): string {
  return value
    .replaceAll('\\', '\\\\')
    .replaceAll('"', '\\"')
    .replaceAll('\n', '\\n');
}
}