From 9608540792cc80c3e42ba462491452510bf15167 Mon Sep 17 00:00:00 2001
From: Juergen Kunz
Date: Tue, 21 Apr 2026 13:30:20 +0000
Subject: [PATCH] feat(api): enforce per-minute request rate limits

---
Review notes (not part of the commit message): line structure restored and
type arguments re-added where the collapsed text had lost them; doc comments
added to the new rate-limit members, with hunk headers and diffstat updated to
match. The post-image blob ids on the `index` lines predate these edits.

 test/api-server_test.ts | 76 +++++++++++++++++++++++++++++++++++++++++
 ts/api/server.ts        | 59 ++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)

diff --git a/test/api-server_test.ts b/test/api-server_test.ts
index 21812e3..55588f1 100644
--- a/test/api-server_test.ts
+++ b/test/api-server_test.ts
@@ -173,3 +173,79 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async ()
     await server.stop();
   }
 });
+
+Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
+  const port = 19200 + Math.floor(Math.random() * 1000);
+  const server = new ApiServer(
+    {
+      host: '127.0.0.1',
+      port,
+      apiKeys: ['valid-key'],
+      rateLimit: 2,
+      cors: false,
+      corsOrigins: [],
+    },
+    {
+      async getAllStatus() {
+        return new Map();
+      },
+      async getAllAvailableModels() {
+        return new Map();
+      },
+    } as never,
+    {
+      async getAllModels() {
+        return [];
+      },
+    } as never,
+    {} as never,
+    {
+      getStatus() {
+        return {
+          localNode: null,
+          nodes: [],
+          models: {},
+          desiredDeployments: [],
+        };
+      },
+    } as never,
+  );
+
+  (server as unknown as {
+    gpuDetector: { detectGpus: () => Promise<unknown[]> };
+  }).gpuDetector = {
+    async detectGpus() {
+      return [];
+    },
+  };
+
+  await server.start();
+
+  try {
+    const requestHeaders = {
+      Authorization: 'Bearer valid-key',
+    };
+
+    const first = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
+    assertEquals(first.status, 200);
+    await first.text();
+
+    const second = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
+    assertEquals(second.status, 200);
+    await second.text();
+
+    const third = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
+    assertEquals(third.status, 429);
+    assertEquals((await third.json()).error.type, 'rate_limit_exceeded');
+
+    const health = await fetch(`http://127.0.0.1:${port}/health`);
+    assertEquals(health.status, 200);
+    await health.text();
+
+    const metrics = await fetch(`http://127.0.0.1:${port}/metrics`);
+    assertEquals(metrics.status, 200);
+    await metrics.text();
+  } finally {
+    await server.stop();
+  }
+});
diff --git a/ts/api/server.ts b/ts/api/server.ts
index bb4f773..d26aecc 100644
--- a/ts/api/server.ts
+++ b/ts/api/server.ts
@@ -34,6 +34,8 @@ export class ApiServer {
   private requestCounts = new Map();
   private authFailureCounts = new Map();
   private serverErrorCounts = new Map();
+  /** Rate-limit window state per caller key (see getRateLimitKey). */
+  private rateLimitBuckets = new Map<string, { count: number; windowStart: number }>();
 
   constructor(
     config: IApiConfig,
@@ -152,6 +154,12 @@ export class ApiServer {
       return;
     }
 
+    if (!this.isRequestWithinRateLimit(req)) {
+      this.sendError(res, 429, 'Rate limit exceeded', 'rate_limit_exceeded');
+      this.recordRequest(path, res.statusCode);
+      return;
+    }
+
     // Route request
     try {
       await this.router.route(req, res, path);
@@ -352,6 +360,57 @@ export class ApiServer {
     }
   }
 
+  /**
+   * Fixed-window limiter: counts this request against its caller's bucket and
+   * reports whether it still fits within `config.rateLimit` requests for the
+   * current 60-second window. A missing, zero, or negative limit disables the
+   * check. Rejected requests do not increase the count.
+   *
+   * NOTE(review): an expired bucket is replaced on the caller's next request,
+   * but nothing in this change deletes buckets, so the map keeps one entry per
+   * distinct key seen.
+   */
+  private isRequestWithinRateLimit(req: http.IncomingMessage): boolean {
+    const configuredLimit = this.config.rateLimit;
+    if (!configuredLimit || configuredLimit <= 0) {
+      return true;
+    }
+
+    const key = this.getRateLimitKey(req);
+    const now = Date.now();
+    const windowMs = 60 * 1000;
+    const bucket = this.rateLimitBuckets.get(key);
+
+    // No bucket yet, or the previous window elapsed: start a new window.
+    if (!bucket || now - bucket.windowStart >= windowMs) {
+      this.rateLimitBuckets.set(key, { count: 1, windowStart: now });
+      return true;
+    }
+
+    if (bucket.count >= configuredLimit) {
+      return false;
+    }
+
+    bucket.count += 1;
+    return true;
+  }
+
+  /**
+   * Picks the rate-limit bucket key: `api_key:<token>` when the Authorization
+   * header carries a Bearer token, otherwise `ip:<remote address>` (or
+   * `ip:unknown` when the socket reports no address).
+   */
+  private getRateLimitKey(req: http.IncomingMessage): string {
+    if (typeof req.headers.authorization === 'string') {
+      const match = req.headers.authorization.match(/^Bearer\s+(.+)$/i);
+      if (match) {
+        return `api_key:${match[1]}`;
+      }
+    }
+
+    return `ip:${req.socket.remoteAddress || 'unknown'}`;
+  }
+
   private incrementMetric(metric: Map<string, number>, path: string): void {
     metric.set(path, (metric.get(path) || 0) + 1);
   }