From 9608540792cc80c3e42ba462491452510bf15167 Mon Sep 17 00:00:00 2001
From: Juergen Kunz <juergen@foss.global>
Date: Tue, 21 Apr 2026 13:30:20 +0000
Subject: [PATCH] feat(api): enforce per-minute request rate limits

---
 test/api-server_test.ts | 76 +++++++++++++++++++++++++++++++++++++++++
 ts/api/server.ts        | 42 +++++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git a/test/api-server_test.ts b/test/api-server_test.ts
index 21812e3..55588f1 100644
--- a/test/api-server_test.ts
+++ b/test/api-server_test.ts
@@ -173,3 +173,79 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async ()
     await server.stop();
   }
 });
+
+Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
+  const port = 19200 + Math.floor(Math.random() * 1000);
+  const server = new ApiServer(
+    {
+      host: '127.0.0.1',
+      port,
+      apiKeys: ['valid-key'],
+      rateLimit: 2,
+      cors: false,
+      corsOrigins: [],
+    },
+    {
+      async getAllStatus() {
+        return new Map();
+      },
+      async getAllAvailableModels() {
+        return new Map();
+      },
+    } as never,
+    {
+      async getAllModels() {
+        return [];
+      },
+    } as never,
+    {} as never,
+    {
+      getStatus() {
+        return {
+          localNode: null,
+          nodes: [],
+          models: {},
+          desiredDeployments: [],
+        };
+      },
+    } as never,
+  );
+  // Replace the server's GPU detector with a stub that reports no GPUs.
+  (server as unknown as {
+    gpuDetector: { detectGpus: () => Promise<unknown[]> };
+  }).gpuDetector = {
+    async detectGpus() {
+      return [];
+    },
+  };
+
+  await server.start();
+
+  try {
+    const requestHeaders = {
+      Authorization: 'Bearer valid-key',
+    };
+    // rateLimit is 2, so the first two authenticated requests must succeed.
+    const first = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
+    assertEquals(first.status, 200);
+    await first.text();
+
+    const second = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
+    assertEquals(second.status, 200);
+    await second.text();
+    // The third request with the same key must be rejected with 429.
+    const third = await fetch(`http://127.0.0.1:${port}/v1/models`, { headers: requestHeaders });
+    assertEquals(third.status, 429);
+    assertEquals((await third.json()).error.type, 'rate_limit_exceeded');
+    // /health and /metrics must still answer 200 after the key was throttled.
+    const health = await fetch(`http://127.0.0.1:${port}/health`);
+    assertEquals(health.status, 200);
+    await health.text();
+
+    const metrics = await fetch(`http://127.0.0.1:${port}/metrics`);
+    assertEquals(metrics.status, 200);
+    await metrics.text();
+  } finally {
+    await server.stop();
+  }
+});
diff --git a/ts/api/server.ts b/ts/api/server.ts
index bb4f773..d26aecc 100644
--- a/ts/api/server.ts
+++ b/ts/api/server.ts
@@ -34,6 +34,7 @@ export class ApiServer {
   private requestCounts = new Map<string, number>();
   private authFailureCounts = new Map<string, number>();
   private serverErrorCounts = new Map<string, number>();
+  private rateLimitBuckets = new Map<string, { count: number; windowStart: number }>();
 
   constructor(
     config: IApiConfig,
@@ -152,6 +153,12 @@ export class ApiServer {
       return;
     }
 
+    if (!this.isRequestWithinRateLimit(req)) {
+      this.sendError(res, 429, 'Rate limit exceeded', 'rate_limit_exceeded');
+      this.recordRequest(path, res.statusCode);
+      return;
+    }
+
     // Route request
     try {
       await this.router.route(req, res, path);
@@ -352,6 +359,81 @@ export class ApiServer {
     }
   }
 
+  /**
+   * Decides whether `req` still fits inside its caller's per-minute quota.
+   *
+   * Uses a fixed 60-second window per caller (see `getRateLimitKey`): the
+   * first request opens a window, later requests in that window increment a
+   * counter, and once the counter reaches `config.rateLimit` every further
+   * request is refused until the window expires. Refused requests are not
+   * counted.
+   *
+   * @param req Incoming request, used only to derive the bucket key.
+   * @returns `true` when the request may proceed (always `true` when
+   *   `config.rateLimit` is unset, zero, or negative); `false` when the
+   *   caller has used up the current window.
+   */
+  private isRequestWithinRateLimit(req: http.IncomingMessage): boolean {
+    const configuredLimit = this.config.rateLimit;
+    if (!configuredLimit || configuredLimit <= 0) {
+      return true;
+    }
+
+    const now = Date.now();
+    const windowMs = 60 * 1000;
+
+    // Evict expired buckets so callers that never return do not accumulate in
+    // memory. Buckets are only ever inserted here with `windowStart = now`, so
+    // the map iterates oldest window first and the sweep can stop at the first
+    // bucket that is still live.
+    for (const [staleKey, staleBucket] of this.rateLimitBuckets) {
+      if (now - staleBucket.windowStart < windowMs) {
+        break;
+      }
+      this.rateLimitBuckets.delete(staleKey);
+    }
+
+    const key = this.getRateLimitKey(req);
+    const bucket = this.rateLimitBuckets.get(key);
+
+    if (!bucket || now - bucket.windowStart >= windowMs) {
+      // Delete before set: `Map.set` on an existing key keeps its old
+      // position, which would break the oldest-first order relied on above.
+      this.rateLimitBuckets.delete(key);
+      this.rateLimitBuckets.set(key, { count: 1, windowStart: now });
+      return true;
+    }
+
+    if (bucket.count >= configuredLimit) {
+      return false;
+    }
+
+    bucket.count += 1;
+    return true;
+  }
+
+  /**
+   * Builds the bucket key that identifies the caller of `req`.
+   *
+   * @param req Incoming request.
+   * @returns `api_key:<token>` when the `Authorization` header carries a
+   *   bearer token, otherwise `ip:<remote address>` (`ip:unknown` when the
+   *   socket reports no address).
+   */
+  private getRateLimitKey(req: http.IncomingMessage): string {
+    // NOTE(review): the token is used as-is; this presumably runs only after
+    // authentication has rejected unknown keys - confirm, otherwise a caller
+    // could rotate tokens to obtain a fresh bucket per request.
+    if (typeof req.headers.authorization === 'string') {
+      const match = req.headers.authorization.match(/^Bearer\s+(.+)$/i);
+      if (match) {
+        return `api_key:${match[1]}`;
+      }
+    }
+
+    return `ip:${req.socket.remoteAddress || 'unknown'}`;
+  }
+
   private incrementMetric(metric: Map<string, number>, path: string): void {
     metric.set(path, (metric.get(path) || 0) + 1);
   }