import {
  assertArrayIncludes,
  assertEquals,
  assertStringIncludes,
} from 'jsr:@std/assert@^1.0.0';
import { ApiServer } from '../ts/api/server.ts';

/** Loopback address every test server binds to. */
const HOST = '127.0.0.1';

/** The only API key the test servers accept. */
const API_KEY = 'valid-key';

/** Model id used wherever a test needs one available model. */
const MODEL_ID = 'meta-llama/Llama-3.1-8B-Instruct';

/** Request headers carrying the accepted API key. */
const AUTH_HEADERS = { Authorization: `Bearer ${API_KEY}` };

/**
 * Per-test overrides for the stub collaborators handed to `ApiServer`.
 * Every omitted stub resolves to an empty collection.
 */
interface ServerStubs {
  /** Extra keys merged into the server config (e.g. `rateLimit`). */
  configOverrides?: { rateLimit?: number };
  /** Stub for `getAllStatus()` on the second constructor argument. */
  getAllStatus?: () => Promise<Map<string, unknown>>;
  /** Stub for `getAllAvailableModels()` on the second constructor argument. */
  getAllAvailableModels?: () => Promise<Map<string, unknown>>;
  /** Stub for `getAllModels()` on the third constructor argument. */
  getAllModels?: () => Promise<unknown[]>;
  /** Stub for `gpuDetector.detectGpus()` in the trailing options argument. */
  detectGpus?: () => Promise<unknown[]>;
}

/**
 * Asks the OS for a currently free TCP port on the loopback interface.
 *
 * Binding to port 0 lets the kernel choose an unused port; the probe listener
 * is closed immediately so the server under test can bind the same port. This
 * avoids the collisions a randomly guessed fixed-range port can run into.
 */
function reserveFreePort(): number {
  const probe = Deno.listen({ hostname: HOST, port: 0 });
  try {
    return (probe.addr as Deno.NetAddr).port;
  } finally {
    probe.close();
  }
}

/**
 * Builds an `ApiServer` around stub collaborators, starts it, runs `exercise`
 * against its base URL, and always stops the server afterwards.
 *
 * @param stubs Per-test overrides for config and collaborator behaviour.
 * @param exercise Test body; receives the base URL (`http://host:port`).
 */
async function withServer(
  stubs: ServerStubs,
  exercise: (baseUrl: string) => Promise<void>,
): Promise<void> {
  const port = reserveFreePort();
  const server = new ApiServer(
    {
      host: HOST,
      port,
      apiKeys: [API_KEY],
      cors: false,
      corsOrigins: [],
      // Spread last so a test can add optional keys without the other tests
      // having to carry an explicit `undefined` entry.
      ...stubs.configOverrides,
    },
    {
      getAllStatus: stubs.getAllStatus ?? (async () => new Map()),
      getAllAvailableModels: stubs.getAllAvailableModels ?? (async () => new Map()),
    } as never,
    {
      getAllModels: stubs.getAllModels ?? (async () => []),
    } as never,
    {} as never,
    {
      getStatus: () => ({
        localNode: null,
        nodes: [],
        models: {},
        desiredDeployments: [],
      }),
    } as never,
    {
      gpuDetector: {
        detectGpus: stubs.detectGpus ?? (async () => []),
      } as never,
    },
  );

  await server.start();
  try {
    await exercise(`http://${HOST}:${port}`);
  } finally {
    await server.stop();
  }
}

/**
 * Performs a request, drains the response body, and returns the HTTP status.
 * Draining before the caller asserts means a failed assertion never leaves an
 * unread body behind.
 */
async function fetchStatus(url: string, init?: RequestInit): Promise<number> {
  const response = await fetch(url, init);
  await response.text();
  return response.status;
}

Deno.test('ApiServer serves health metrics and authenticated model listings', async () => {
  await withServer(
    {
      getAllStatus: async () => new Map([['vllm-1', { running: true, health: 'healthy' }]]),
      getAllAvailableModels: async () => new Map([[MODEL_ID, [{ type: 'vllm' }]]]),
      getAllModels: async () => [
        {
          id: MODEL_ID,
          engine: 'vllm',
          source: { repo: MODEL_ID },
          capabilities: { chat: true },
          requirements: { minVramGb: 18 },
        },
      ],
      detectGpus: async () => [{ id: 'nvidia-0' }],
    },
    async (baseUrl) => {
      const modelsUrl = `${baseUrl}/v1/models`;

      // Health endpoint: reachable without credentials and reports no problems.
      const healthResponse = await fetch(`${baseUrl}/health`);
      const healthBody = await healthResponse.json();
      assertEquals(healthResponse.status, 200);
      assertEquals(healthBody.status, 'ok');
      assertEquals(healthBody.models, 1);
      assertEquals(healthBody.reasons, []);
      assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');

      // Metrics endpoint: text exposition including uptime and model gauge.
      const metricsResponse = await fetch(`${baseUrl}/metrics`);
      const metricsBody = await metricsResponse.text();
      assertEquals(metricsResponse.status, 200);
      assertStringIncludes(metricsBody, 'modelgrid_uptime_seconds');
      assertStringIncludes(metricsBody, 'modelgrid_models_available 1');

      // Model listing without a key is rejected.
      const unauthenticatedModels = await fetch(modelsUrl);
      const unauthenticatedBody = await unauthenticatedModels.json();
      assertEquals(unauthenticatedModels.status, 401);
      assertEquals(unauthenticatedBody.error.type, 'authentication_error');

      // Model listing with a key succeeds and echoes the caller's request id.
      const authenticatedModels = await fetch(modelsUrl, {
        headers: { ...AUTH_HEADERS, 'X-Request-Id': 'req-test-models' },
      });
      const authenticatedBody = await authenticatedModels.json();
      assertEquals(authenticatedModels.status, 200);
      assertEquals(authenticatedBody.object, 'list');
      assertEquals(authenticatedBody.data[0].id, MODEL_ID);
      assertEquals(authenticatedModels.headers.get('x-request-id'), 'req-test-models');

      // Both /v1/models calls are counted; only the first was an auth failure.
      const metricsAfterRequests = await fetch(`${baseUrl}/metrics`);
      const metricsAfterRequestsBody = await metricsAfterRequests.text();
      assertStringIncludes(
        metricsAfterRequestsBody,
        'modelgrid_api_requests_total{path="/v1/models"} 2',
      );
      assertStringIncludes(
        metricsAfterRequestsBody,
        'modelgrid_api_auth_failures_total{path="/v1/models"} 1',
      );
    },
  );
});

Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async () => {
  let failModelListing = true;

  await withServer(
    {
      // Must stay `async`: the injected failure has to surface as a rejected
      // promise, not as a synchronous throw at the call site.
      getAllAvailableModels: async () => {
        if (failModelListing) {
          failModelListing = false;
          throw new Error('models unavailable');
        }
        return new Map();
      },
    },
    async (baseUrl) => {
      const failedStatus = await fetchStatus(`${baseUrl}/v1/models`, {
        headers: AUTH_HEADERS,
      });
      assertEquals(failedStatus, 500);

      const metricsResponse = await fetch(`${baseUrl}/metrics`);
      const metricsBody = await metricsResponse.text();
      assertStringIncludes(
        metricsBody,
        'modelgrid_api_server_errors_total{path="/v1/models"} 1',
      );
    },
  );
});

Deno.test('ApiServer health reports degraded reasons', async () => {
  await withServer(
    {
      getAllStatus: async () => new Map([['vllm-1', { running: false, health: 'unhealthy' }]]),
      detectGpus: async () => [{ id: 'nvidia-0' }],
    },
    async (baseUrl) => {
      const response = await fetch(`${baseUrl}/health`);
      const body = await response.json();
      assertEquals(response.status, 503);
      assertEquals(body.status, 'degraded');
      assertArrayIncludes(body.reasons, ['unhealthy_container', 'no_models_available']);
    },
  );
});

Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
  await withServer(
    {
      configOverrides: { rateLimit: 2 },
      getAllAvailableModels: async () => new Map([[MODEL_ID, [{ type: 'vllm' }]]]),
    },
    async (baseUrl) => {
      const modelsUrl = `${baseUrl}/v1/models`;
      const authenticated = { headers: AUTH_HEADERS };

      // The first two requests fit inside the configured limit of 2.
      assertEquals(await fetchStatus(modelsUrl, authenticated), 200);
      assertEquals(await fetchStatus(modelsUrl, authenticated), 200);

      // The third request exceeds the limit.
      const third = await fetch(modelsUrl, authenticated);
      const thirdBody = await third.json();
      assertEquals(third.status, 429);
      assertEquals(thirdBody.error.type, 'rate_limit_exceeded');

      // Health and metrics keep answering after the limit has been hit.
      assertEquals(await fetchStatus(`${baseUrl}/health`), 200);
      assertEquals(await fetchStatus(`${baseUrl}/metrics`), 200);
    },
  );
});