diff --git a/test/api-server_test.ts b/test/api-server_test.ts index 6682f80..149b105 100644 --- a/test/api-server_test.ts +++ b/test/api-server_test.ts @@ -64,6 +64,8 @@ Deno.test('ApiServer serves health metrics and authenticated model listings', as assertEquals(healthResponse.status, 200); assertEquals(healthBody.status, 'ok'); assertEquals(healthBody.models, 1); + assertEquals(Array.isArray(healthBody.reasons), true); + assertEquals(healthBody.reasons.length, 0); assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string'); const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`); @@ -175,6 +177,66 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async () } }); +Deno.test('ApiServer health reports degraded reasons', async () => { + const port = 19300 + Math.floor(Math.random() * 1000); + const server = new ApiServer( + { + host: '127.0.0.1', + port, + apiKeys: ['valid-key'], + cors: false, + corsOrigins: [], + }, + { + async getAllStatus() { + return new Map([ + ['vllm-1', { running: false, health: 'unhealthy' }], + ]); + }, + async getAllAvailableModels() { + return new Map(); + }, + } as never, + { + async getAllModels() { + return []; + }, + } as never, + {} as never, + { + getStatus() { + return { + localNode: null, + nodes: [], + models: {}, + desiredDeployments: [], + }; + }, + } as never, + { + gpuDetector: { + async detectGpus() { + return [{ id: 'nvidia-0' }]; + }, + } as never, + }, + ); + + await server.start(); + + try { + const response = await fetch(`http://127.0.0.1:${port}/health`); + const body = await response.json(); + + assertEquals(response.status, 503); + assertEquals(body.status, 'degraded'); + assertEquals(body.reasons.includes('unhealthy_container'), true); + assertEquals(body.reasons.includes('no_models_available'), true); + } finally { + await server.stop(); + } +}); + Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => { const port = 19200 + Math.floor(Math.random() * 1000); const server = new ApiServer( @@ -191,7 +253,9 @@ Deno.test('ApiServer enforces api rate limits while exempting health and metrics return new Map(); }, async getAllAvailableModels() { - return new Map(); + return new Map([ + ['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]], + ]); }, } as never, { diff --git a/ts/api/server.ts b/ts/api/server.ts index 520f89d..1c147a3 100644 --- a/ts/api/server.ts +++ b/ts/api/server.ts @@ -210,6 +210,7 @@ export class ApiServer { const models = await this.containerManager.getAllAvailableModels(); let status: 'ok' | 'degraded' | 'error' = 'ok'; + const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>(); const containerHealth: Record = {}; const gpuStatus: Record = {}; @@ -220,6 +221,7 @@ export class ApiServer { } else { containerHealth[id] = 'unhealthy'; status = 'degraded'; + reasons.add('unhealthy_container'); } } @@ -228,8 +230,14 @@ export class ApiServer { gpuStatus[gpu.id] = 'available'; } + if (models.size === 0) { + status = 'degraded'; + reasons.add('no_models_available'); + } + const response: IHealthResponse = { status, + reasons: Array.from(reasons), version: VERSION, uptime: Math.floor((Date.now() - this.startTime) / 1000), containers: statuses.size, @@ -247,6 +255,7 @@ export class ApiServer { res.writeHead(500, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ status: 'error', + reasons: ['gpu_detection_failed'], error: error instanceof Error ? error.message : String(error), })); } diff --git a/ts/interfaces/api.ts b/ts/interfaces/api.ts index 761bc02..617a9c1 100644 --- a/ts/interfaces/api.ts +++ b/ts/interfaces/api.ts @@ -309,6 +309,8 @@ export interface IApiError { export interface IHealthResponse { /** Status */ status: 'ok' | 'degraded' | 'error'; + /** Machine-readable reasons for degraded or error states */ + reasons?: Array<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>; /** Version */ version: string; /** Uptime in seconds */ diff --git a/ts/ui/server.ts b/ts/ui/server.ts index cb52d21..6cf707c 100644 --- a/ts/ui/server.ts +++ b/ts/ui/server.ts @@ -151,6 +151,7 @@ export class UiServer { const gpus = await this.gpuDetector.detectGpus(); let status: 'ok' | 'degraded' | 'error' = 'ok'; + const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>(); const containerHealth: Record = {}; const gpuStatus: Record = {}; @@ -160,14 +161,21 @@ export class UiServer { } else { containerHealth[id] = 'unhealthy'; status = 'degraded'; + reasons.add('unhealthy_container'); } } for (const gpu of gpus) { gpuStatus[gpu.id] = 'available'; } + if (models.size === 0) { + status = 'degraded'; + reasons.add('no_models_available'); + } + const health: IHealthResponse = { status, + reasons: Array.from(reasons), version: VERSION, uptime: Math.floor((Date.now() - this.startTime) / 1000), containers: statuses.size,