feat(health): include degraded reasons in responses

2026-04-21 13:34:58 +00:00
parent 703cceb512
commit 9022c8dbf3
4 changed files with 84 additions and 1 deletion
@@ -64,6 +64,8 @@ Deno.test('ApiServer serves health metrics and authenticated model listings', as
    assertEquals(healthResponse.status, 200);
    assertEquals(healthBody.status, 'ok');
    assertEquals(healthBody.models, 1);
+    assertEquals(Array.isArray(healthBody.reasons), true);
+    assertEquals(healthBody.reasons.length, 0);
    assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');

    const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
@@ -175,6 +177,66 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async ()
  }
 });

+Deno.test('ApiServer health reports degraded reasons', async () => { // Checks that GET /health answers 503 with status 'degraded' and lists the reasons.
+  const port = 19300 + Math.floor(Math.random() * 1000); // Random port in 19300..20299 for this test's server.
+  const server = new ApiServer(
+    { // First argument: server options (loopback host, one API key, CORS off).
+      host: '127.0.0.1',
+      port,
+      apiKeys: ['valid-key'],
+      cors: false,
+      corsOrigins: [],
+    },
+    { // Second argument: stub exposing getAllStatus / getAllAvailableModels.
+      async getAllStatus() {
+        return new Map([
+          ['vllm-1', { running: false, health: 'unhealthy' }], // Single entry that is stopped and unhealthy; presumably drives 'unhealthy_container' (confirm in ApiServer).
+        ]);
+      },
+      async getAllAvailableModels() {
+        return new Map(); // No available models; presumably drives 'no_models_available' (confirm in ApiServer).
+      },
+    } as never, // `as never` lets the partial stub satisfy the constructor's parameter type.
+    { // Third argument: stub whose getAllModels yields an empty list.
+      async getAllModels() {
+        return [];
+      },
+    } as never,
+    {} as never, // Fourth argument: empty placeholder; NOTE(review): its role is not visible in this hunk.
+    { // Fifth argument: stub whose getStatus reports no local node, nodes, models, or desired deployments.
+      getStatus() {
+        return {
+          localNode: null,
+          nodes: [],
+          models: {},
+          desiredDeployments: [],
+        };
+      },
+    } as never,
+    { // Sixth argument: extra dependencies; only gpuDetector is supplied here.
+      gpuDetector: {
+        async detectGpus() {
+          return [{ id: 'nvidia-0' }]; // Reports one GPU, so neither asserted reason is about missing GPUs (assumption; confirm in ApiServer).
+        },
+      } as never,
+    },
+  );
+
+  await server.start();
+
+  try {
+    const response = await fetch(`http://127.0.0.1:${port}/health`); // No auth header is sent on this request.
+    const body = await response.json();
+
+    assertEquals(response.status, 503); // Degraded health is expected as HTTP 503.
+    assertEquals(body.status, 'degraded');
+    assertEquals(body.reasons.includes('unhealthy_container'), true); // Membership checks only; order and extra reasons are not asserted.
+    assertEquals(body.reasons.includes('no_models_available'), true);
+  } finally {
+    await server.stop(); // Always stop the server, even when an assertion above throws.
+  }
+});
+
 Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
  const port = 19200 + Math.floor(Math.random() * 1000);
  const server = new ApiServer(
@@ -191,7 +253,9 @@ Deno.test('ApiServer enforces api rate limits while exempting health and metrics
        return new Map();
      },
      async getAllAvailableModels() {
-        return new Map();
+        return new Map([
+          ['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
+        ]);
      },
    } as never,
    {