feat(health): include degraded reasons in responses

This commit is contained in:
2026-04-21 13:34:58 +00:00
parent 703cceb512
commit 9022c8dbf3
4 changed files with 84 additions and 1 deletions
+65 -1
View File
@@ -64,6 +64,8 @@ Deno.test('ApiServer serves health metrics and authenticated model listings', as
assertEquals(healthResponse.status, 200);
assertEquals(healthBody.status, 'ok');
assertEquals(healthBody.models, 1);
assertEquals(Array.isArray(healthBody.reasons), true);
assertEquals(healthBody.reasons.length, 0);
assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
@@ -175,6 +177,66 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async ()
}
});
// Checks that GET /health answers 503 with status "degraded" and lists both the
// 'unhealthy_container' and 'no_models_available' reasons when the stubs report
// one stopped, unhealthy container and no models at all.
Deno.test('ApiServer health reports degraded reasons', async () => {
  const listenPort = 19300 + Math.floor(Math.random() * 1000);

  const serverOptions = {
    host: '127.0.0.1',
    port: listenPort,
    apiKeys: ['valid-key'],
    cors: false,
    corsOrigins: [],
  };

  // Second constructor argument: reports a single container that is neither
  // running nor healthy, and an empty map of available models.
  // NOTE(review): presumably the container manager dependency — confirm against ApiServer.
  const unhealthyContainersStub = {
    getAllStatus: async () =>
      new Map([
        ['vllm-1', { running: false, health: 'unhealthy' }],
      ]),
    getAllAvailableModels: async () => new Map(),
  };

  // Third constructor argument: knows about no models.
  const emptyModelSourceStub = {
    getAllModels: async () => [],
  };

  // Fifth constructor argument: a status snapshot with no nodes, models or deployments.
  const emptyStatusStub = {
    getStatus: () => ({
      localNode: null,
      nodes: [],
      models: {},
      desiredDeployments: [],
    }),
  };

  // Sixth constructor argument: GPU detection yields exactly one device.
  const extras = {
    gpuDetector: {
      detectGpus: async () => [{ id: 'nvidia-0' }],
    } as never,
  };

  const server = new ApiServer(
    serverOptions,
    unhealthyContainersStub as never,
    emptyModelSourceStub as never,
    {} as never,
    emptyStatusStub as never,
    extras,
  );

  await server.start();

  try {
    const healthResponse = await fetch(`http://127.0.0.1:${listenPort}/health`);
    const payload = await healthResponse.json();

    assertEquals(healthResponse.status, 503);
    assertEquals(payload.status, 'degraded');

    // Both degradation causes must be listed in the response body.
    for (const expectedReason of ['unhealthy_container', 'no_models_available']) {
      assertEquals(payload.reasons.includes(expectedReason), true);
    }
  } finally {
    // Always release the listening port, even when an assertion fails.
    await server.stop();
  }
});
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
const port = 19200 + Math.floor(Math.random() * 1000);
const server = new ApiServer(
@@ -191,7 +253,9 @@ Deno.test('ApiServer enforces api rate limits while exempting health and metrics
return new Map();
},
async getAllAvailableModels() {
return new Map();
return new Map([
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
]);
},
} as never,
{