feat(health): include degraded reasons in responses
This commit is contained in:
+65
-1
@@ -64,6 +64,8 @@ Deno.test('ApiServer serves health metrics and authenticated model listings', as
|
||||
assertEquals(healthResponse.status, 200);
|
||||
assertEquals(healthBody.status, 'ok');
|
||||
assertEquals(healthBody.models, 1);
|
||||
assertEquals(Array.isArray(healthBody.reasons), true);
|
||||
assertEquals(healthBody.reasons.length, 0);
|
||||
assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');
|
||||
|
||||
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
||||
@@ -175,6 +177,66 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async ()
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Checks the `/health` response when the injected stubs report a stopped,
 * unhealthy container and no available models: the endpoint is expected to
 * answer 503 with status `degraded` and to list both reason codes.
 */
Deno.test('ApiServer health reports degraded reasons', async () => {
  // Random port in [19300, 20299] for this test's listener.
  const listenPort = 19300 + Math.floor(Math.random() * 1000);

  // Partial stand-ins for the server's collaborators. Each is cast with
  // `as never` so the partial object is accepted wherever it is passed.
  // NOTE(review): the role of each positional constructor argument is not
  // visible here — confirm against the ApiServer constructor signature.

  // Reports one container that is not running and is unhealthy, and an empty
  // map of available models.
  const unhealthyContainerStub = {
    getAllStatus: async () =>
      new Map([
        ['vllm-1', { running: false, health: 'unhealthy' }],
      ]),
    getAllAvailableModels: async () => new Map(),
  } as never;

  // Returns an empty model list.
  const emptyModelListStub = {
    getAllModels: async () => [],
  } as never;

  // Empty object; no members are provided for this argument.
  const unusedStub = {} as never;

  // Returns a status snapshot with no local node, nodes, models or deployments.
  const emptyStatusStub = {
    getStatus: () => ({
      localNode: null,
      nodes: [],
      models: {},
      desiredDeployments: [],
    }),
  } as never;

  // Reports a single detected GPU.
  const gpuDetectorStub = {
    detectGpus: async () => [{ id: 'nvidia-0' }],
  } as never;

  const apiServer = new ApiServer(
    {
      host: '127.0.0.1',
      port: listenPort,
      apiKeys: ['valid-key'],
      cors: false,
      corsOrigins: [],
    },
    unhealthyContainerStub,
    emptyModelListStub,
    unusedStub,
    emptyStatusStub,
    { gpuDetector: gpuDetectorStub },
  );

  await apiServer.start();

  try {
    const healthResponse = await fetch(`http://127.0.0.1:${listenPort}/health`);
    // The body is read before any assertion runs, so it is always consumed.
    const healthBody = await healthResponse.json();

    assertEquals(healthResponse.status, 503);
    assertEquals(healthBody.status, 'degraded');

    // Both reason codes must appear in the reported list.
    for (const expectedReason of ['unhealthy_container', 'no_models_available']) {
      assertEquals(healthBody.reasons.includes(expectedReason), true);
    }
  } finally {
    // Stop the server even when an assertion above throws.
    await apiServer.stop();
  }
});
|
||||
|
||||
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
|
||||
const port = 19200 + Math.floor(Math.random() * 1000);
|
||||
const server = new ApiServer(
|
||||
@@ -191,7 +253,9 @@ Deno.test('ApiServer enforces api rate limits while exempting health and metrics
|
||||
return new Map();
|
||||
},
|
||||
async getAllAvailableModels() {
|
||||
return new Map();
|
||||
return new Map([
|
||||
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
|
||||
]);
|
||||
},
|
||||
} as never,
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user