feat(health): include degraded reasons in responses
This commit is contained in:
+65
-1
@@ -64,6 +64,8 @@ Deno.test('ApiServer serves health metrics and authenticated model listings', as
|
|||||||
assertEquals(healthResponse.status, 200);
|
assertEquals(healthResponse.status, 200);
|
||||||
assertEquals(healthBody.status, 'ok');
|
assertEquals(healthBody.status, 'ok');
|
||||||
assertEquals(healthBody.models, 1);
|
assertEquals(healthBody.models, 1);
|
||||||
|
assertEquals(Array.isArray(healthBody.reasons), true);
|
||||||
|
assertEquals(healthBody.reasons.length, 0);
|
||||||
assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');
|
assertEquals(typeof healthResponse.headers.get('x-request-id'), 'string');
|
||||||
|
|
||||||
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
const metricsResponse = await fetch(`http://127.0.0.1:${port}/metrics`);
|
||||||
@@ -175,6 +177,66 @@ Deno.test('ApiServer metrics expose 5xx counts for failing endpoints', async ()
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Starts an ApiServer backed by stub collaborators that report one
 * non-running, unhealthy container and an empty model map, then checks
 * that GET /health answers 503 with status 'degraded' and lists both
 * 'unhealthy_container' and 'no_models_available' in `reasons`.
 */
Deno.test('ApiServer health reports degraded reasons', async () => {
  const port = 19300 + Math.floor(Math.random() * 1000);

  // Container manager stub: a single unhealthy container, no available models.
  const containerManagerStub = {
    getAllStatus: async () =>
      new Map([
        ['vllm-1', { running: false, health: 'unhealthy' }],
      ]),
    getAllAvailableModels: async () => new Map(),
  } as never;

  // Stub whose getAllModels() resolves to an empty list.
  const modelSourceStub = {
    getAllModels: async () => [],
  } as never;

  // Empty placeholder for the fourth constructor argument.
  const placeholderStub = {} as never;

  // Stub whose getStatus() returns an empty status snapshot.
  const statusProviderStub = {
    getStatus: () => ({
      localNode: null,
      nodes: [],
      models: {},
      desiredDeployments: [],
    }),
  } as never;

  // GPU detector stub that reports exactly one GPU.
  const gpuDetector = {
    detectGpus: async () => [{ id: 'nvidia-0' }],
  } as never;

  const server = new ApiServer(
    {
      host: '127.0.0.1',
      port,
      apiKeys: ['valid-key'],
      cors: false,
      corsOrigins: [],
    },
    containerManagerStub,
    modelSourceStub,
    placeholderStub,
    statusProviderStub,
    { gpuDetector },
  );

  await server.start();

  try {
    const response = await fetch(`http://127.0.0.1:${port}/health`);
    const body = await response.json();

    assertEquals(response.status, 503);
    assertEquals(body.status, 'degraded');

    // Both degradation causes configured above must be reported.
    for (const expectedReason of ['unhealthy_container', 'no_models_available']) {
      assertEquals(body.reasons.includes(expectedReason), true);
    }
  } finally {
    // Always stop the server, even if an assertion above failed.
    await server.stop();
  }
});
|
||||||
|
|
||||||
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
|
Deno.test('ApiServer enforces api rate limits while exempting health and metrics', async () => {
|
||||||
const port = 19200 + Math.floor(Math.random() * 1000);
|
const port = 19200 + Math.floor(Math.random() * 1000);
|
||||||
const server = new ApiServer(
|
const server = new ApiServer(
|
||||||
@@ -191,7 +253,9 @@ Deno.test('ApiServer enforces api rate limits while exempting health and metrics
|
|||||||
return new Map();
|
return new Map();
|
||||||
},
|
},
|
||||||
async getAllAvailableModels() {
|
async getAllAvailableModels() {
|
||||||
return new Map();
|
return new Map([
|
||||||
|
['meta-llama/Llama-3.1-8B-Instruct', [{ type: 'vllm' }]],
|
||||||
|
]);
|
||||||
},
|
},
|
||||||
} as never,
|
} as never,
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -210,6 +210,7 @@ export class ApiServer {
|
|||||||
const models = await this.containerManager.getAllAvailableModels();
|
const models = await this.containerManager.getAllAvailableModels();
|
||||||
|
|
||||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||||
|
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
|
||||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||||
|
|
||||||
@@ -220,6 +221,7 @@ export class ApiServer {
|
|||||||
} else {
|
} else {
|
||||||
containerHealth[id] = 'unhealthy';
|
containerHealth[id] = 'unhealthy';
|
||||||
status = 'degraded';
|
status = 'degraded';
|
||||||
|
reasons.add('unhealthy_container');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -228,8 +230,14 @@ export class ApiServer {
|
|||||||
gpuStatus[gpu.id] = 'available';
|
gpuStatus[gpu.id] = 'available';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (models.size === 0) {
|
||||||
|
status = 'degraded';
|
||||||
|
reasons.add('no_models_available');
|
||||||
|
}
|
||||||
|
|
||||||
const response: IHealthResponse = {
|
const response: IHealthResponse = {
|
||||||
status,
|
status,
|
||||||
|
reasons: Array.from(reasons),
|
||||||
version: VERSION,
|
version: VERSION,
|
||||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||||
containers: statuses.size,
|
containers: statuses.size,
|
||||||
@@ -247,6 +255,7 @@ export class ApiServer {
|
|||||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||||
res.end(JSON.stringify({
|
res.end(JSON.stringify({
|
||||||
status: 'error',
|
status: 'error',
|
||||||
|
reasons: ['gpu_detection_failed'],
|
||||||
error: error instanceof Error ? error.message : String(error),
|
error: error instanceof Error ? error.message : String(error),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -309,6 +309,8 @@ export interface IApiError {
|
|||||||
export interface IHealthResponse {
|
export interface IHealthResponse {
|
||||||
/** Status */
|
/** Status */
|
||||||
status: 'ok' | 'degraded' | 'error';
|
status: 'ok' | 'degraded' | 'error';
|
||||||
|
/** Machine-readable reasons for degraded or error states */
|
||||||
|
reasons?: Array<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>;
|
||||||
/** Version */
|
/** Version */
|
||||||
version: string;
|
version: string;
|
||||||
/** Uptime in seconds */
|
/** Uptime in seconds */
|
||||||
|
|||||||
@@ -151,6 +151,7 @@ export class UiServer {
|
|||||||
const gpus = await this.gpuDetector.detectGpus();
|
const gpus = await this.gpuDetector.detectGpus();
|
||||||
|
|
||||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||||
|
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
|
||||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||||
|
|
||||||
@@ -160,14 +161,21 @@ export class UiServer {
|
|||||||
} else {
|
} else {
|
||||||
containerHealth[id] = 'unhealthy';
|
containerHealth[id] = 'unhealthy';
|
||||||
status = 'degraded';
|
status = 'degraded';
|
||||||
|
reasons.add('unhealthy_container');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const gpu of gpus) {
|
for (const gpu of gpus) {
|
||||||
gpuStatus[gpu.id] = 'available';
|
gpuStatus[gpu.id] = 'available';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (models.size === 0) {
|
||||||
|
status = 'degraded';
|
||||||
|
reasons.add('no_models_available');
|
||||||
|
}
|
||||||
|
|
||||||
const health: IHealthResponse = {
|
const health: IHealthResponse = {
|
||||||
status,
|
status,
|
||||||
|
reasons: Array.from(reasons),
|
||||||
version: VERSION,
|
version: VERSION,
|
||||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||||
containers: statuses.size,
|
containers: statuses.size,
|
||||||
|
|||||||
Reference in New Issue
Block a user