refactor(health): share health snapshot computation
This commit is contained in:
+8
-39
@@ -16,6 +16,7 @@ import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
import { ClusterHandler } from './handlers/cluster.ts';
|
||||
+ import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||
|
||||
interface IApiServerOptions {
|
||||
gpuDetector?: GpuDetector;
|
||||
@@ -209,47 +210,15 @@ export class ApiServer {
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
|
||||
- let status: 'ok' | 'degraded' | 'error' = 'ok';
- const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
- const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
- const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
-
- // Check container health
- for (const [id, containerStatus] of statuses) {
- if (containerStatus.running && containerStatus.health === 'healthy') {
- containerHealth[id] = 'healthy';
- } else {
- containerHealth[id] = 'unhealthy';
- status = 'degraded';
- reasons.add('unhealthy_container');
- }
- }
-
- // Check GPU status
- for (const gpu of gpus) {
- gpuStatus[gpu.id] = 'available';
- }
-
- if (models.size === 0) {
- status = 'degraded';
- reasons.add('no_models_available');
- }
-
- const response: IHealthResponse = {
- status,
- reasons: Array.from(reasons),
+ const response: IHealthResponse = buildHealthSnapshot({
+ statuses,
+ modelCount: models.size,
+ gpus,
+ startTime: this.startTime,
|
||||
version: VERSION,
|
||||
- uptime: Math.floor((Date.now() - this.startTime) / 1000),
- containers: statuses.size,
- models: models.size,
- gpus: gpus.length,
- details: {
- containers: containerHealth,
- gpus: gpuStatus,
- },
- };
+ });
|
||||
|
||||
- res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
+ res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response, null, 2));
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
|
||||
Reference in New Issue
Block a user