refactor(health): share health snapshot computation

This commit is contained in:
2026-04-21 13:36:07 +00:00
parent 9022c8dbf3
commit 405fff91af
3 changed files with 64 additions and 74 deletions
+8 -39
View File
@@ -16,6 +16,7 @@ import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts'; import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts'; import { GpuDetector } from '../hardware/gpu-detector.ts';
import { ClusterHandler } from './handlers/cluster.ts'; import { ClusterHandler } from './handlers/cluster.ts';
import { buildHealthSnapshot } from '../helpers/health.ts';
interface IApiServerOptions { interface IApiServerOptions {
gpuDetector?: GpuDetector; gpuDetector?: GpuDetector;
@@ -209,47 +210,15 @@ export class ApiServer {
const gpus = await this.gpuDetector.detectGpus(); const gpus = await this.gpuDetector.detectGpus();
const models = await this.containerManager.getAllAvailableModels(); const models = await this.containerManager.getAllAvailableModels();
let status: 'ok' | 'degraded' | 'error' = 'ok'; const response: IHealthResponse = buildHealthSnapshot({
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>(); statuses,
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {}; modelCount: models.size,
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {}; gpus,
startTime: this.startTime,
// Check container health
for (const [id, containerStatus] of statuses) {
if (containerStatus.running && containerStatus.health === 'healthy') {
containerHealth[id] = 'healthy';
} else {
containerHealth[id] = 'unhealthy';
status = 'degraded';
reasons.add('unhealthy_container');
}
}
// Check GPU status
for (const gpu of gpus) {
gpuStatus[gpu.id] = 'available';
}
if (models.size === 0) {
status = 'degraded';
reasons.add('no_models_available');
}
const response: IHealthResponse = {
status,
reasons: Array.from(reasons),
version: VERSION, version: VERSION,
uptime: Math.floor((Date.now() - this.startTime) / 1000), });
containers: statuses.size,
models: models.size,
gpus: gpus.length,
details: {
containers: containerHealth,
gpus: gpuStatus,
},
};
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' }); res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(response, null, 2)); res.end(JSON.stringify(response, null, 2));
} catch (error) { } catch (error) {
res.writeHead(500, { 'Content-Type': 'application/json' }); res.writeHead(500, { 'Content-Type': 'application/json' });
+49
View File
@@ -0,0 +1,49 @@
import type { IHealthResponse } from '../interfaces/api.ts';
import type { IContainerStatus } from '../interfaces/container.ts';
import type { IGpuInfo } from '../interfaces/gpu.ts';
/**
 * Builds the health payload from already-collected runtime data.
 *
 * A container counts as healthy only when it is running and its `health`
 * field equals `'healthy'`. The overall status is `'degraded'` when at least
 * one container is unhealthy or when `modelCount` is zero; otherwise it is
 * `'ok'`. This function never produces the `'error'` status or the
 * `'gpu_detection_failed'` reason, and it reports every GPU it is given as
 * `'available'`.
 *
 * @param options.statuses Container statuses keyed by container id.
 * @param options.modelCount Number of models currently available.
 * @param options.gpus GPUs to list in the snapshot.
 * @param options.startTime Start timestamp in milliseconds, compared against `Date.now()`.
 * @param options.version Version string copied into the snapshot unchanged.
 * @returns The assembled health response, with `uptime` in whole seconds.
 */
export function buildHealthSnapshot(options: {
  statuses: Map<string, IContainerStatus>;
  modelCount: number;
  gpus: IGpuInfo[];
  startTime: number;
  version: string;
}): IHealthResponse {
  const { statuses, modelCount, gpus, startTime, version } = options;

  // Per-container verdicts, remembering whether any container failed the check.
  const containerDetails: Record<string, 'healthy' | 'unhealthy'> = {};
  let sawUnhealthyContainer = false;
  statuses.forEach((entry, containerId) => {
    const isHealthy = entry.running && entry.health === 'healthy';
    if (isHealthy) {
      containerDetails[containerId] = 'healthy';
      return;
    }
    containerDetails[containerId] = 'unhealthy';
    sawUnhealthyContainer = true;
  });

  // Every GPU passed in is listed as available.
  const gpuDetails: Record<string, 'available' | 'in_use' | 'error'> = {};
  for (const device of gpus) {
    gpuDetails[device.id] = 'available';
  }

  // Reasons are listed in a fixed order: container problems first, then models.
  const reasons: Array<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'> = [];
  if (sawUnhealthyContainer) {
    reasons.push('unhealthy_container');
  }
  if (modelCount === 0) {
    reasons.push('no_models_available');
  }

  const status: 'ok' | 'degraded' | 'error' = reasons.length > 0 ? 'degraded' : 'ok';
  const uptime = Math.floor((Date.now() - startTime) / 1000);

  return {
    status,
    reasons,
    version,
    uptime,
    containers: statuses.size,
    models: modelCount,
    gpus: gpus.length,
    details: {
      containers: containerDetails,
      gpus: gpuDetails,
    },
  };
}
+7 -35
View File
@@ -22,6 +22,7 @@ import { VERSION } from '../constants.ts';
import type { ContainerManager } from '../containers/container-manager.ts'; import type { ContainerManager } from '../containers/container-manager.ts';
import type { ClusterManager } from '../cluster/cluster-manager.ts'; import type { ClusterManager } from '../cluster/cluster-manager.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts'; import { GpuDetector } from '../hardware/gpu-detector.ts';
import { buildHealthSnapshot } from '../helpers/health.ts';
interface IBundledFile { interface IBundledFile {
path: string; path: string;
@@ -150,42 +151,13 @@ export class UiServer {
const models = await this.containerManager.getAllAvailableModels(); const models = await this.containerManager.getAllAvailableModels();
const gpus = await this.gpuDetector.detectGpus(); const gpus = await this.gpuDetector.detectGpus();
let status: 'ok' | 'degraded' | 'error' = 'ok'; const health: IHealthResponse = buildHealthSnapshot({
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>(); statuses,
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {}; modelCount: models.size,
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {}; gpus,
startTime: this.startTime,
for (const [id, s] of statuses) {
if (s.running && s.health === 'healthy') {
containerHealth[id] = 'healthy';
} else {
containerHealth[id] = 'unhealthy';
status = 'degraded';
reasons.add('unhealthy_container');
}
}
for (const gpu of gpus) {
gpuStatus[gpu.id] = 'available';
}
if (models.size === 0) {
status = 'degraded';
reasons.add('no_models_available');
}
const health: IHealthResponse = {
status,
reasons: Array.from(reasons),
version: VERSION, version: VERSION,
uptime: Math.floor((Date.now() - this.startTime) / 1000), });
containers: statuses.size,
models: models.size,
gpus: gpus.length,
details: {
containers: containerHealth,
gpus: gpuStatus,
},
};
const clusterConfig = this.clusterManager.getConfig(); const clusterConfig = this.clusterManager.getConfig();