refactor(health): share health snapshot computation
This commit is contained in:
+8
-39
@@ -16,6 +16,7 @@ import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
import { ClusterHandler } from './handlers/cluster.ts';
|
||||
import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||
|
||||
interface IApiServerOptions {
|
||||
gpuDetector?: GpuDetector;
|
||||
@@ -209,47 +210,15 @@ export class ApiServer {
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
|
||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
|
||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||
|
||||
// Check container health
|
||||
for (const [id, containerStatus] of statuses) {
|
||||
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||||
containerHealth[id] = 'healthy';
|
||||
} else {
|
||||
containerHealth[id] = 'unhealthy';
|
||||
status = 'degraded';
|
||||
reasons.add('unhealthy_container');
|
||||
}
|
||||
}
|
||||
|
||||
// Check GPU status
|
||||
for (const gpu of gpus) {
|
||||
gpuStatus[gpu.id] = 'available';
|
||||
}
|
||||
|
||||
if (models.size === 0) {
|
||||
status = 'degraded';
|
||||
reasons.add('no_models_available');
|
||||
}
|
||||
|
||||
const response: IHealthResponse = {
|
||||
status,
|
||||
reasons: Array.from(reasons),
|
||||
const response: IHealthResponse = buildHealthSnapshot({
|
||||
statuses,
|
||||
modelCount: models.size,
|
||||
gpus,
|
||||
startTime: this.startTime,
|
||||
version: VERSION,
|
||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||
containers: statuses.size,
|
||||
models: models.size,
|
||||
gpus: gpus.length,
|
||||
details: {
|
||||
containers: containerHealth,
|
||||
gpus: gpuStatus,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response, null, 2));
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||
import type { IContainerStatus } from '../interfaces/container.ts';
|
||||
import type { IGpuInfo } from '../interfaces/gpu.ts';
|
||||
|
||||
/**
 * Builds the health response object from already-collected runtime state.
 *
 * The overall status starts as `'ok'` and is downgraded to `'degraded'` when
 * - any container is not both `running` and reporting `health === 'healthy'`, or
 * - `modelCount` is `0`.
 *
 * Every GPU in `options.gpus` is reported as `'available'`; per-GPU state is
 * not inspected here. This function never produces the `'error'` status or the
 * `'gpu_detection_failed'` reason itself.
 *
 * @param options.statuses   Container statuses keyed by container id.
 * @param options.modelCount Number of models currently available.
 * @param options.gpus       Detected GPUs; only `id` and the list length are read.
 * @param options.startTime  Start timestamp in the same unit as `Date.now()`
 *                           (epoch milliseconds), used to derive `uptime`.
 * @param options.version    Version string copied verbatim into the response.
 * @returns The health response, with `uptime` in whole seconds (never negative).
 */
export function buildHealthSnapshot(options: {
  statuses: Map<string, IContainerStatus>;
  modelCount: number;
  gpus: IGpuInfo[];
  startTime: number;
  version: string;
}): IHealthResponse {
  let status: 'ok' | 'degraded' | 'error' = 'ok';
  const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
  const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
  const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};

  // A container counts as healthy only when it is running AND reports healthy.
  for (const [id, containerStatus] of options.statuses) {
    if (containerStatus.running && containerStatus.health === 'healthy') {
      containerHealth[id] = 'healthy';
    } else {
      containerHealth[id] = 'unhealthy';
      status = 'degraded';
      reasons.add('unhealthy_container');
    }
  }

  // Detected GPUs are always listed as available.
  for (const gpu of options.gpus) {
    gpuStatus[gpu.id] = 'available';
  }

  if (options.modelCount === 0) {
    status = 'degraded';
    reasons.add('no_models_available');
  }

  // Date.now() is wall-clock time and can step backwards (or startTime may lie
  // in the future), so clamp to zero rather than report a negative uptime.
  const elapsedMs = Date.now() - options.startTime;
  const uptime = Math.max(0, Math.floor(elapsedMs / 1000));

  return {
    status,
    reasons: Array.from(reasons),
    version: options.version,
    uptime,
    containers: options.statuses.size,
    models: options.modelCount,
    gpus: options.gpus.length,
    details: {
      containers: containerHealth,
      gpus: gpuStatus,
    },
  };
}
|
||||
+7
-35
@@ -22,6 +22,7 @@ import { VERSION } from '../constants.ts';
|
||||
import type { ContainerManager } from '../containers/container-manager.ts';
|
||||
import type { ClusterManager } from '../cluster/cluster-manager.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
import { buildHealthSnapshot } from '../helpers/health.ts';
|
||||
|
||||
interface IBundledFile {
|
||||
path: string;
|
||||
@@ -150,42 +151,13 @@ export class UiServer {
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
|
||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||
const reasons = new Set<'unhealthy_container' | 'no_models_available' | 'gpu_detection_failed'>();
|
||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||
|
||||
for (const [id, s] of statuses) {
|
||||
if (s.running && s.health === 'healthy') {
|
||||
containerHealth[id] = 'healthy';
|
||||
} else {
|
||||
containerHealth[id] = 'unhealthy';
|
||||
status = 'degraded';
|
||||
reasons.add('unhealthy_container');
|
||||
}
|
||||
}
|
||||
for (const gpu of gpus) {
|
||||
gpuStatus[gpu.id] = 'available';
|
||||
}
|
||||
|
||||
if (models.size === 0) {
|
||||
status = 'degraded';
|
||||
reasons.add('no_models_available');
|
||||
}
|
||||
|
||||
const health: IHealthResponse = {
|
||||
status,
|
||||
reasons: Array.from(reasons),
|
||||
const health: IHealthResponse = buildHealthSnapshot({
|
||||
statuses,
|
||||
modelCount: models.size,
|
||||
gpus,
|
||||
startTime: this.startTime,
|
||||
version: VERSION,
|
||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||
containers: statuses.size,
|
||||
models: models.size,
|
||||
gpus: gpus.length,
|
||||
details: {
|
||||
containers: containerHealth,
|
||||
gpus: gpuStatus,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const clusterConfig = this.clusterManager.getConfig();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user