feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+17 -4
View File
@@ -7,13 +7,15 @@
import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { ClusterCoordinator } from '../cluster/coordinator.ts';
import { logger } from '../logger.ts';
import { API_SERVER } from '../constants.ts';
import { VERSION } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';
import { ClusterHandler } from './handlers/cluster.ts';
/**
* API Server for ModelGrid
@@ -26,22 +28,29 @@ export class ApiServer {
private modelRegistry: ModelRegistry;
private modelLoader: ModelLoader;
private gpuDetector: GpuDetector;
private clusterCoordinator: ClusterCoordinator;
private clusterHandler: ClusterHandler;
private startTime: number = 0;
constructor(
config: IApiConfig,
containerManager: ContainerManager,
modelRegistry: ModelRegistry,
modelLoader: ModelLoader,
clusterCoordinator: ClusterCoordinator,
) {
this.config = config;
this.containerManager = containerManager;
this.modelRegistry = modelRegistry;
this.gpuDetector = new GpuDetector();
this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
this.modelLoader = modelLoader;
this.clusterCoordinator = clusterCoordinator;
this.clusterHandler = new ClusterHandler(clusterCoordinator);
this.router = new ApiRouter(
containerManager,
modelRegistry,
this.modelLoader,
clusterCoordinator,
config.apiKeys,
);
}
@@ -120,6 +129,11 @@ export class ApiServer {
const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
const path = url.pathname;
if (path.startsWith('/_cluster')) {
await this.clusterHandler.handle(req, res, path, url);
return;
}
// Health check endpoint (no auth required)
if (path === '/health' || path === '/healthz') {
await this.handleHealthCheck(res);
@@ -194,7 +208,7 @@ export class ApiServer {
const response: IHealthResponse = {
status,
version: '1.0.0', // TODO: Get from config
version: VERSION,
uptime: Math.floor((Date.now() - this.startTime) / 1000),
containers: statuses.size,
models: models.size,
@@ -276,7 +290,6 @@ export class ApiServer {
error: {
message,
type,
code: null,
},
}));
}