feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+53 -50
View File
@@ -1,34 +1,29 @@
/**
* Models Handler
*
* Handles /v1/models endpoints.
* Models handler.
*/
import * as http from 'node:http';
import type {
IModelInfo,
IListModelsResponse,
IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import type { IApiError, IListModelsResponse, IModelInfo } from '../../interfaces/api.ts';
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { logger } from '../../logger.ts';
import { ModelRegistry } from '../../models/registry.ts';
/**
* Handler for model-related requests
*/
export class ModelsHandler {
private containerManager: ContainerManager;
private modelRegistry: ModelRegistry;
private clusterCoordinator: ClusterCoordinator;
constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
constructor(
containerManager: ContainerManager,
modelRegistry: ModelRegistry,
clusterCoordinator: ClusterCoordinator,
) {
this.containerManager = containerManager;
this.modelRegistry = modelRegistry;
this.clusterCoordinator = clusterCoordinator;
}
/**
* Handle GET /v1/models
*/
public async handleListModels(res: http.ServerResponse): Promise<void> {
try {
const models = await this.getAvailableModels();
@@ -47,13 +42,12 @@ export class ModelsHandler {
}
}
/**
* Handle GET /v1/models/:model
*/
public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
try {
const models = await this.getAvailableModels();
const model = models.find((m) => m.id === modelId);
const requested = await this.modelRegistry.getModel(modelId);
const canonicalId = requested?.id || modelId;
const model = models.find((entry) => entry.id === canonicalId);
if (!model) {
this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
@@ -69,51 +63,61 @@ export class ModelsHandler {
}
}
/**
* Get all available models from containers and greenlist
*/
private async getAvailableModels(): Promise<IModelInfo[]> {
const models: IModelInfo[] = [];
const seen = new Set<string>();
const timestamp = Math.floor(Date.now() / 1000);
// Get models from running containers
const containerModels = await this.containerManager.getAllAvailableModels();
for (const [modelId, modelInfo] of containerModels) {
if (!seen.has(modelId)) {
seen.add(modelId);
models.push({
id: modelId,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${modelInfo.container}`,
});
for (const [modelId, endpoints] of containerModels) {
if (seen.has(modelId)) {
continue;
}
const primaryEndpoint = endpoints[0];
seen.add(modelId);
models.push({
id: modelId,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${primaryEndpoint?.type || 'vllm'}`,
});
}
// Add greenlit models that aren't loaded yet
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
for (const greenlit of greenlitModels) {
if (!seen.has(greenlit.name)) {
seen.add(greenlit.name);
models.push({
id: greenlit.name,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${greenlit.container}`,
});
const clusterStatus = this.clusterCoordinator.getStatus();
for (const [modelId, locations] of Object.entries(clusterStatus.models)) {
if (seen.has(modelId) || locations.length === 0) {
continue;
}
seen.add(modelId);
models.push({
id: modelId,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${locations[0].engine}`,
});
}
// Sort alphabetically
models.sort((a, b) => a.id.localeCompare(b.id));
const catalogModels = await this.modelRegistry.getAllModels();
for (const model of catalogModels) {
if (seen.has(model.id)) {
continue;
}
seen.add(model.id);
models.push({
id: model.id,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${model.engine}`,
});
}
models.sort((left, right) => left.id.localeCompare(right.id));
return models;
}
/**
* Send error response
*/
private sendError(
res: http.ServerResponse,
statusCode: number,
@@ -126,7 +130,6 @@ export class ModelsHandler {
message,
type,
param,
code: null,
},
};