feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+53
-50
@@ -1,34 +1,29 @@
|
||||
/**
|
||||
* Models Handler
|
||||
*
|
||||
* Handles /v1/models endpoints.
|
||||
* Models handler.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IModelInfo,
|
||||
IListModelsResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import type { IApiError, IListModelsResponse, IModelInfo } from '../../interfaces/api.ts';
|
||||
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
/**
|
||||
* Handler for model-related requests
|
||||
*/
|
||||
export class ModelsHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private clusterCoordinator: ClusterCoordinator;
|
||||
|
||||
constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
|
||||
constructor(
|
||||
containerManager: ContainerManager,
|
||||
modelRegistry: ModelRegistry,
|
||||
clusterCoordinator: ClusterCoordinator,
|
||||
) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.clusterCoordinator = clusterCoordinator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models
|
||||
*/
|
||||
public async handleListModels(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const models = await this.getAvailableModels();
|
||||
@@ -47,13 +42,12 @@ export class ModelsHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models/:model
|
||||
*/
|
||||
public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
|
||||
try {
|
||||
const models = await this.getAvailableModels();
|
||||
const model = models.find((m) => m.id === modelId);
|
||||
const requested = await this.modelRegistry.getModel(modelId);
|
||||
const canonicalId = requested?.id || modelId;
|
||||
const model = models.find((entry) => entry.id === canonicalId);
|
||||
|
||||
if (!model) {
|
||||
this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
|
||||
@@ -69,51 +63,61 @@ export class ModelsHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all available models from running containers, cluster status, and the model catalog
|
||||
*/
|
||||
private async getAvailableModels(): Promise<IModelInfo[]> {
|
||||
const models: IModelInfo[] = [];
|
||||
const seen = new Set<string>();
|
||||
const timestamp = Math.floor(Date.now() / 1000);
|
||||
|
||||
// Get models from running containers
|
||||
const containerModels = await this.containerManager.getAllAvailableModels();
|
||||
for (const [modelId, modelInfo] of containerModels) {
|
||||
if (!seen.has(modelId)) {
|
||||
seen.add(modelId);
|
||||
models.push({
|
||||
id: modelId,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${modelInfo.container}`,
|
||||
});
|
||||
for (const [modelId, endpoints] of containerModels) {
|
||||
if (seen.has(modelId)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const primaryEndpoint = endpoints[0];
|
||||
seen.add(modelId);
|
||||
models.push({
|
||||
id: modelId,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${primaryEndpoint?.type || 'vllm'}`,
|
||||
});
|
||||
}
|
||||
|
||||
// Add greenlit models that aren't loaded yet
|
||||
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
|
||||
for (const greenlit of greenlitModels) {
|
||||
if (!seen.has(greenlit.name)) {
|
||||
seen.add(greenlit.name);
|
||||
models.push({
|
||||
id: greenlit.name,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${greenlit.container}`,
|
||||
});
|
||||
const clusterStatus = this.clusterCoordinator.getStatus();
|
||||
for (const [modelId, locations] of Object.entries(clusterStatus.models)) {
|
||||
if (seen.has(modelId) || locations.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen.add(modelId);
|
||||
models.push({
|
||||
id: modelId,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${locations[0].engine}`,
|
||||
});
|
||||
}
|
||||
|
||||
// Sort alphabetically
|
||||
models.sort((a, b) => a.id.localeCompare(b.id));
|
||||
const catalogModels = await this.modelRegistry.getAllModels();
|
||||
for (const model of catalogModels) {
|
||||
if (seen.has(model.id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen.add(model.id);
|
||||
models.push({
|
||||
id: model.id,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${model.engine}`,
|
||||
});
|
||||
}
|
||||
|
||||
models.sort((left, right) => left.id.localeCompare(right.id));
|
||||
return models;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
@@ -126,7 +130,6 @@ export class ModelsHandler {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user