feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+38 -17
View File
@@ -5,7 +5,8 @@
*/
import * as http from 'node:http';
import type { IApiError } from '../interfaces/api.ts';
import type { IApiError, IChatCompletionRequest } from '../interfaces/api.ts';
import { ClusterCoordinator } from '../cluster/coordinator.ts';
import { logger } from '../logger.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
@@ -23,6 +24,7 @@ export class ApiRouter {
private containerManager: ContainerManager;
private modelRegistry: ModelRegistry;
private modelLoader: ModelLoader;
private clusterCoordinator: ClusterCoordinator;
private chatHandler: ChatHandler;
private modelsHandler: ModelsHandler;
private embeddingsHandler: EmbeddingsHandler;
@@ -33,16 +35,27 @@ export class ApiRouter {
containerManager: ContainerManager,
modelRegistry: ModelRegistry,
modelLoader: ModelLoader,
clusterCoordinator: ClusterCoordinator,
apiKeys: string[],
) {
this.containerManager = containerManager;
this.modelRegistry = modelRegistry;
this.modelLoader = modelLoader;
this.clusterCoordinator = clusterCoordinator;
// Initialize handlers
this.chatHandler = new ChatHandler(containerManager, modelLoader);
this.modelsHandler = new ModelsHandler(containerManager, modelRegistry);
this.embeddingsHandler = new EmbeddingsHandler(containerManager);
this.chatHandler = new ChatHandler(
containerManager,
modelRegistry,
modelLoader,
clusterCoordinator,
);
this.modelsHandler = new ModelsHandler(containerManager, modelRegistry, clusterCoordinator);
this.embeddingsHandler = new EmbeddingsHandler(
containerManager,
modelRegistry,
clusterCoordinator,
);
// Initialize middleware
this.authMiddleware = new AuthMiddleware(apiKeys);
@@ -119,8 +132,8 @@ export class ApiRouter {
return;
}
// Handle request
await this.chatHandler.handleChatCompletion(req, res, body);
const requestBody = this.sanityMiddleware.sanitizeChatRequest(body as Record<string, unknown>);
await this.chatHandler.handleChatCompletion(req, res, requestBody);
}
/**
@@ -149,7 +162,7 @@ export class ApiRouter {
}
// Convert to chat format and handle
const chatBody = this.convertCompletionToChat(body);
const chatBody = this.convertCompletionToChat(body as Record<string, unknown>);
await this.chatHandler.handleChatCompletion(req, res, chatBody);
}
@@ -222,7 +235,16 @@ export class ApiRouter {
return;
}
await this.embeddingsHandler.handleEmbeddings(res, body);
const validation = this.sanityMiddleware.validateEmbeddingsRequest(body);
if (!validation.valid) {
this.sendError(res, 400, validation.error || 'Invalid request', 'invalid_request_error');
return;
}
const requestBody = this.sanityMiddleware.sanitizeEmbeddingsRequest(
body as Record<string, unknown>,
);
await this.embeddingsHandler.handleEmbeddings(req, res, requestBody);
}
/**
@@ -257,21 +279,21 @@ export class ApiRouter {
/**
 * Convert a legacy completion request body into the chat-completion shape
 * that is handed to the chat handler.
 *
 * The prompt becomes the content of a single `user` message; when the prompt
 * is an array, its entries are joined with a newline into one string. The
 * `model`, `max_tokens`, `temperature`, `top_p`, `n`, `stream` and `stop`
 * fields are copied over from `body` unchanged.
 *
 * The `as` casts are compile-time assertions only: nothing in this method
 * checks the runtime type of any field.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were lost, so several lines occur in pairs (an untyped form followed by a
 * typed form). Presumably the first line of each pair is the removed version
 * and the second the added one; confirm against the real file.
 *
 * @param body - Parsed request body of the legacy completion endpoint.
 * @returns The equivalent chat-completion request.
 */
private convertCompletionToChat(body: Record<string, unknown>): Record<string, unknown> {
private convertCompletionToChat(body: Record<string, unknown>): IChatCompletionRequest {
// Cast only; if `prompt` is absent the value is `undefined` at runtime.
// NOTE(review): presumably the caller validates `prompt` beforehand; verify.
const prompt = body.prompt as string | string[];
// Collapse a multi-part prompt into one string, one part per line.
const promptText = Array.isArray(prompt) ? prompt.join('\n') : prompt;
return {
model: body.model,
model: body.model as string,
// The whole prompt is sent as one user turn; no other message is added.
messages: [
{ role: 'user', content: promptText },
],
// Untyped pass-through form of the option fields.
max_tokens: body.max_tokens,
temperature: body.temperature,
top_p: body.top_p,
n: body.n,
stream: body.stream,
stop: body.stop,
// Typed form of the same fields; each cast narrows `unknown` for the
// compiler and leaves the runtime value as-is.
max_tokens: body.max_tokens as number | undefined,
temperature: body.temperature as number | undefined,
top_p: body.top_p as number | undefined,
n: body.n as number | undefined,
stream: body.stream as boolean | undefined,
stop: body.stop as string | string[] | undefined,
};
}
@@ -290,7 +312,6 @@ export class ApiRouter {
message,
type,
param,
code: null,
},
};