feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+38
-17
@@ -5,7 +5,8 @@
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type { IApiError } from '../interfaces/api.ts';
|
||||
import type { IApiError, IChatCompletionRequest } from '../interfaces/api.ts';
|
||||
import { ClusterCoordinator } from '../cluster/coordinator.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
@@ -23,6 +24,7 @@ export class ApiRouter {
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
private clusterCoordinator: ClusterCoordinator;
|
||||
private chatHandler: ChatHandler;
|
||||
private modelsHandler: ModelsHandler;
|
||||
private embeddingsHandler: EmbeddingsHandler;
|
||||
@@ -33,16 +35,27 @@ export class ApiRouter {
|
||||
containerManager: ContainerManager,
|
||||
modelRegistry: ModelRegistry,
|
||||
modelLoader: ModelLoader,
|
||||
clusterCoordinator: ClusterCoordinator,
|
||||
apiKeys: string[],
|
||||
) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.modelLoader = modelLoader;
|
||||
this.clusterCoordinator = clusterCoordinator;
|
||||
|
||||
// Initialize handlers
|
||||
this.chatHandler = new ChatHandler(containerManager, modelLoader);
|
||||
this.modelsHandler = new ModelsHandler(containerManager, modelRegistry);
|
||||
this.embeddingsHandler = new EmbeddingsHandler(containerManager);
|
||||
this.chatHandler = new ChatHandler(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
modelLoader,
|
||||
clusterCoordinator,
|
||||
);
|
||||
this.modelsHandler = new ModelsHandler(containerManager, modelRegistry, clusterCoordinator);
|
||||
this.embeddingsHandler = new EmbeddingsHandler(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
clusterCoordinator,
|
||||
);
|
||||
|
||||
// Initialize middleware
|
||||
this.authMiddleware = new AuthMiddleware(apiKeys);
|
||||
@@ -119,8 +132,8 @@ export class ApiRouter {
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle request
|
||||
await this.chatHandler.handleChatCompletion(req, res, body);
|
||||
const requestBody = this.sanityMiddleware.sanitizeChatRequest(body as Record<string, unknown>);
|
||||
await this.chatHandler.handleChatCompletion(req, res, requestBody);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -149,7 +162,7 @@ export class ApiRouter {
|
||||
}
|
||||
|
||||
// Convert to chat format and handle
|
||||
const chatBody = this.convertCompletionToChat(body);
|
||||
const chatBody = this.convertCompletionToChat(body as Record<string, unknown>);
|
||||
await this.chatHandler.handleChatCompletion(req, res, chatBody);
|
||||
}
|
||||
|
||||
@@ -222,7 +235,16 @@ export class ApiRouter {
|
||||
return;
|
||||
}
|
||||
|
||||
await this.embeddingsHandler.handleEmbeddings(res, body);
|
||||
const validation = this.sanityMiddleware.validateEmbeddingsRequest(body);
|
||||
if (!validation.valid) {
|
||||
this.sendError(res, 400, validation.error || 'Invalid request', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
const requestBody = this.sanityMiddleware.sanitizeEmbeddingsRequest(
|
||||
body as Record<string, unknown>,
|
||||
);
|
||||
await this.embeddingsHandler.handleEmbeddings(req, res, requestBody);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -257,21 +279,21 @@ export class ApiRouter {
|
||||
/**
|
||||
* Convert legacy completion request to chat format
|
||||
*/
|
||||
private convertCompletionToChat(body: Record<string, unknown>): Record<string, unknown> {
|
||||
private convertCompletionToChat(body: Record<string, unknown>): IChatCompletionRequest {
|
||||
const prompt = body.prompt as string | string[];
|
||||
const promptText = Array.isArray(prompt) ? prompt.join('\n') : prompt;
|
||||
|
||||
return {
|
||||
model: body.model,
|
||||
model: body.model as string,
|
||||
messages: [
|
||||
{ role: 'user', content: promptText },
|
||||
],
|
||||
max_tokens: body.max_tokens,
|
||||
temperature: body.temperature,
|
||||
top_p: body.top_p,
|
||||
n: body.n,
|
||||
stream: body.stream,
|
||||
stop: body.stop,
|
||||
max_tokens: body.max_tokens as number | undefined,
|
||||
temperature: body.temperature as number | undefined,
|
||||
top_p: body.top_p as number | undefined,
|
||||
n: body.n as number | undefined,
|
||||
stream: body.stream as boolean | undefined,
|
||||
stop: body.stop as string | string[] | undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -290,7 +312,6 @@ export class ApiRouter {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user