// Source: modelgrid/ts/api/handlers/chat.ts (151 lines, 4.1 KiB, retrieved 2026-01-30)
/**
* Chat Completions Handler
*
* Handles /v1/chat/completions and /v1/completions endpoints.
*/
import * as http from 'node:http';
import type {
IChatCompletionRequest,
IChatCompletionResponse,
IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelLoader } from '../../models/loader.ts';
/**
* Handler for chat completion requests
*/
export class ChatHandler {
  /**
   * @param containerManager - Resolves models to their hosting containers.
   * @param modelLoader - Loads models on demand when not yet running.
   */
  constructor(
    private readonly containerManager: ContainerManager,
    private readonly modelLoader: ModelLoader,
  ) {}

  /**
   * Handle POST /v1/chat/completions.
   *
   * Resolves the requested model to a running container (loading it on
   * demand if necessary), then dispatches to the streaming or
   * non-streaming path based on `body.stream`.
   *
   * Errors are reported via an OpenAI-style JSON error body; failures
   * that occur mid-stream are delivered as a final SSE payload instead
   * (see {@link sendError}).
   */
  public async handleChatCompletion(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const modelName = body.model;
    // Only an explicit `stream: true` selects the SSE path; any other
    // value (including truthy non-booleans) gets a regular JSON response.
    const isStream = body.stream === true;
    logger.dim(`Chat completion request for model: ${modelName}`);
    try {
      // Find or load the model
      const container = await this.findOrLoadModel(modelName);
      if (!container) {
        this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
        return;
      }
      // Route to streaming or non-streaming handler
      if (isStream) {
        await this.handleStreamingCompletion(res, container, body);
      } else {
        await this.handleNonStreamingCompletion(res, container, body);
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Chat completion error: ${message}`);
      this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
    }
  }

  /**
   * Find the container serving `modelName`, or attempt to load the model.
   *
   * @returns The hosting container, or `null` when the model could not
   *   be loaded (the failure reason is logged, not thrown).
   */
  private async findOrLoadModel(
    modelName: string,
  ): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
    // First, check if model is already loaded
    const container = await this.containerManager.findContainerForModel(modelName);
    if (container) {
      return container;
    }
    // Try to load the model
    logger.info(`Model ${modelName} not loaded, attempting to load...`);
    const loadResult = await this.modelLoader.loadModel(modelName);
    if (!loadResult.success) {
      logger.error(`Failed to load model: ${loadResult.error}`);
      return null;
    }
    // Re-query after loading; may still be null if the container
    // did not register the model despite a successful load.
    return this.containerManager.findContainerForModel(modelName);
  }

  /**
   * Handle a non-streaming chat completion: proxy the request to the
   * container and return its response as a single JSON body.
   */
  private async handleNonStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const response = await container.chatCompletion(body);
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Handle a streaming chat completion over Server-Sent Events.
   *
   * Writes the SSE preamble headers, forwards each chunk from the
   * container as a `data:` event, and terminates with the OpenAI
   * `[DONE]` sentinel.
   */
  private async handleStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    // Set SSE headers. X-Accel-Buffering disables proxy buffering
    // (nginx) so chunks reach the client immediately.
    res.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
    });
    // Stream chunks to client
    await container.chatCompletionStream(body, (chunk) => {
      res.write(`data: ${chunk}\n\n`);
    });
    // Send final done message
    res.write('data: [DONE]\n\n');
    res.end();
  }

  /**
   * Send an OpenAI-style error response.
   *
   * If response headers have already been sent (a failure mid-stream,
   * after the SSE preamble), a status line can no longer be written —
   * calling `writeHead` again would throw ERR_HTTP_HEADERS_SENT and
   * mask the original error. In that case the error object is emitted
   * as a final SSE `data:` payload and the connection is closed.
   *
   * @param statusCode - HTTP status (ignored when headers are already sent).
   * @param message - Human-readable error description.
   * @param type - OpenAI error type identifier (e.g. 'server_error').
   * @param param - Optional request parameter the error relates to.
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };
    if (res.headersSent) {
      res.end(`data: ${JSON.stringify(error)}\n\n`);
      return;
    }
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}