/**
 * Chat Completions Handler
 *
 * Handles /v1/chat/completions and /v1/completions endpoints.
 */

import * as http from 'node:http';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelLoader } from '../../models/loader.ts';

/**
 * Handler for chat completion requests.
 *
 * Resolves the container serving the requested model (asking the model
 * loader to load it when no container has it yet) and forwards the request
 * to that container, either as a single JSON response or as a stream of
 * server-sent events.
 */
export class ChatHandler {
  private readonly containerManager: ContainerManager;
  private readonly modelLoader: ModelLoader;

  constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
    this.containerManager = containerManager;
    this.modelLoader = modelLoader;
  }

  /**
   * Handle POST /v1/chat/completions
   *
   * @param req - Incoming HTTP request (not read here; the body is passed in already parsed).
   * @param res - HTTP response the completion or the error is written to.
   * @param body - Parsed chat completion request; `body.stream === true` selects streaming.
   * @returns Resolves once the response has been written. Failures are reported
   *   to the client as an error payload rather than thrown to the caller.
   */
  public async handleChatCompletion(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const modelName = body.model;
    const isStream = body.stream === true;

    logger.dim(`Chat completion request for model: ${modelName}`);

    try {
      // Find or load the model
      const container = await this.findOrLoadModel(modelName);
      if (!container) {
        this.sendError(
          res,
          404,
          `Model "${modelName}" not found or could not be loaded`,
          'model_not_found',
        );
        return;
      }

      // Route to streaming or non-streaming handler
      if (isStream) {
        await this.handleStreamingCompletion(res, container, body);
      } else {
        await this.handleNonStreamingCompletion(res, container, body);
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Chat completion error: ${message}`);
      this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
    }
  }

  /**
   * Find container with model or attempt to load it.
   *
   * @param modelName - Name of the model requested by the client.
   * @returns The container serving the model, or `null` when the load failed
   *   or no container reports the model after loading.
   */
  private async findOrLoadModel(
    modelName: string,
  ): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
    // First, check if model is already loaded
    const existing = await this.containerManager.findContainerForModel(modelName);
    if (existing) {
      return existing;
    }

    // Try to load the model
    logger.info(`Model ${modelName} not loaded, attempting to load...`);
    const loadResult = await this.modelLoader.loadModel(modelName);
    if (!loadResult.success) {
      logger.error(`Failed to load model: ${loadResult.error}`);
      return null;
    }

    // Find the container again after loading; normalize a missing result to null
    return (await this.containerManager.findContainerForModel(modelName)) ?? null;
  }

  /**
   * Handle non-streaming chat completion.
   *
   * @param res - HTTP response to write the JSON completion to.
   * @param container - Container that serves the requested model.
   * @param body - Chat completion request forwarded to the container.
   */
  private async handleNonStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const response = await container.chatCompletion(body);

    // Serialize before committing the status line so that a serialization
    // failure can still be reported by the caller as a regular error response.
    const payload = JSON.stringify(response);

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(payload);
  }

  /**
   * Handle streaming chat completion.
   *
   * Each chunk delivered by the container is written as one `data:` event,
   * followed by a final `data: [DONE]` event. Once the headers are sent the
   * status code can no longer change, so a failure during streaming is
   * logged and written as a last `data:` event carrying the error payload;
   * the response is ended in every case.
   *
   * @param res - HTTP response to stream events to.
   * @param container - Container that serves the requested model.
   * @param body - Chat completion request forwarded to the container.
   */
  private async handleStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    // Set SSE headers
    res.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
    });

    try {
      // Stream chunks to client
      await container.chatCompletionStream(body, (chunk) => {
        res.write(`data: ${chunk}\n\n`);
      });

      // Send final done message
      res.write('data: [DONE]\n\n');
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Chat completion error: ${message}`);

      // Headers are already sent: report the failure in-band instead of
      // attempting a second writeHead, which would throw.
      const payload = this.buildError(`Chat completion failed: ${message}`, 'server_error');
      res.write(`data: ${JSON.stringify(payload)}\n\n`);
    } finally {
      res.end();
    }
  }

  /**
   * Build the error payload sent to clients.
   *
   * @param message - Human-readable error description.
   * @param type - Error category string (e.g. `server_error`).
   * @param param - Optional name of the offending request parameter.
   */
  private buildError(message: string, type: string, param?: string): IApiError {
    return {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };
  }

  /**
   * Send error response.
   *
   * If the response headers were already sent, the status code and body can
   * no longer be set; in that case the response is only ended (when still
   * open) so the client is not left waiting.
   *
   * @param res - HTTP response to write the error to.
   * @param statusCode - HTTP status code for the error response.
   * @param message - Human-readable error description.
   * @param type - Error category string.
   * @param param - Optional name of the offending request parameter.
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    const error = this.buildError(message, type, param);
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}