/**
 * Chat Completions Handler
 *
 * Handles /v1/chat/completions and /v1/completions endpoints.
 */
import * as http from 'node:http';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelLoader } from '../../models/loader.ts';
/**
|
||
|
|
* Handler for chat completion requests
|
||
|
|
*/
|
||
|
|
export class ChatHandler {
|
||
|
|
private containerManager: ContainerManager;
|
||
|
|
private modelLoader: ModelLoader;
|
||
|
|
|
||
|
|
constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
|
||
|
|
this.containerManager = containerManager;
|
||
|
|
this.modelLoader = modelLoader;
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Handle POST /v1/chat/completions
|
||
|
|
*/
|
||
|
|
public async handleChatCompletion(
|
||
|
|
req: http.IncomingMessage,
|
||
|
|
res: http.ServerResponse,
|
||
|
|
body: IChatCompletionRequest,
|
||
|
|
): Promise<void> {
|
||
|
|
const modelName = body.model;
|
||
|
|
const isStream = body.stream === true;
|
||
|
|
|
||
|
|
logger.dim(`Chat completion request for model: ${modelName}`);
|
||
|
|
|
||
|
|
try {
|
||
|
|
// Find or load the model
|
||
|
|
const container = await this.findOrLoadModel(modelName);
|
||
|
|
if (!container) {
|
||
|
|
this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Route to streaming or non-streaming handler
|
||
|
|
if (isStream) {
|
||
|
|
await this.handleStreamingCompletion(res, container, body);
|
||
|
|
} else {
|
||
|
|
await this.handleNonStreamingCompletion(res, container, body);
|
||
|
|
}
|
||
|
|
} catch (error) {
|
||
|
|
const message = error instanceof Error ? error.message : String(error);
|
||
|
|
logger.error(`Chat completion error: ${message}`);
|
||
|
|
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Find container with model or attempt to load it
|
||
|
|
*/
|
||
|
|
private async findOrLoadModel(
|
||
|
|
modelName: string,
|
||
|
|
): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
|
||
|
|
// First, check if model is already loaded
|
||
|
|
const container = await this.containerManager.findContainerForModel(modelName);
|
||
|
|
if (container) {
|
||
|
|
return container;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Try to load the model
|
||
|
|
logger.info(`Model ${modelName} not loaded, attempting to load...`);
|
||
|
|
const loadResult = await this.modelLoader.loadModel(modelName);
|
||
|
|
|
||
|
|
if (!loadResult.success) {
|
||
|
|
logger.error(`Failed to load model: ${loadResult.error}`);
|
||
|
|
return null;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Find the container again after loading
|
||
|
|
return this.containerManager.findContainerForModel(modelName);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Handle non-streaming chat completion
|
||
|
|
*/
|
||
|
|
private async handleNonStreamingCompletion(
|
||
|
|
res: http.ServerResponse,
|
||
|
|
container: import('../../containers/base-container.ts').BaseContainer,
|
||
|
|
body: IChatCompletionRequest,
|
||
|
|
): Promise<void> {
|
||
|
|
const response = await container.chatCompletion(body);
|
||
|
|
|
||
|
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||
|
|
res.end(JSON.stringify(response));
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Handle streaming chat completion
|
||
|
|
*/
|
||
|
|
private async handleStreamingCompletion(
|
||
|
|
res: http.ServerResponse,
|
||
|
|
container: import('../../containers/base-container.ts').BaseContainer,
|
||
|
|
body: IChatCompletionRequest,
|
||
|
|
): Promise<void> {
|
||
|
|
// Set SSE headers
|
||
|
|
res.writeHead(200, {
|
||
|
|
'Content-Type': 'text/event-stream',
|
||
|
|
'Cache-Control': 'no-cache',
|
||
|
|
'Connection': 'keep-alive',
|
||
|
|
'X-Accel-Buffering': 'no',
|
||
|
|
});
|
||
|
|
|
||
|
|
// Stream chunks to client
|
||
|
|
await container.chatCompletionStream(body, (chunk) => {
|
||
|
|
res.write(`data: ${chunk}\n\n`);
|
||
|
|
});
|
||
|
|
|
||
|
|
// Send final done message
|
||
|
|
res.write('data: [DONE]\n\n');
|
||
|
|
res.end();
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Send error response
|
||
|
|
*/
|
||
|
|
private sendError(
|
||
|
|
res: http.ServerResponse,
|
||
|
|
statusCode: number,
|
||
|
|
message: string,
|
||
|
|
type: string,
|
||
|
|
param?: string,
|
||
|
|
): void {
|
||
|
|
const error: IApiError = {
|
||
|
|
error: {
|
||
|
|
message,
|
||
|
|
type,
|
||
|
|
param,
|
||
|
|
code: null,
|
||
|
|
},
|
||
|
|
};
|
||
|
|
|
||
|
|
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||
|
|
res.end(JSON.stringify(error));
|
||
|
|
}
|
||
|
|
}
|