initial
This commit is contained in:
150
ts/api/handlers/chat.ts
Normal file
150
ts/api/handlers/chat.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
/**
|
||||
* Chat Completions Handler
|
||||
*
|
||||
* Handles /v1/chat/completions and /v1/completions endpoints.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IChatCompletionRequest,
|
||||
IChatCompletionResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { ModelLoader } from '../../models/loader.ts';
|
||||
|
||||
/**
 * Handler for chat completion requests.
 *
 * Looks up the container that serves the requested model (asking the model
 * loader to load it when no container reports it yet) and relays the
 * completion either as one JSON document or as a Server-Sent Events stream.
 */
export class ChatHandler {
  private readonly containerManager: ContainerManager;
  private readonly modelLoader: ModelLoader;

  constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
    this.containerManager = containerManager;
    this.modelLoader = modelLoader;
  }

  /**
   * Handle POST /v1/chat/completions
   *
   * @param req - Incoming request (currently unused; kept for the handler signature).
   * @param res - Response the completion or the error document is written to.
   * @param body - Parsed request body; `stream === true` selects SSE streaming.
   * @returns Resolves once the response has been finished. Failures are
   *   reported on `res` and are not rethrown.
   */
  public async handleChatCompletion(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const modelName = body.model;
    const isStream = body.stream === true;

    logger.dim(`Chat completion request for model: ${modelName}`);

    try {
      // Find or load the model
      const container = await this.findOrLoadModel(modelName);
      if (!container) {
        this.sendError(
          res,
          404,
          `Model "${modelName}" not found or could not be loaded`,
          'model_not_found',
        );
        return;
      }

      // Route to streaming or non-streaming handler
      if (isStream) {
        await this.handleStreamingCompletion(res, container, body);
      } else {
        await this.handleNonStreamingCompletion(res, container, body);
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Chat completion error: ${message}`);
      this.reportFailure(res, `Chat completion failed: ${message}`, isStream);
    }
  }

  /**
   * Find container with model or attempt to load it
   *
   * @param modelName - Model identifier taken from the request body.
   * @returns The container reported for the model, or `null` when the loader
   *   reports failure or no container is found after loading.
   */
  private async findOrLoadModel(
    modelName: string,
  ): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
    // First, check if model is already loaded
    const container = await this.containerManager.findContainerForModel(modelName);
    if (container) {
      return container;
    }

    // Try to load the model
    logger.info(`Model ${modelName} not loaded, attempting to load...`);
    const loadResult = await this.modelLoader.loadModel(modelName);

    if (!loadResult.success) {
      logger.error(`Failed to load model: ${loadResult.error}`);
      return null;
    }

    // Find the container again after loading
    return this.containerManager.findContainerForModel(modelName);
  }

  /**
   * Handle non-streaming chat completion
   *
   * Awaits the full completion before touching `res`, so a container failure
   * can still be answered with a regular JSON error document.
   */
  private async handleNonStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const response = await container.chatCompletion(body);

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Handle streaming chat completion
   *
   * Writes the SSE headers immediately, forwards every chunk as a `data:`
   * event and terminates the stream with `data: [DONE]`. Errors thrown by the
   * container propagate to the caller, which must not try to send a new status
   * line because the headers are already on the wire.
   */
  private async handleStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    // Set SSE headers
    res.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
    });

    // Stream chunks to client
    await container.chatCompletionStream(body, (chunk) => {
      res.write(`data: ${chunk}\n\n`);
    });

    // Send final done message
    res.write('data: [DONE]\n\n');
    res.end();
  }

  /**
   * Report a server-side failure in whatever way the response state allows.
   *
   * Before headers are sent this is a plain 500 JSON error. Once a stream has
   * started, the status line can no longer change, so the error is delivered
   * in-band as one SSE event and the response is closed instead of being left
   * hanging.
   */
  private reportFailure(res: http.ServerResponse, message: string, isStream: boolean): void {
    if (!res.headersSent) {
      this.sendError(res, 500, message, 'server_error');
      return;
    }

    if (res.writableEnded) {
      return;
    }

    if (isStream) {
      res.write(`data: ${JSON.stringify(this.buildError(message, 'server_error'))}\n\n`);
    }
    res.end();
  }

  /**
   * Build the error document shared by JSON responses and in-stream events.
   */
  private buildError(message: string, type: string, param?: string): IApiError {
    return {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };
  }

  /**
   * Send error response
   *
   * @param res - Response to write to.
   * @param statusCode - HTTP status for the error.
   * @param message - Human-readable description.
   * @param type - Machine-readable error category.
   * @param param - Optional name of the offending request parameter.
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    // writeHead() throws once headers are out; just make sure the response ends.
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(this.buildError(message, type, param)));
  }
}
|
||||
235
ts/api/handlers/embeddings.ts
Normal file
235
ts/api/handlers/embeddings.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* Embeddings Handler
|
||||
*
|
||||
* Handles /v1/embeddings endpoint.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IEmbeddingsRequest,
|
||||
IEmbeddingsResponse,
|
||||
IEmbeddingData,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
|
||||
/**
 * Handler for embeddings requests.
 *
 * Finds the container that serves the requested model and asks its backend
 * (Ollama, vLLM or TGI) for one embedding per input string.
 */
export class EmbeddingsHandler {
  private readonly containerManager: ContainerManager;

  constructor(containerManager: ContainerManager) {
    this.containerManager = containerManager;
  }

  /**
   * Handle POST /v1/embeddings
   *
   * @param res - Response the embeddings list or the error document is written to.
   * @param body - Parsed request body; `input` is a string or an array of strings.
   * @returns Resolves once the response has been finished. Failures are
   *   reported on `res` and are not rethrown.
   */
  public async handleEmbeddings(
    res: http.ServerResponse,
    body: IEmbeddingsRequest,
  ): Promise<void> {
    const modelName = body.model;

    logger.dim(`Embeddings request for model: ${modelName}`);

    try {
      // Find container with the embedding model
      const container = await this.containerManager.findContainerForModel(modelName);
      if (!container) {
        this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found');
        return;
      }

      // Reject malformed input up front: a missing or non-string entry is a
      // client error, not a backend failure.
      const candidates: unknown[] = Array.isArray(body.input) ? body.input : [body.input];
      if (!candidates.every((item) => typeof item === 'string')) {
        this.sendError(
          res,
          400,
          'Embeddings "input" must be a string or an array of strings',
          'invalid_request_error',
          'input',
        );
        return;
      }

      // Generate embeddings
      const response = await this.generateEmbeddings(container, body);

      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Embeddings error: ${message}`);
      this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
    }
  }

  /**
   * Generate embeddings from container
   *
   * Inputs are processed one at a time, in order, so `index` in the result
   * matches the position of the input in the request.
   *
   * @returns The list response with per-input vectors and the summed token count.
   */
  private async generateEmbeddings(
    container: import('../../containers/base-container.ts').BaseContainer,
    request: IEmbeddingsRequest,
  ): Promise<IEmbeddingsResponse> {
    const inputs = Array.isArray(request.input) ? request.input : [request.input];
    const embeddings: IEmbeddingData[] = [];
    let totalTokens = 0;

    // Generate embeddings for each input
    for (const [index, input] of inputs.entries()) {
      const embedding = await this.getEmbeddingFromContainer(container, request.model, input);

      embeddings.push({
        object: 'embedding',
        embedding: embedding.vector,
        index,
      });

      totalTokens += embedding.tokenCount;
    }

    return {
      object: 'list',
      data: embeddings,
      model: request.model,
      usage: {
        prompt_tokens: totalTokens,
        total_tokens: totalTokens,
      },
    };
  }

  /**
   * Get embedding from container (container-specific implementation)
   *
   * @throws Error when the container type has no embedding route here.
   */
  private async getEmbeddingFromContainer(
    container: import('../../containers/base-container.ts').BaseContainer,
    model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    const endpoint = container.getEndpoint();
    const containerType = container.type;

    // Route to container-specific embedding endpoint
    switch (containerType) {
      case 'ollama':
        return this.getOllamaEmbedding(endpoint, model, input);
      case 'vllm':
        return this.getVllmEmbedding(endpoint, model, input);
      case 'tgi':
        return this.getTgiEmbedding(endpoint, model, input);
      default:
        throw new Error(`Container type ${containerType} does not support embeddings`);
    }
  }

  /**
   * Get embedding from Ollama
   *
   * Posts to `/api/embeddings`. The token count is an estimate because this
   * code reads only `embedding` from the reply.
   *
   * @throws Error on a non-OK HTTP status or when the reply has no numeric vector.
   */
  private async getOllamaEmbedding(
    endpoint: string,
    model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    const response = await fetch(`${endpoint}/api/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        prompt: input,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Ollama embedding error: ${errorText}`);
    }

    const result = (await response.json()) as { embedding?: unknown } | null;
    const vector = result?.embedding;
    if (!this.isNumberVector(vector)) {
      throw new Error('Ollama embedding error: response did not contain an embedding vector');
    }

    return {
      vector,
      tokenCount: this.estimateTokenCount(input),
    };
  }

  /**
   * Get embedding from vLLM (OpenAI-compatible)
   *
   * Posts to `/v1/embeddings` and uses the reported `usage.total_tokens`,
   * falling back to the length-based estimate when usage is absent.
   *
   * @throws Error on a non-OK HTTP status or when the reply has no numeric vector.
   */
  private async getVllmEmbedding(
    endpoint: string,
    model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    const response = await fetch(`${endpoint}/v1/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        input,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`vLLM embedding error: ${errorText}`);
    }

    const result = (await response.json()) as {
      data?: Array<{ embedding?: unknown } | undefined>;
      usage?: { total_tokens?: unknown };
    } | null;

    const vector = Array.isArray(result?.data) ? result?.data[0]?.embedding : undefined;
    if (!this.isNumberVector(vector)) {
      throw new Error('vLLM embedding error: response did not contain an embedding vector');
    }

    const reportedTokens = result?.usage?.total_tokens;

    return {
      vector,
      tokenCount: typeof reportedTokens === 'number'
        ? reportedTokens
        : this.estimateTokenCount(input),
    };
  }

  /**
   * Get embedding from TGI
   *
   * Posts to `/embed`; the reply is read as an array of vectors and the first
   * one is used. The token count is an estimate.
   *
   * @throws Error on a non-OK HTTP status or when the reply has no numeric vector.
   */
  private async getTgiEmbedding(
    endpoint: string,
    _model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    // TGI uses /embed endpoint
    const response = await fetch(`${endpoint}/embed`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        inputs: input,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`TGI embedding error: ${errorText}`);
    }

    const result: unknown = await response.json();
    const vector = Array.isArray(result) ? result[0] : undefined;
    if (!this.isNumberVector(vector)) {
      throw new Error('TGI embedding error: response did not contain an embedding vector');
    }

    return {
      vector,
      tokenCount: this.estimateTokenCount(input),
    };
  }

  /**
   * Rough token estimate used when a backend reports no usage (~4 chars per token).
   */
  private estimateTokenCount(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Type guard: true for a non-empty array whose entries are all numbers.
   */
  private isNumberVector(value: unknown): value is number[] {
    return Array.isArray(value) &&
      value.length > 0 &&
      value.every((entry) => typeof entry === 'number');
  }

  /**
   * Send error response
   *
   * @param res - Response to write to.
   * @param statusCode - HTTP status for the error.
   * @param message - Human-readable description.
   * @param type - Machine-readable error category.
   * @param param - Optional name of the offending request parameter.
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    // writeHead() throws once headers are out; just make sure the response ends.
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    const error: IApiError = {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}
|
||||
9
ts/api/handlers/index.ts
Normal file
9
ts/api/handlers/index.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
/**
|
||||
* API Handlers
|
||||
*
|
||||
* OpenAI-compatible request handlers.
|
||||
*/
|
||||
|
||||
export { ChatHandler } from './chat.ts';
|
||||
export { ModelsHandler } from './models.ts';
|
||||
export { EmbeddingsHandler } from './embeddings.ts';
|
||||
136
ts/api/handlers/models.ts
Normal file
136
ts/api/handlers/models.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Models Handler
|
||||
*
|
||||
* Handles /v1/models endpoints.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IModelInfo,
|
||||
IListModelsResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
/**
 * Serves the model catalogue: the full listing and single-model lookups.
 */
export class ModelsHandler {
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;

  constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
    this.modelRegistry = modelRegistry;
    this.containerManager = containerManager;
  }

  /**
   * GET /v1/models — respond with every known model as a list document.
   * Any failure is logged and answered with a 500 error document.
   */
  public async handleListModels(res: http.ServerResponse): Promise<void> {
    try {
      const listing: IListModelsResponse = {
        object: 'list',
        data: await this.getAvailableModels(),
      };

      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(listing));
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error(`Failed to list models: ${reason}`);
      this.sendError(res, 500, `Failed to list models: ${reason}`, 'server_error');
    }
  }

  /**
   * GET /v1/models/:model — respond with one catalogue entry, or a 404 error
   * document when no entry has the requested id.
   */
  public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
    try {
      const catalogue = await this.getAvailableModels();
      const match = catalogue.find(({ id }) => id === modelId);

      if (match) {
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify(match));
        return;
      }

      this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error(`Failed to get model info: ${reason}`);
      this.sendError(res, 500, `Failed to get model info: ${reason}`, 'server_error');
    }
  }

  /**
   * Collect models reported by the container manager first, then greenlit
   * models from the registry that were not already reported. The first entry
   * for an id wins; the result is sorted by id.
   */
  private async getAvailableModels(): Promise<IModelInfo[]> {
    const byId = new Map<string, IModelInfo>();
    const created = Math.floor(Date.now() / 1000);

    // Keeps the first registration of each id and ignores later duplicates.
    const register = (id: string, ownedBy: string): void => {
      if (byId.has(id)) {
        return;
      }
      byId.set(id, { id, object: 'model', created, owned_by: ownedBy });
    };

    // Models reported by the container manager take precedence.
    const running = await this.containerManager.getAllAvailableModels();
    for (const [runningId, details] of running) {
      register(runningId, `modelgrid-${details.container}`);
    }

    // Greenlit models fill in whatever is not loaded yet.
    const greenlit = await this.modelRegistry.getAllGreenlitModels();
    for (const entry of greenlit) {
      register(entry.name, `modelgrid-${entry.container}`);
    }

    const catalogue = Array.from(byId.values());
    catalogue.sort((left, right) => left.id.localeCompare(right.id));
    return catalogue;
  }

  /**
   * Write a JSON error document with the given status and finish the response.
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const payload: IApiError = {
      error: { message, type, param, code: null },
    };
    const serialized = JSON.stringify(payload);

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(serialized);
  }
}
|
||||
Reference in New Issue
Block a user