initial
Some checks failed
CI / Type Check & Lint (push) Failing after 5s
CI / Build Test (Current Platform) (push) Failing after 5s
CI / Build All Platforms (push) Successful in 49s

2026-01-30 03:16:57 +00:00
commit daaf6559e3
80 changed files with 14430 additions and 0 deletions

150
ts/api/handlers/chat.ts Normal file

@@ -0,0 +1,150 @@
/**
* Chat Completions Handler
*
* Handles /v1/chat/completions and /v1/completions endpoints.
*/
import * as http from 'node:http';
import type {
IChatCompletionRequest,
IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelLoader } from '../../models/loader.ts';
/**
* Handler for chat completion requests
*/
export class ChatHandler {
private containerManager: ContainerManager;
private modelLoader: ModelLoader;
constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
this.containerManager = containerManager;
this.modelLoader = modelLoader;
}
/**
* Handle POST /v1/chat/completions
*/
public async handleChatCompletion(
req: http.IncomingMessage,
res: http.ServerResponse,
body: IChatCompletionRequest,
): Promise<void> {
const modelName = body.model;
const isStream = body.stream === true;
logger.dim(`Chat completion request for model: ${modelName}`);
try {
// Find or load the model
const container = await this.findOrLoadModel(modelName);
if (!container) {
this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
return;
}
// Route to streaming or non-streaming handler
if (isStream) {
await this.handleStreamingCompletion(res, container, body);
} else {
await this.handleNonStreamingCompletion(res, container, body);
}
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
logger.error(`Chat completion error: ${message}`);
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
}
}
/**
* Find container with model or attempt to load it
*/
private async findOrLoadModel(
modelName: string,
): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
// First, check if model is already loaded
const container = await this.containerManager.findContainerForModel(modelName);
if (container) {
return container;
}
// Try to load the model
logger.info(`Model ${modelName} not loaded, attempting to load...`);
const loadResult = await this.modelLoader.loadModel(modelName);
if (!loadResult.success) {
logger.error(`Failed to load model: ${loadResult.error}`);
return null;
}
// Find the container again after loading
return this.containerManager.findContainerForModel(modelName);
}
/**
* Handle non-streaming chat completion
*/
private async handleNonStreamingCompletion(
res: http.ServerResponse,
container: import('../../containers/base-container.ts').BaseContainer,
body: IChatCompletionRequest,
): Promise<void> {
const response = await container.chatCompletion(body);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(response));
}
/**
* Handle streaming chat completion
*/
private async handleStreamingCompletion(
res: http.ServerResponse,
container: import('../../containers/base-container.ts').BaseContainer,
body: IChatCompletionRequest,
): Promise<void> {
// Set SSE headers
res.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'X-Accel-Buffering': 'no',
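// 'X-Accel-Buffering: no' tells nginx-style reverse proxies not to buffer this response, so SSE chunks reach the client as soon as they are written.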
});
// Stream chunks to client
await container.chatCompletionStream(body, (chunk) => {
res.write(`data: ${chunk}\n\n`);
});
// Send final done message
res.write('data: [DONE]\n\n');
res.end();
}
/**
* Send error response
*/
private sendError(
res: http.ServerResponse,
statusCode: number,
message: string,
type: string,
param?: string,
): void {
const error: IApiError = {
error: {
message,
type,
param,
code: null,
},
};
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(error));
}
}
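
For reference, a minimal client-side sketch of consuming the stream this handler emits. The base URL and model name are placeholders; only the "data: ..." framing and the final "data: [DONE]" sentinel come from the handler above.

// Hypothetical client for the streaming endpoint above; the URL and model name are
// placeholders, not part of this commit.
async function readChatStream(baseUrl: string): Promise<void> {
  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'example-model', // placeholder
      stream: true,
      messages: [{ role: 'user', content: 'Hello' }],
    }),
  });
  if (!response.ok || !response.body) {
    throw new Error(`Streaming request failed: ${response.status}`);
  }
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    // Frames are separated by a blank line: "data: <chunk>\n\n"
    const frames = buffer.split('\n\n');
    buffer = frames.pop() ?? '';
    for (const frame of frames) {
      const payload = frame.replace(/^data: /, '').trim();
      if (payload === '[DONE]') return; // final sentinel written by the handler
      console.log(payload); // raw chunk forwarded from the container
    }
  }
}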

235
ts/api/handlers/embeddings.ts Normal file

@@ -0,0 +1,235 @@
/**
* Embeddings Handler
*
* Handles /v1/embeddings endpoint.
*/
import * as http from 'node:http';
import type {
IEmbeddingsRequest,
IEmbeddingsResponse,
IEmbeddingData,
IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
/**
* Handler for embeddings requests
*/
export class EmbeddingsHandler {
private containerManager: ContainerManager;
constructor(containerManager: ContainerManager) {
this.containerManager = containerManager;
}
/**
* Handle POST /v1/embeddings
*/
public async handleEmbeddings(
res: http.ServerResponse,
body: IEmbeddingsRequest,
): Promise<void> {
const modelName = body.model;
logger.dim(`Embeddings request for model: ${modelName}`);
try {
// Find container with the embedding model
const container = await this.containerManager.findContainerForModel(modelName);
if (!container) {
this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found');
return;
}
// Generate embeddings
const response = await this.generateEmbeddings(container, body);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(response));
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
logger.error(`Embeddings error: ${message}`);
this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
}
}
/**
* Generate embeddings from container
*/
private async generateEmbeddings(
container: import('../../containers/base-container.ts').BaseContainer,
request: IEmbeddingsRequest,
): Promise<IEmbeddingsResponse> {
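// The OpenAI-style embeddings API accepts either a single string or an array of strings; normalize to an array up front so both shapes take the same path.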
const inputs = Array.isArray(request.input) ? request.input : [request.input];
const embeddings: IEmbeddingData[] = [];
let totalTokens = 0;
// Generate embeddings for each input
for (let i = 0; i < inputs.length; i++) {
const input = inputs[i];
const embedding = await this.getEmbeddingFromContainer(container, request.model, input);
embeddings.push({
object: 'embedding',
embedding: embedding.vector,
index: i,
});
totalTokens += embedding.tokenCount;
}
return {
object: 'list',
data: embeddings,
model: request.model,
usage: {
prompt_tokens: totalTokens,
total_tokens: totalTokens,
},
};
}
/**
* Get embedding from container (container-specific implementation)
*/
private async getEmbeddingFromContainer(
container: import('../../containers/base-container.ts').BaseContainer,
model: string,
input: string,
): Promise<{ vector: number[]; tokenCount: number }> {
const endpoint = container.getEndpoint();
const containerType = container.type;
// Route to container-specific embedding endpoint
if (containerType === 'ollama') {
return this.getOllamaEmbedding(endpoint, model, input);
} else if (containerType === 'vllm') {
return this.getVllmEmbedding(endpoint, model, input);
} else if (containerType === 'tgi') {
return this.getTgiEmbedding(endpoint, model, input);
}
throw new Error(`Container type ${containerType} does not support embeddings`);
}
/**
* Get embedding from Ollama
*/
private async getOllamaEmbedding(
endpoint: string,
model: string,
input: string,
): Promise<{ vector: number[]; tokenCount: number }> {
const response = await fetch(`${endpoint}/api/embeddings`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
prompt: input,
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`Ollama embedding error: ${errorText}`);
}
const result = await response.json() as { embedding: number[] };
// Estimate token count (rough approximation: ~4 chars per token)
const tokenCount = Math.ceil(input.length / 4);
return {
vector: result.embedding,
tokenCount,
};
}
/**
* Get embedding from vLLM (OpenAI-compatible)
*/
private async getVllmEmbedding(
endpoint: string,
model: string,
input: string,
): Promise<{ vector: number[]; tokenCount: number }> {
const response = await fetch(`${endpoint}/v1/embeddings`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
input,
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`vLLM embedding error: ${errorText}`);
}
const result = await response.json() as IEmbeddingsResponse;
return {
vector: result.data[0].embedding,
tokenCount: result.usage.total_tokens,
};
}
/**
* Get embedding from TGI
*/
private async getTgiEmbedding(
endpoint: string,
_model: string,
input: string,
): Promise<{ vector: number[]; tokenCount: number }> {
// TGI uses /embed endpoint
const response = await fetch(`${endpoint}/embed`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
inputs: input,
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`TGI embedding error: ${errorText}`);
}
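// The /embed response is parsed as one embedding vector per input string; a single input is sent here, so only the first vector is used.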
const result = await response.json() as number[][];
// Estimate token count
const tokenCount = Math.ceil(input.length / 4);
return {
vector: result[0],
tokenCount,
};
}
/**
* Send error response
*/
private sendError(
res: http.ServerResponse,
statusCode: number,
message: string,
type: string,
param?: string,
): void {
const error: IApiError = {
error: {
message,
type,
param,
code: null,
},
};
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(error));
}
}
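
A minimal caller sketch against this handler, assuming the gateway listens on a placeholder localhost port; the request and response fields mirror the IEmbeddingsRequest and IEmbeddingsResponse shapes used above.

// Hypothetical caller; the port and model name are placeholders, not part of this commit.
async function embedTexts(texts: string[]): Promise<number[][]> {
  const response = await fetch('http://localhost:8080/v1/embeddings', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'example-embedding-model', // placeholder
      input: texts, // a single string or an array of strings is accepted
    }),
  });
  if (!response.ok) {
    throw new Error(`Embeddings request failed: ${response.status}`);
  }
  // Mirrors IEmbeddingsResponse: { object: 'list', data: [...], model, usage }
  const result = await response.json() as {
    object: string;
    data: { object: string; embedding: number[]; index: number }[];
    model: string;
    usage: { prompt_tokens: number; total_tokens: number };
  };
  return result.data.map((d) => d.embedding);
}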

9
ts/api/handlers/index.ts Normal file

@@ -0,0 +1,9 @@
/**
* API Handlers
*
* OpenAI-compatible request handlers.
*/
export { ChatHandler } from './chat.ts';
export { ModelsHandler } from './models.ts';
export { EmbeddingsHandler } from './embeddings.ts';
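
For orientation, a hedged sketch of how these exports might be wired together by a server module. The handler constructor arguments match the classes above, but the surrounding server code is not part of this excerpt, so the factory below and its assumed location at the ts/ root are illustrative only.

// Hypothetical wiring; only the handler constructor signatures are taken from the
// files above. How ContainerManager, ModelLoader and ModelRegistry are created is
// not shown in this commit excerpt, so instances are taken as inputs here.
import { ChatHandler, ModelsHandler, EmbeddingsHandler } from './api/handlers/index.ts';
import type { ContainerManager } from './containers/container-manager.ts';
import type { ModelLoader } from './models/loader.ts';
import type { ModelRegistry } from './models/registry.ts';

export function createHandlers(
  containerManager: ContainerManager,
  modelLoader: ModelLoader,
  modelRegistry: ModelRegistry,
) {
  return {
    chat: new ChatHandler(containerManager, modelLoader),
    models: new ModelsHandler(containerManager, modelRegistry),
    embeddings: new EmbeddingsHandler(containerManager),
  };
}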

136
ts/api/handlers/models.ts Normal file

@@ -0,0 +1,136 @@
/**
* Models Handler
*
* Handles /v1/models endpoints.
*/
import * as http from 'node:http';
import type {
IModelInfo,
IListModelsResponse,
IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelRegistry } from '../../models/registry.ts';
/**
* Handler for model-related requests
*/
export class ModelsHandler {
private containerManager: ContainerManager;
private modelRegistry: ModelRegistry;
constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
this.containerManager = containerManager;
this.modelRegistry = modelRegistry;
}
/**
* Handle GET /v1/models
*/
public async handleListModels(res: http.ServerResponse): Promise<void> {
try {
const models = await this.getAvailableModels();
const response: IListModelsResponse = {
object: 'list',
data: models,
};
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(response));
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
logger.error(`Failed to list models: ${message}`);
this.sendError(res, 500, `Failed to list models: ${message}`, 'server_error');
}
}
/**
* Handle GET /v1/models/:model
*/
public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
try {
const models = await this.getAvailableModels();
const model = models.find((m) => m.id === modelId);
if (!model) {
this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
return;
}
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(model));
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
logger.error(`Failed to get model info: ${message}`);
this.sendError(res, 500, `Failed to get model info: ${message}`, 'server_error');
}
}
/**
* Get all available models from containers and greenlist
*/
private async getAvailableModels(): Promise<IModelInfo[]> {
const models: IModelInfo[] = [];
const seen = new Set<string>();
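// OpenAI-style model objects report "created" as Unix epoch seconds, hence the division by 1000 below.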
const timestamp = Math.floor(Date.now() / 1000);
// Get models from running containers
const containerModels = await this.containerManager.getAllAvailableModels();
for (const [modelId, modelInfo] of containerModels) {
if (!seen.has(modelId)) {
seen.add(modelId);
models.push({
id: modelId,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${modelInfo.container}`,
});
}
}
// Add greenlit models that aren't loaded yet
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
for (const greenlit of greenlitModels) {
if (!seen.has(greenlit.name)) {
seen.add(greenlit.name);
models.push({
id: greenlit.name,
object: 'model',
created: timestamp,
owned_by: `modelgrid-${greenlit.container}`,
});
}
}
// Sort alphabetically
models.sort((a, b) => a.id.localeCompare(b.id));
return models;
}
/**
* Send error response
*/
private sendError(
res: http.ServerResponse,
statusCode: number,
message: string,
type: string,
param?: string,
): void {
const error: IApiError = {
error: {
message,
type,
param,
code: null,
},
};
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
res.end(JSON.stringify(error));
}
}
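
For reference, a hedged caller sketch for the models endpoint; the base URL is a placeholder, and the response fields are exactly those populated by getAvailableModels above.

// Hypothetical caller; the base URL is a placeholder, not part of this commit.
async function listModelIds(baseUrl: string): Promise<string[]> {
  const response = await fetch(`${baseUrl}/v1/models`);
  if (!response.ok) {
    throw new Error(`Failed to list models: ${response.status}`);
  }
  // Mirrors IListModelsResponse as built in getAvailableModels():
  // entries are sorted by id and owned_by looks like "modelgrid-<container>".
  const result = await response.json() as {
    object: string;
    data: { id: string; object: string; created: number; owned_by: string }[];
  };
  return result.data.map((m) => m.id);
}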