initial

2026-01-30 03:16:57 +00:00
commit daaf6559e3
80 changed files with 14430 additions and 0 deletions
--- a/ts/api/handlers/chat.ts
+++ b/ts/api/handlers/chat.ts
@@ -0,0 +1,150 @@
+/**
+ * Chat Completions Handler
+ *
+ * Handles /v1/chat/completions and /v1/completions endpoints.
+ */
+
+import * as http from 'node:http';
+import type {
+  IChatCompletionRequest,
+  IChatCompletionResponse,
+  IApiError,
+} from '../../interfaces/api.ts';
+import { logger } from '../../logger.ts';
+import { ContainerManager } from '../../containers/container-manager.ts';
+import { ModelLoader } from '../../models/loader.ts';
+
+/**
+ * Handler for chat completion requests
+ */
+export class ChatHandler {
+  private readonly containerManager: ContainerManager;
+  private readonly modelLoader: ModelLoader;
+
+  constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
+    this.containerManager = containerManager;
+    this.modelLoader = modelLoader;
+  }
+
+  /**
+   * Handle POST /v1/chat/completions.
+   *
+   * Resolves a container for `body.model` (asking the model loader to load it
+   * when no container serves it yet) and dispatches to the streaming or
+   * non-streaming path depending on `body.stream`.
+   *
+   * @param req  Incoming request (currently unused).
+   * @param res  Response the result or an OpenAI-style error is written to.
+   * @param body Parsed request body.
+   */
+  public async handleChatCompletion(
+    req: http.IncomingMessage,
+    res: http.ServerResponse,
+    body: IChatCompletionRequest,
+  ): Promise<void> {
+    const modelName = body.model;
+
+    // The body is parsed JSON, so `model` is not guaranteed to be a usable
+    // string; reject it up front instead of trying to load "undefined".
+    if (typeof modelName !== 'string' || modelName.length === 0) {
+      this.sendError(res, 400, 'Missing required parameter: "model"', 'invalid_request_error', 'model');
+      return;
+    }
+
+    logger.dim(`Chat completion request for model: ${modelName}`);
+
+    try {
+      const container = await this.findOrLoadModel(modelName);
+      if (!container) {
+        this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
+        return;
+      }
+
+      if (body.stream === true) {
+        await this.handleStreamingCompletion(res, container, body);
+      } else {
+        await this.handleNonStreamingCompletion(res, container, body);
+      }
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logger.error(`Chat completion error: ${message}`);
+      this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
+    }
+  }
+
+  /**
+   * Return the container serving `modelName`, asking the model loader to load
+   * the model first when no container has it yet.
+   *
+   * @returns `null` when loading fails; otherwise the result of the container
+   *          lookup.
+   */
+  private async findOrLoadModel(
+    modelName: string,
+  ): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
+    const container = await this.containerManager.findContainerForModel(modelName);
+    if (container) {
+      return container;
+    }
+
+    logger.info(`Model ${modelName} not loaded, attempting to load...`);
+    const loadResult = await this.modelLoader.loadModel(modelName);
+    if (!loadResult.success) {
+      logger.error(`Failed to load model: ${loadResult.error}`);
+      return null;
+    }
+
+    // The load reported success, so look the container up again.
+    return this.containerManager.findContainerForModel(modelName);
+  }
+
+  /**
+   * Handle non-streaming chat completion: forward the request to the container
+   * and send its response as a JSON body.
+   */
+  private async handleNonStreamingCompletion(
+    res: http.ServerResponse,
+    container: import('../../containers/base-container.ts').BaseContainer,
+    body: IChatCompletionRequest,
+  ): Promise<void> {
+    const response = await container.chatCompletion(body);
+
+    // Serialise before committing the status line so a serialisation failure
+    // can still be reported as a regular JSON error by the caller.
+    const payload = JSON.stringify(response);
+
+    res.writeHead(200, { 'Content-Type': 'application/json' });
+    res.end(payload);
+  }
+
+  /**
+   * Handle streaming chat completion over server-sent events.
+   *
+   * The 200 status and SSE headers are committed before the first chunk, so a
+   * failure while streaming cannot be turned into an HTTP error status any
+   * more (a second `writeHead` would throw ERR_HTTP_HEADERS_SENT). Such a
+   * failure is logged and reported to the client as a final `data:` event
+   * carrying the error envelope, and the response is always ended.
+   */
+  private async handleStreamingCompletion(
+    res: http.ServerResponse,
+    container: import('../../containers/base-container.ts').BaseContainer,
+    body: IChatCompletionRequest,
+  ): Promise<void> {
+    res.writeHead(200, {
+      'Content-Type': 'text/event-stream',
+      'Cache-Control': 'no-cache',
+      'Connection': 'keep-alive',
+      'X-Accel-Buffering': 'no',
+    });
+
+    try {
+      await container.chatCompletionStream(body, (chunk) => {
+        res.write(`data: ${chunk}\n\n`);
+      });
+
+      // Terminator event, only sent when the stream completed normally.
+      res.write('data: [DONE]\n\n');
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logger.error(`Chat completion error: ${message}`);
+
+      const payload = this.buildError(`Chat completion failed: ${message}`, 'server_error');
+      res.write(`data: ${JSON.stringify(payload)}\n\n`);
+    } finally {
+      if (!res.writableEnded) {
+        res.end();
+      }
+    }
+  }
+
+  /**
+   * Build the error envelope shared by JSON error responses and SSE error
+   * events.
+   */
+  private buildError(message: string, type: string, param?: string): IApiError {
+    return {
+      error: {
+        message,
+        type,
+        param,
+        code: null,
+      },
+    };
+  }
+
+  /**
+   * Send a JSON error response.
+   *
+   * If the response headers were already sent, the status can no longer be
+   * changed; in that case the response is only closed (when still open).
+   */
+  private sendError(
+    res: http.ServerResponse,
+    statusCode: number,
+    message: string,
+    type: string,
+    param?: string,
+  ): void {
+    if (res.headersSent) {
+      if (!res.writableEnded) {
+        res.end();
+      }
+      return;
+    }
+
+    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify(this.buildError(message, type, param)));
+  }
+}
--- a/ts/api/handlers/embeddings.ts
+++ b/ts/api/handlers/embeddings.ts
@@ -0,0 +1,235 @@
+/**
+ * Embeddings Handler
+ *
+ * Handles /v1/embeddings endpoint.
+ */
+
+import * as http from 'node:http';
+import type {
+  IEmbeddingsRequest,
+  IEmbeddingsResponse,
+  IEmbeddingData,
+  IApiError,
+} from '../../interfaces/api.ts';
+import { logger } from '../../logger.ts';
+import { ContainerManager } from '../../containers/container-manager.ts';
+
+/**
+ * Handler for embeddings requests
+ */
+export class EmbeddingsHandler {
+  private readonly containerManager: ContainerManager;
+
+  constructor(containerManager: ContainerManager) {
+    this.containerManager = containerManager;
+  }
+
+  /**
+   * Handle POST /v1/embeddings.
+   *
+   * Looks up the container that serves `body.model`, embeds every input and
+   * writes an OpenAI-style list response. When no container serves the model
+   * a 404 is returned; no load is attempted here.
+   *
+   * @param res  Response the result or an OpenAI-style error is written to.
+   * @param body Parsed request body.
+   */
+  public async handleEmbeddings(
+    res: http.ServerResponse,
+    body: IEmbeddingsRequest,
+  ): Promise<void> {
+    const modelName = body.model;
+
+    // The body is parsed JSON, so required fields may be absent at runtime.
+    if (typeof modelName !== 'string' || modelName.length === 0) {
+      this.sendError(res, 400, 'Missing required parameter: "model"', 'invalid_request_error', 'model');
+      return;
+    }
+    if (body.input == null) {
+      this.sendError(res, 400, 'Missing required parameter: "input"', 'invalid_request_error', 'input');
+      return;
+    }
+
+    logger.dim(`Embeddings request for model: ${modelName}`);
+
+    try {
+      const container = await this.containerManager.findContainerForModel(modelName);
+      if (!container) {
+        this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found');
+        return;
+      }
+
+      const response = await this.generateEmbeddings(container, body);
+
+      // Serialise before committing the status line so any failure up to this
+      // point is still reported through the catch block as a JSON error.
+      const payload = JSON.stringify(response);
+
+      res.writeHead(200, { 'Content-Type': 'application/json' });
+      res.end(payload);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logger.error(`Embeddings error: ${message}`);
+      this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
+    }
+  }
+
+  /**
+   * Generate embeddings for every input of the request.
+   *
+   * A single input is treated as a one-element list. Inputs are embedded one
+   * at a time, in order, and each result's `index` is the input's position.
+   * The reported usage is the sum of the per-input token counts.
+   */
+  private async generateEmbeddings(
+    container: import('../../containers/base-container.ts').BaseContainer,
+    request: IEmbeddingsRequest,
+  ): Promise<IEmbeddingsResponse> {
+    const inputs = Array.isArray(request.input) ? request.input : [request.input];
+    const data: IEmbeddingData[] = [];
+    let totalTokens = 0;
+
+    for (const [index, input] of inputs.entries()) {
+      const { vector, tokenCount } = await this.getEmbeddingFromContainer(
+        container,
+        request.model,
+        input,
+      );
+
+      data.push({ object: 'embedding', embedding: vector, index });
+      totalTokens += tokenCount;
+    }
+
+    return {
+      object: 'list',
+      data,
+      model: request.model,
+      usage: {
+        prompt_tokens: totalTokens,
+        total_tokens: totalTokens,
+      },
+    };
+  }
+
+  /**
+   * Get an embedding for one input, dispatching on the container type.
+   *
+   * @throws Error when the container type has no embedding route here.
+   */
+  private async getEmbeddingFromContainer(
+    container: import('../../containers/base-container.ts').BaseContainer,
+    model: string,
+    input: string,
+  ): Promise<{ vector: number[]; tokenCount: number }> {
+    const endpoint = container.getEndpoint();
+
+    switch (container.type) {
+      case 'ollama':
+        return this.getOllamaEmbedding(endpoint, model, input);
+      case 'vllm':
+        return this.getVllmEmbedding(endpoint, model, input);
+      case 'tgi':
+        return this.getTgiEmbedding(endpoint, model, input);
+      default:
+        throw new Error(`Container type ${container.type} does not support embeddings`);
+    }
+  }
+
+  /**
+   * POST a JSON payload to a backend and return the parsed JSON reply.
+   *
+   * @param backend Name used as the prefix of the error message.
+   * @throws Error carrying the response text when the status is not OK.
+   */
+  private async postJson(url: string, payload: unknown, backend: string): Promise<unknown> {
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(payload),
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`${backend} embedding error: ${errorText}`);
+    }
+
+    return response.json();
+  }
+
+  /**
+   * Check that a backend reply actually contained a numeric vector.
+   *
+   * Without this check a malformed reply would be forwarded to the client as
+   * a 200 response with the embedding missing.
+   *
+   * @throws Error when `value` is not an array of numbers.
+   */
+  private requireVector(value: unknown, backend: string): number[] {
+    if (!Array.isArray(value) || value.some((item) => typeof item !== 'number')) {
+      throw new Error(`${backend} embedding error: response did not contain an embedding vector`);
+    }
+    return value;
+  }
+
+  /**
+   * Rough token estimate (~4 characters per token) for backends whose reply
+   * carries no usage information.
+   */
+  private estimateTokens(input: string): number {
+    return Math.ceil(input.length / 4);
+  }
+
+  /**
+   * Get embedding from Ollama via `/api/embeddings`. The token count is an
+   * estimate because only the `embedding` field of the reply is read.
+   */
+  private async getOllamaEmbedding(
+    endpoint: string,
+    model: string,
+    input: string,
+  ): Promise<{ vector: number[]; tokenCount: number }> {
+    const result = await this.postJson(
+      `${endpoint}/api/embeddings`,
+      { model, prompt: input },
+      'Ollama',
+    ) as { embedding?: unknown } | null;
+
+    return {
+      vector: this.requireVector(result?.embedding, 'Ollama'),
+      tokenCount: this.estimateTokens(input),
+    };
+  }
+
+  /**
+   * Get embedding from vLLM (OpenAI-compatible `/v1/embeddings`). The token
+   * count is taken from the reply's usage and estimated when it is absent.
+   */
+  private async getVllmEmbedding(
+    endpoint: string,
+    model: string,
+    input: string,
+  ): Promise<{ vector: number[]; tokenCount: number }> {
+    const result = await this.postJson(
+      `${endpoint}/v1/embeddings`,
+      { model, input },
+      'vLLM',
+    ) as Partial<IEmbeddingsResponse> | null;
+
+    const reportedTokens = result?.usage?.total_tokens;
+
+    return {
+      vector: this.requireVector(result?.data?.[0]?.embedding, 'vLLM'),
+      tokenCount: typeof reportedTokens === 'number' ? reportedTokens : this.estimateTokens(input),
+    };
+  }
+
+  /**
+   * Get embedding from TGI via `/embed`. The reply is read as a list of
+   * vectors of which the first is used; the token count is an estimate.
+   */
+  private async getTgiEmbedding(
+    endpoint: string,
+    _model: string,
+    input: string,
+  ): Promise<{ vector: number[]; tokenCount: number }> {
+    const result = await this.postJson(`${endpoint}/embed`, { inputs: input }, 'TGI');
+    const first: unknown = Array.isArray(result) ? result[0] : undefined;
+
+    return {
+      vector: this.requireVector(first, 'TGI'),
+      tokenCount: this.estimateTokens(input),
+    };
+  }
+
+  /**
+   * Send a JSON error response in the OpenAI error envelope.
+   */
+  private sendError(
+    res: http.ServerResponse,
+    statusCode: number,
+    message: string,
+    type: string,
+    param?: string,
+  ): void {
+    const error: IApiError = {
+      error: {
+        message,
+        type,
+        param,
+        code: null,
+      },
+    };
+
+    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify(error));
+  }
+}
--- a/ts/api/handlers/index.ts
+++ b/ts/api/handlers/index.ts
@@ -0,0 +1,9 @@
+/**
+ * API Handlers
+ *
+ * OpenAI-compatible request handlers.
+ */
+
+export { ChatHandler } from './chat.ts';
+export { ModelsHandler } from './models.ts';
+export { EmbeddingsHandler } from './embeddings.ts';
--- a/ts/api/handlers/models.ts
+++ b/ts/api/handlers/models.ts
@@ -0,0 +1,136 @@
+/**
+ * Models Handler
+ *
+ * Handles /v1/models endpoints.
+ */
+
+import * as http from 'node:http';
+import type {
+  IModelInfo,
+  IListModelsResponse,
+  IApiError,
+} from '../../interfaces/api.ts';
+import { logger } from '../../logger.ts';
+import { ContainerManager } from '../../containers/container-manager.ts';
+import { ModelRegistry } from '../../models/registry.ts';
+
+/**
+ * Handler for model-related requests
+ */
+export class ModelsHandler {
+  private containerManager: ContainerManager;
+  private modelRegistry: ModelRegistry;
+
+  constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
+    this.containerManager = containerManager;
+    this.modelRegistry = modelRegistry;
+  }
+
+  /**
+   * Handle GET /v1/models: respond with the list of all available models.
+   */
+  public async handleListModels(res: http.ServerResponse): Promise<void> {
+    try {
+      const listing: IListModelsResponse = {
+        object: 'list',
+        data: await this.getAvailableModels(),
+      };
+
+      res.writeHead(200, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify(listing));
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      logger.error(`Failed to list models: ${reason}`);
+      this.sendError(res, 500, `Failed to list models: ${reason}`, 'server_error');
+    }
+  }
+
+  /**
+   * Handle GET /v1/models/:model: respond with the entry whose id equals
+   * `modelId`, or a 404 error when there is none.
+   */
+  public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
+    try {
+      const available = await this.getAvailableModels();
+      const match = available.find((entry) => entry.id === modelId);
+
+      if (match) {
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify(match));
+        return;
+      }
+
+      this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      logger.error(`Failed to get model info: ${reason}`);
+      this.sendError(res, 500, `Failed to get model info: ${reason}`, 'server_error');
+    }
+  }
+
+  /**
+   * Collect the models reported by the container manager followed by the
+   * greenlit models from the registry, keeping the first entry per id, and
+   * return them sorted by id. Every entry gets the current time (in seconds)
+   * as its `created` value.
+   */
+  private async getAvailableModels(): Promise<IModelInfo[]> {
+    const result: IModelInfo[] = [];
+    const knownIds = new Set<string>();
+    const created = Math.floor(Date.now() / 1000);
+
+    // Models the container manager reports as available
+    const fromContainers = await this.containerManager.getAllAvailableModels();
+    for (const [id, info] of fromContainers) {
+      if (knownIds.has(id)) {
+        continue;
+      }
+      knownIds.add(id);
+      result.push({ id, object: 'model', created, owned_by: `modelgrid-${info.container}` });
+    }
+
+    // Greenlit models that were not already listed above
+    const fromRegistry = await this.modelRegistry.getAllGreenlitModels();
+    for (const entry of fromRegistry) {
+      if (knownIds.has(entry.name)) {
+        continue;
+      }
+      knownIds.add(entry.name);
+      result.push({
+        id: entry.name,
+        object: 'model',
+        created,
+        owned_by: `modelgrid-${entry.container}`,
+      });
+    }
+
+    result.sort((left, right) => left.id.localeCompare(right.id));
+
+    return result;
+  }
+
+  /**
+   * Write a JSON error response with the given status code, wrapping the
+   * details in an `error` envelope whose `code` is always `null`.
+   */
+  private sendError(
+    res: http.ServerResponse,
+    statusCode: number,
+    message: string,
+    type: string,
+    param?: string,
+  ): void {
+    const payload: IApiError = { error: { message, type, param, code: null } };
+
+    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
+    res.end(JSON.stringify(payload));
+  }
+}