initial
This commit is contained in:
150
ts/api/handlers/chat.ts
Normal file
150
ts/api/handlers/chat.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
/**
|
||||
* Chat Completions Handler
|
||||
*
|
||||
* Handles /v1/chat/completions and /v1/completions endpoints.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IChatCompletionRequest,
|
||||
IChatCompletionResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { ModelLoader } from '../../models/loader.ts';
|
||||
|
||||
/**
|
||||
* Handler for chat completion requests
|
||||
*/
|
||||
export class ChatHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private modelLoader: ModelLoader;
|
||||
|
||||
constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelLoader = modelLoader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/chat/completions
|
||||
*/
|
||||
public async handleChatCompletion(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
const modelName = body.model;
|
||||
const isStream = body.stream === true;
|
||||
|
||||
logger.dim(`Chat completion request for model: ${modelName}`);
|
||||
|
||||
try {
|
||||
// Find or load the model
|
||||
const container = await this.findOrLoadModel(modelName);
|
||||
if (!container) {
|
||||
this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
|
||||
return;
|
||||
}
|
||||
|
||||
// Route to streaming or non-streaming handler
|
||||
if (isStream) {
|
||||
await this.handleStreamingCompletion(res, container, body);
|
||||
} else {
|
||||
await this.handleNonStreamingCompletion(res, container, body);
|
||||
}
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Chat completion error: ${message}`);
|
||||
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find container with model or attempt to load it
|
||||
*/
|
||||
private async findOrLoadModel(
|
||||
modelName: string,
|
||||
): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
|
||||
// First, check if model is already loaded
|
||||
const container = await this.containerManager.findContainerForModel(modelName);
|
||||
if (container) {
|
||||
return container;
|
||||
}
|
||||
|
||||
// Try to load the model
|
||||
logger.info(`Model ${modelName} not loaded, attempting to load...`);
|
||||
const loadResult = await this.modelLoader.loadModel(modelName);
|
||||
|
||||
if (!loadResult.success) {
|
||||
logger.error(`Failed to load model: ${loadResult.error}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Find the container again after loading
|
||||
return this.containerManager.findContainerForModel(modelName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle non-streaming chat completion
|
||||
*/
|
||||
private async handleNonStreamingCompletion(
|
||||
res: http.ServerResponse,
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
const response = await container.chatCompletion(body);
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response));
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle streaming chat completion
|
||||
*/
|
||||
private async handleStreamingCompletion(
|
||||
res: http.ServerResponse,
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
// Set SSE headers
|
||||
res.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'X-Accel-Buffering': 'no',
|
||||
});
|
||||
|
||||
// Stream chunks to client
|
||||
await container.chatCompletionStream(body, (chunk) => {
|
||||
res.write(`data: ${chunk}\n\n`);
|
||||
});
|
||||
|
||||
// Send final done message
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
235
ts/api/handlers/embeddings.ts
Normal file
235
ts/api/handlers/embeddings.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* Embeddings Handler
|
||||
*
|
||||
* Handles /v1/embeddings endpoint.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IEmbeddingsRequest,
|
||||
IEmbeddingsResponse,
|
||||
IEmbeddingData,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
|
||||
/**
|
||||
* Handler for embeddings requests
|
||||
*/
|
||||
export class EmbeddingsHandler {
|
||||
private containerManager: ContainerManager;
|
||||
|
||||
constructor(containerManager: ContainerManager) {
|
||||
this.containerManager = containerManager;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/embeddings
|
||||
*/
|
||||
public async handleEmbeddings(
|
||||
res: http.ServerResponse,
|
||||
body: IEmbeddingsRequest,
|
||||
): Promise<void> {
|
||||
const modelName = body.model;
|
||||
|
||||
logger.dim(`Embeddings request for model: ${modelName}`);
|
||||
|
||||
try {
|
||||
// Find container with the embedding model
|
||||
const container = await this.containerManager.findContainerForModel(modelName);
|
||||
if (!container) {
|
||||
this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found');
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate embeddings
|
||||
const response = await this.generateEmbeddings(container, body);
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response));
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Embeddings error: ${message}`);
|
||||
this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings from container
|
||||
*/
|
||||
private async generateEmbeddings(
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
request: IEmbeddingsRequest,
|
||||
): Promise<IEmbeddingsResponse> {
|
||||
const inputs = Array.isArray(request.input) ? request.input : [request.input];
|
||||
const embeddings: IEmbeddingData[] = [];
|
||||
let totalTokens = 0;
|
||||
|
||||
// Generate embeddings for each input
|
||||
for (let i = 0; i < inputs.length; i++) {
|
||||
const input = inputs[i];
|
||||
const embedding = await this.getEmbeddingFromContainer(container, request.model, input);
|
||||
|
||||
embeddings.push({
|
||||
object: 'embedding',
|
||||
embedding: embedding.vector,
|
||||
index: i,
|
||||
});
|
||||
|
||||
totalTokens += embedding.tokenCount;
|
||||
}
|
||||
|
||||
return {
|
||||
object: 'list',
|
||||
data: embeddings,
|
||||
model: request.model,
|
||||
usage: {
|
||||
prompt_tokens: totalTokens,
|
||||
total_tokens: totalTokens,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from container (container-specific implementation)
|
||||
*/
|
||||
private async getEmbeddingFromContainer(
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const endpoint = container.getEndpoint();
|
||||
const containerType = container.type;
|
||||
|
||||
// Route to container-specific embedding endpoint
|
||||
if (containerType === 'ollama') {
|
||||
return this.getOllamaEmbedding(endpoint, model, input);
|
||||
} else if (containerType === 'vllm') {
|
||||
return this.getVllmEmbedding(endpoint, model, input);
|
||||
} else if (containerType === 'tgi') {
|
||||
return this.getTgiEmbedding(endpoint, model, input);
|
||||
}
|
||||
|
||||
throw new Error(`Container type ${containerType} does not support embeddings`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from Ollama
|
||||
*/
|
||||
private async getOllamaEmbedding(
|
||||
endpoint: string,
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const response = await fetch(`${endpoint}/api/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
prompt: input,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`Ollama embedding error: ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as { embedding: number[] };
|
||||
|
||||
// Estimate token count (rough approximation: ~4 chars per token)
|
||||
const tokenCount = Math.ceil(input.length / 4);
|
||||
|
||||
return {
|
||||
vector: result.embedding,
|
||||
tokenCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from vLLM (OpenAI-compatible)
|
||||
*/
|
||||
private async getVllmEmbedding(
|
||||
endpoint: string,
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const response = await fetch(`${endpoint}/v1/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
input,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`vLLM embedding error: ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as IEmbeddingsResponse;
|
||||
|
||||
return {
|
||||
vector: result.data[0].embedding,
|
||||
tokenCount: result.usage.total_tokens,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from TGI
|
||||
*/
|
||||
private async getTgiEmbedding(
|
||||
endpoint: string,
|
||||
_model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
// TGI uses /embed endpoint
|
||||
const response = await fetch(`${endpoint}/embed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
inputs: input,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`TGI embedding error: ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as number[][];
|
||||
|
||||
// Estimate token count
|
||||
const tokenCount = Math.ceil(input.length / 4);
|
||||
|
||||
return {
|
||||
vector: result[0],
|
||||
tokenCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
9
ts/api/handlers/index.ts
Normal file
9
ts/api/handlers/index.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
/**
|
||||
* API Handlers
|
||||
*
|
||||
* OpenAI-compatible request handlers.
|
||||
*/
|
||||
|
||||
export { ChatHandler } from './chat.ts';
|
||||
export { ModelsHandler } from './models.ts';
|
||||
export { EmbeddingsHandler } from './embeddings.ts';
|
||||
136
ts/api/handlers/models.ts
Normal file
136
ts/api/handlers/models.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Models Handler
|
||||
*
|
||||
* Handles /v1/models endpoints.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IModelInfo,
|
||||
IListModelsResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
/**
|
||||
* Handler for model-related requests
|
||||
*/
|
||||
export class ModelsHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
|
||||
constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models
|
||||
*/
|
||||
public async handleListModels(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const models = await this.getAvailableModels();
|
||||
|
||||
const response: IListModelsResponse = {
|
||||
object: 'list',
|
||||
data: models,
|
||||
};
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response));
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Failed to list models: ${message}`);
|
||||
this.sendError(res, 500, `Failed to list models: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models/:model
|
||||
*/
|
||||
public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
|
||||
try {
|
||||
const models = await this.getAvailableModels();
|
||||
const model = models.find((m) => m.id === modelId);
|
||||
|
||||
if (!model) {
|
||||
this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
|
||||
return;
|
||||
}
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(model));
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Failed to get model info: ${message}`);
|
||||
this.sendError(res, 500, `Failed to get model info: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all available models from containers and greenlist
|
||||
*/
|
||||
private async getAvailableModels(): Promise<IModelInfo[]> {
|
||||
const models: IModelInfo[] = [];
|
||||
const seen = new Set<string>();
|
||||
const timestamp = Math.floor(Date.now() / 1000);
|
||||
|
||||
// Get models from running containers
|
||||
const containerModels = await this.containerManager.getAllAvailableModels();
|
||||
for (const [modelId, modelInfo] of containerModels) {
|
||||
if (!seen.has(modelId)) {
|
||||
seen.add(modelId);
|
||||
models.push({
|
||||
id: modelId,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${modelInfo.container}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add greenlit models that aren't loaded yet
|
||||
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
|
||||
for (const greenlit of greenlitModels) {
|
||||
if (!seen.has(greenlit.name)) {
|
||||
seen.add(greenlit.name);
|
||||
models.push({
|
||||
id: greenlit.name,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${greenlit.container}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Sort alphabetically
|
||||
models.sort((a, b) => a.id.localeCompare(b.id));
|
||||
|
||||
return models;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
10
ts/api/index.ts
Normal file
10
ts/api/index.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* API Gateway Module
|
||||
*
|
||||
* OpenAI-compatible API server for ModelGrid.
|
||||
*/
|
||||
|
||||
export { ApiServer } from './server.ts';
|
||||
export { ApiRouter } from './router.ts';
|
||||
export * from './handlers/index.ts';
|
||||
export * from './middleware/index.ts';
|
||||
105
ts/api/middleware/auth.ts
Normal file
105
ts/api/middleware/auth.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
/**
|
||||
* Authentication Middleware
|
||||
*
|
||||
* Validates API keys for incoming requests.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import { logger } from '../../logger.ts';
|
||||
|
||||
/**
|
||||
* Authentication middleware for API key validation
|
||||
*/
|
||||
export class AuthMiddleware {
|
||||
private apiKeys: Set<string>;
|
||||
private allowNoAuth: boolean;
|
||||
|
||||
constructor(apiKeys: string[], allowNoAuth: boolean = false) {
|
||||
this.apiKeys = new Set(apiKeys);
|
||||
this.allowNoAuth = allowNoAuth;
|
||||
|
||||
if (this.apiKeys.size === 0 && !allowNoAuth) {
|
||||
logger.warn('No API keys configured - authentication will fail for all requests');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Authenticate a request
|
||||
*/
|
||||
public authenticate(req: http.IncomingMessage): boolean {
|
||||
// If no keys configured and allowNoAuth is true, allow all requests
|
||||
if (this.apiKeys.size === 0 && this.allowNoAuth) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const authHeader = req.headers.authorization;
|
||||
|
||||
if (!authHeader) {
|
||||
logger.dim('Request rejected: No Authorization header');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extract Bearer token
|
||||
const match = authHeader.match(/^Bearer\s+(.+)$/i);
|
||||
if (!match) {
|
||||
logger.dim('Request rejected: Invalid Authorization header format');
|
||||
return false;
|
||||
}
|
||||
|
||||
const apiKey = match[1];
|
||||
|
||||
// Check if key is valid
|
||||
if (!this.apiKeys.has(apiKey)) {
|
||||
logger.dim('Request rejected: Invalid API key');
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get API key from request (if authenticated)
|
||||
*/
|
||||
public getApiKey(req: http.IncomingMessage): string | null {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const match = authHeader.match(/^Bearer\s+(.+)$/i);
|
||||
return match ? match[1] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an API key
|
||||
*/
|
||||
public addApiKey(key: string): void {
|
||||
this.apiKeys.add(key);
|
||||
logger.info('API key added');
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove an API key
|
||||
*/
|
||||
public removeApiKey(key: string): boolean {
|
||||
const removed = this.apiKeys.delete(key);
|
||||
if (removed) {
|
||||
logger.info('API key removed');
|
||||
}
|
||||
return removed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get count of configured API keys
|
||||
*/
|
||||
public getKeyCount(): number {
|
||||
return this.apiKeys.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if authentication is required
|
||||
*/
|
||||
public isAuthRequired(): boolean {
|
||||
return !this.allowNoAuth || this.apiKeys.size > 0;
|
||||
}
|
||||
}
|
||||
7
ts/api/middleware/index.ts
Normal file
7
ts/api/middleware/index.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
/**
|
||||
* API Middleware
|
||||
*/
|
||||
|
||||
export { AuthMiddleware } from './auth.ts';
|
||||
export { SanityMiddleware } from './sanity.ts';
|
||||
export type { IValidationResult } from './sanity.ts';
|
||||
254
ts/api/middleware/sanity.ts
Normal file
254
ts/api/middleware/sanity.ts
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* Sanity Middleware
|
||||
*
|
||||
* Validates request structure and parameters.
|
||||
*/
|
||||
|
||||
import type { IChatCompletionRequest, IEmbeddingsRequest } from '../../interfaces/api.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
/**
 * Validation result returned by every SanityMiddleware check.
 */
export interface IValidationResult {
  // True when the request passed all checks.
  valid: boolean;
  // Human-readable description of the first failed check (set when valid is false).
  error?: string;
  // Name/path of the offending request field, when one can be identified.
  param?: string;
}
|
||||
|
||||
/**
|
||||
* Request validation middleware
|
||||
*/
|
||||
export class SanityMiddleware {
|
||||
private modelRegistry: ModelRegistry;
|
||||
|
||||
constructor(modelRegistry: ModelRegistry) {
|
||||
this.modelRegistry = modelRegistry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate chat completion request
|
||||
*/
|
||||
public validateChatRequest(body: unknown): IValidationResult {
|
||||
if (!body || typeof body !== 'object') {
|
||||
return { valid: false, error: 'Request body must be a JSON object' };
|
||||
}
|
||||
|
||||
const request = body as Record<string, unknown>;
|
||||
|
||||
// Validate model
|
||||
if (!request.model || typeof request.model !== 'string') {
|
||||
return { valid: false, error: 'Missing or invalid "model" field', param: 'model' };
|
||||
}
|
||||
|
||||
// Validate messages
|
||||
if (!Array.isArray(request.messages)) {
|
||||
return { valid: false, error: 'Missing or invalid "messages" field', param: 'messages' };
|
||||
}
|
||||
|
||||
if (request.messages.length === 0) {
|
||||
return { valid: false, error: '"messages" array cannot be empty', param: 'messages' };
|
||||
}
|
||||
|
||||
// Validate each message
|
||||
for (let i = 0; i < request.messages.length; i++) {
|
||||
const msg = request.messages[i] as Record<string, unknown>;
|
||||
const msgValidation = this.validateMessage(msg, i);
|
||||
if (!msgValidation.valid) {
|
||||
return msgValidation;
|
||||
}
|
||||
}
|
||||
|
||||
// Validate optional parameters
|
||||
if (request.temperature !== undefined) {
|
||||
const temp = request.temperature as number;
|
||||
if (typeof temp !== 'number' || temp < 0 || temp > 2) {
|
||||
return { valid: false, error: '"temperature" must be between 0 and 2', param: 'temperature' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.top_p !== undefined) {
|
||||
const topP = request.top_p as number;
|
||||
if (typeof topP !== 'number' || topP < 0 || topP > 1) {
|
||||
return { valid: false, error: '"top_p" must be between 0 and 1', param: 'top_p' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.max_tokens !== undefined) {
|
||||
const maxTokens = request.max_tokens as number;
|
||||
if (typeof maxTokens !== 'number' || maxTokens < 1) {
|
||||
return { valid: false, error: '"max_tokens" must be a positive integer', param: 'max_tokens' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.n !== undefined) {
|
||||
const n = request.n as number;
|
||||
if (typeof n !== 'number' || n < 1 || n > 10) {
|
||||
return { valid: false, error: '"n" must be between 1 and 10', param: 'n' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.stream !== undefined && typeof request.stream !== 'boolean') {
|
||||
return { valid: false, error: '"stream" must be a boolean', param: 'stream' };
|
||||
}
|
||||
|
||||
if (request.presence_penalty !== undefined) {
|
||||
const pp = request.presence_penalty as number;
|
||||
if (typeof pp !== 'number' || pp < -2 || pp > 2) {
|
||||
return { valid: false, error: '"presence_penalty" must be between -2 and 2', param: 'presence_penalty' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.frequency_penalty !== undefined) {
|
||||
const fp = request.frequency_penalty as number;
|
||||
if (typeof fp !== 'number' || fp < -2 || fp > 2) {
|
||||
return { valid: false, error: '"frequency_penalty" must be between -2 and 2', param: 'frequency_penalty' };
|
||||
}
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate a single message in the chat request
|
||||
*/
|
||||
private validateMessage(msg: Record<string, unknown>, index: number): IValidationResult {
|
||||
if (!msg || typeof msg !== 'object') {
|
||||
return { valid: false, error: `Message at index ${index} must be an object`, param: `messages[${index}]` };
|
||||
}
|
||||
|
||||
// Validate role
|
||||
const validRoles = ['system', 'user', 'assistant', 'tool'];
|
||||
if (!msg.role || !validRoles.includes(msg.role as string)) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Invalid role at index ${index}. Must be one of: ${validRoles.join(', ')}`,
|
||||
param: `messages[${index}].role`,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate content (can be null for assistant with tool_calls)
|
||||
if (msg.role === 'assistant' && msg.tool_calls) {
|
||||
// Content can be null/undefined when tool_calls present
|
||||
} else if (msg.content === undefined || msg.content === null) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Missing content at index ${index}`,
|
||||
param: `messages[${index}].content`,
|
||||
};
|
||||
} else if (typeof msg.content !== 'string') {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Content at index ${index} must be a string`,
|
||||
param: `messages[${index}].content`,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate tool response message
|
||||
if (msg.role === 'tool' && !msg.tool_call_id) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Tool message at index ${index} requires tool_call_id`,
|
||||
param: `messages[${index}].tool_call_id`,
|
||||
};
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate embeddings request
|
||||
*/
|
||||
public validateEmbeddingsRequest(body: unknown): IValidationResult {
|
||||
if (!body || typeof body !== 'object') {
|
||||
return { valid: false, error: 'Request body must be a JSON object' };
|
||||
}
|
||||
|
||||
const request = body as Record<string, unknown>;
|
||||
|
||||
// Validate model
|
||||
if (!request.model || typeof request.model !== 'string') {
|
||||
return { valid: false, error: 'Missing or invalid "model" field', param: 'model' };
|
||||
}
|
||||
|
||||
// Validate input
|
||||
if (request.input === undefined || request.input === null) {
|
||||
return { valid: false, error: 'Missing "input" field', param: 'input' };
|
||||
}
|
||||
|
||||
const input = request.input;
|
||||
if (typeof input !== 'string' && !Array.isArray(input)) {
|
||||
return { valid: false, error: '"input" must be a string or array of strings', param: 'input' };
|
||||
}
|
||||
|
||||
if (Array.isArray(input)) {
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
if (typeof input[i] !== 'string') {
|
||||
return { valid: false, error: `"input[${i}]" must be a string`, param: `input[${i}]` };
|
||||
}
|
||||
}
|
||||
|
||||
if (input.length === 0) {
|
||||
return { valid: false, error: '"input" array cannot be empty', param: 'input' };
|
||||
}
|
||||
}
|
||||
|
||||
// Validate encoding_format
|
||||
if (request.encoding_format !== undefined) {
|
||||
const format = request.encoding_format as string;
|
||||
if (format !== 'float' && format !== 'base64') {
|
||||
return { valid: false, error: '"encoding_format" must be "float" or "base64"', param: 'encoding_format' };
|
||||
}
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if model is in greenlist (async validation)
|
||||
*/
|
||||
public async validateModelGreenlist(modelName: string): Promise<IValidationResult> {
|
||||
const isGreenlit = await this.modelRegistry.isModelGreenlit(modelName);
|
||||
if (!isGreenlit) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Model "${modelName}" is not greenlit. Contact administrator to add it to the greenlist.`,
|
||||
param: 'model',
|
||||
};
|
||||
}
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize request body by removing unknown fields
|
||||
*/
|
||||
public sanitizeChatRequest(body: Record<string, unknown>): IChatCompletionRequest {
|
||||
return {
|
||||
model: body.model as string,
|
||||
messages: body.messages as IChatCompletionRequest['messages'],
|
||||
max_tokens: body.max_tokens as number | undefined,
|
||||
temperature: body.temperature as number | undefined,
|
||||
top_p: body.top_p as number | undefined,
|
||||
n: body.n as number | undefined,
|
||||
stream: body.stream as boolean | undefined,
|
||||
stop: body.stop as string | string[] | undefined,
|
||||
presence_penalty: body.presence_penalty as number | undefined,
|
||||
frequency_penalty: body.frequency_penalty as number | undefined,
|
||||
user: body.user as string | undefined,
|
||||
tools: body.tools as IChatCompletionRequest['tools'],
|
||||
tool_choice: body.tool_choice as IChatCompletionRequest['tool_choice'],
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize embeddings request
|
||||
*/
|
||||
public sanitizeEmbeddingsRequest(body: Record<string, unknown>): IEmbeddingsRequest {
|
||||
return {
|
||||
model: body.model as string,
|
||||
input: body.input as string | string[],
|
||||
user: body.user as string | undefined,
|
||||
encoding_format: body.encoding_format as 'float' | 'base64' | undefined,
|
||||
};
|
||||
}
|
||||
}
|
||||
300
ts/api/router.ts
Normal file
300
ts/api/router.ts
Normal file
@@ -0,0 +1,300 @@
|
||||
/**
|
||||
* API Router
|
||||
*
|
||||
* Routes incoming requests to appropriate handlers.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type { IApiError } from '../interfaces/api.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { ChatHandler } from './handlers/chat.ts';
|
||||
import { ModelsHandler } from './handlers/models.ts';
|
||||
import { EmbeddingsHandler } from './handlers/embeddings.ts';
|
||||
import { AuthMiddleware } from './middleware/auth.ts';
|
||||
import { SanityMiddleware } from './middleware/sanity.ts';
|
||||
|
||||
/**
|
||||
* API Router - routes requests to handlers
|
||||
*/
|
||||
export class ApiRouter {
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
private chatHandler: ChatHandler;
|
||||
private modelsHandler: ModelsHandler;
|
||||
private embeddingsHandler: EmbeddingsHandler;
|
||||
private authMiddleware: AuthMiddleware;
|
||||
private sanityMiddleware: SanityMiddleware;
|
||||
|
||||
constructor(
  containerManager: ContainerManager,
  modelRegistry: ModelRegistry,
  modelLoader: ModelLoader,
  apiKeys: string[],
) {
  // Core dependencies
  this.containerManager = containerManager;
  this.modelRegistry = modelRegistry;
  this.modelLoader = modelLoader;

  // Request handlers, one per endpoint family
  this.chatHandler = new ChatHandler(containerManager, modelLoader);
  this.modelsHandler = new ModelsHandler(containerManager, modelRegistry);
  this.embeddingsHandler = new EmbeddingsHandler(containerManager);

  // Cross-cutting middleware: authentication and request validation
  this.authMiddleware = new AuthMiddleware(apiKeys);
  this.sanityMiddleware = new SanityMiddleware(modelRegistry);
}
|
||||
|
||||
/**
|
||||
* Route a request to the appropriate handler
|
||||
*/
|
||||
public async route(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
path: string,
|
||||
): Promise<void> {
|
||||
// OpenAI API endpoints
|
||||
if (path === '/v1/chat/completions') {
|
||||
await this.handleChatCompletions(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === '/v1/completions') {
|
||||
await this.handleCompletions(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === '/v1/models' || path === '/v1/models/') {
|
||||
await this.handleModels(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path.startsWith('/v1/models/')) {
|
||||
await this.handleModelInfo(req, res, path);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === '/v1/embeddings') {
|
||||
await this.handleEmbeddings(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Not found
|
||||
this.sendError(res, 404, `Endpoint not found: ${path}`, 'invalid_request_error');
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/chat/completions
|
||||
*/
|
||||
private async handleChatCompletions(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'POST') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate request
|
||||
const validation = this.sanityMiddleware.validateChatRequest(body);
|
||||
if (!validation.valid) {
|
||||
this.sendError(res, 400, validation.error || 'Invalid request', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle request
|
||||
await this.chatHandler.handleChatCompletion(req, res, body);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/completions (legacy endpoint)
|
||||
*/
|
||||
private async handleCompletions(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'POST') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Convert to chat format and handle
|
||||
const chatBody = this.convertCompletionToChat(body);
|
||||
await this.chatHandler.handleChatCompletion(req, res, chatBody);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models
|
||||
*/
|
||||
private async handleModels(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'GET') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
await this.modelsHandler.handleListModels(res);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models/:model
|
||||
*/
|
||||
private async handleModelInfo(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
path: string,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'GET') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
const modelId = path.replace('/v1/models/', '');
|
||||
await this.modelsHandler.handleGetModel(res, modelId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/embeddings
|
||||
*/
|
||||
private async handleEmbeddings(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'POST') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
await this.embeddingsHandler.handleEmbeddings(res, body);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse request body
|
||||
*/
|
||||
private async parseRequestBody(req: http.IncomingMessage): Promise<unknown | null> {
|
||||
return new Promise((resolve) => {
|
||||
let body = '';
|
||||
|
||||
req.on('data', (chunk) => {
|
||||
body += chunk.toString();
|
||||
// Limit body size
|
||||
if (body.length > 10 * 1024 * 1024) {
|
||||
resolve(null);
|
||||
}
|
||||
});
|
||||
|
||||
req.on('end', () => {
|
||||
try {
|
||||
resolve(JSON.parse(body));
|
||||
} catch {
|
||||
resolve(null);
|
||||
}
|
||||
});
|
||||
|
||||
req.on('error', () => {
|
||||
resolve(null);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert legacy completion request to chat format
|
||||
*/
|
||||
private convertCompletionToChat(body: Record<string, unknown>): Record<string, unknown> {
|
||||
const prompt = body.prompt as string | string[];
|
||||
const promptText = Array.isArray(prompt) ? prompt.join('\n') : prompt;
|
||||
|
||||
return {
|
||||
model: body.model,
|
||||
messages: [
|
||||
{ role: 'user', content: promptText },
|
||||
],
|
||||
max_tokens: body.max_tokens,
|
||||
temperature: body.temperature,
|
||||
top_p: body.top_p,
|
||||
n: body.n,
|
||||
stream: body.stream,
|
||||
stop: body.stop,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
300
ts/api/server.ts
Normal file
300
ts/api/server.ts
Normal file
@@ -0,0 +1,300 @@
|
||||
/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type { IApiConfig } from '../interfaces/config.ts';
|
||||
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { API_SERVER } from '../constants.ts';
|
||||
import { ApiRouter } from './router.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
|
||||
/**
|
||||
* API Server for ModelGrid
|
||||
*/
|
||||
export class ApiServer {
|
||||
private server?: http.Server;
|
||||
private config: IApiConfig;
|
||||
private router: ApiRouter;
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
private gpuDetector: GpuDetector;
|
||||
private startTime: number = 0;
|
||||
|
||||
constructor(
|
||||
config: IApiConfig,
|
||||
containerManager: ContainerManager,
|
||||
modelRegistry: ModelRegistry,
|
||||
) {
|
||||
this.config = config;
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.gpuDetector = new GpuDetector();
|
||||
this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
|
||||
this.router = new ApiRouter(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
this.modelLoader,
|
||||
config.apiKeys,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the API server
|
||||
*/
|
||||
public async start(): Promise<void> {
|
||||
if (this.server) {
|
||||
logger.warn('API server is already running');
|
||||
return;
|
||||
}
|
||||
|
||||
this.startTime = Date.now();
|
||||
|
||||
this.server = http.createServer(async (req, res) => {
|
||||
await this.handleRequest(req, res);
|
||||
});
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
this.server!.listen(this.config.port, this.config.host, () => {
|
||||
logger.success(`API server started on ${this.config.host}:${this.config.port}`);
|
||||
logger.info('OpenAI-compatible API available at:');
|
||||
logger.info(` POST /v1/chat/completions`);
|
||||
logger.info(` GET /v1/models`);
|
||||
logger.info(` POST /v1/embeddings`);
|
||||
resolve();
|
||||
});
|
||||
|
||||
this.server!.on('error', (error) => {
|
||||
logger.error(`API server error: ${error.message}`);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the API server
|
||||
*/
|
||||
public async stop(): Promise<void> {
|
||||
if (!this.server) {
|
||||
return;
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
this.server!.close(() => {
|
||||
logger.log('API server stopped');
|
||||
this.server = undefined;
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle incoming HTTP request
|
||||
*/
|
||||
private async handleRequest(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
|
||||
// Set CORS headers if enabled
|
||||
if (this.config.cors) {
|
||||
this.setCorsHeaders(req, res);
|
||||
}
|
||||
|
||||
// Handle preflight requests
|
||||
if (req.method === 'OPTIONS') {
|
||||
res.writeHead(204);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse URL
|
||||
const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
|
||||
const path = url.pathname;
|
||||
|
||||
// Health check endpoint (no auth required)
|
||||
if (path === '/health' || path === '/healthz') {
|
||||
await this.handleHealthCheck(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Metrics endpoint (no auth required)
|
||||
if (path === '/metrics') {
|
||||
await this.handleMetrics(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Route request
|
||||
try {
|
||||
await this.router.route(req, res, path);
|
||||
} catch (error) {
|
||||
logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
|
||||
this.sendError(res, 500, 'Internal server error', 'internal_error');
|
||||
}
|
||||
|
||||
// Log request
|
||||
const duration = Date.now() - startTime;
|
||||
logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set CORS headers
|
||||
*/
|
||||
private setCorsHeaders(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): void {
|
||||
const origin = req.headers.origin || '*';
|
||||
const allowedOrigins = this.config.corsOrigins || ['*'];
|
||||
|
||||
if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
|
||||
res.setHeader('Access-Control-Allow-Origin', origin);
|
||||
}
|
||||
|
||||
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
|
||||
res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
|
||||
res.setHeader('Access-Control-Max-Age', '86400');
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle health check
|
||||
*/
|
||||
private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const statuses = await this.containerManager.getAllStatus();
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
|
||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||
|
||||
// Check container health
|
||||
for (const [id, containerStatus] of statuses) {
|
||||
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||||
containerHealth[id] = 'healthy';
|
||||
} else {
|
||||
containerHealth[id] = 'unhealthy';
|
||||
status = 'degraded';
|
||||
}
|
||||
}
|
||||
|
||||
// Check GPU status
|
||||
for (const gpu of gpus) {
|
||||
gpuStatus[gpu.id] = 'available';
|
||||
}
|
||||
|
||||
const response: IHealthResponse = {
|
||||
status,
|
||||
version: '1.0.0', // TODO: Get from config
|
||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||
containers: statuses.size,
|
||||
models: models.size,
|
||||
gpus: gpus.length,
|
||||
details: {
|
||||
containers: containerHealth,
|
||||
gpus: gpuStatus,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response, null, 2));
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
status: 'error',
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle metrics endpoint (Prometheus format)
|
||||
*/
|
||||
private async handleMetrics(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const metrics: string[] = [];
|
||||
const timestamp = Date.now();
|
||||
|
||||
// Server uptime
|
||||
const uptime = Math.floor((timestamp - this.startTime) / 1000);
|
||||
metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`);
|
||||
metrics.push(`# TYPE modelgrid_uptime_seconds gauge`);
|
||||
metrics.push(`modelgrid_uptime_seconds ${uptime}`);
|
||||
|
||||
// Container count
|
||||
const statuses = await this.containerManager.getAllStatus();
|
||||
metrics.push(`# HELP modelgrid_containers_total Total number of containers`);
|
||||
metrics.push(`# TYPE modelgrid_containers_total gauge`);
|
||||
metrics.push(`modelgrid_containers_total ${statuses.size}`);
|
||||
|
||||
// Running containers
|
||||
const running = Array.from(statuses.values()).filter((s) => s.running).length;
|
||||
metrics.push(`# HELP modelgrid_containers_running Number of running containers`);
|
||||
metrics.push(`# TYPE modelgrid_containers_running gauge`);
|
||||
metrics.push(`modelgrid_containers_running ${running}`);
|
||||
|
||||
// Available models
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
metrics.push(`# HELP modelgrid_models_available Number of available models`);
|
||||
metrics.push(`# TYPE modelgrid_models_available gauge`);
|
||||
metrics.push(`modelgrid_models_available ${models.size}`);
|
||||
|
||||
// GPU count
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`);
|
||||
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
|
||||
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
|
||||
res.end(metrics.join('\n') + '\n');
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||||
res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
): void {
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
code: null,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get server info
|
||||
*/
|
||||
public getInfo(): {
|
||||
running: boolean;
|
||||
host: string;
|
||||
port: number;
|
||||
uptime: number;
|
||||
} {
|
||||
return {
|
||||
running: !!this.server,
|
||||
host: this.config.host,
|
||||
port: this.config.port,
|
||||
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user