// modelgrid/ts/api/server.ts

/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */

import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { ClusterCoordinator } from '../cluster/coordinator.ts';
import { logger } from '../logger.ts';
import { VERSION } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';
import { ClusterHandler } from './handlers/cluster.ts';
import { buildHealthSnapshot } from '../helpers/health.ts';

/**
 * Optional collaborators that can be injected into `ApiServer`.
 *
 * Every field is optional; when one is omitted the `ApiServer` constructor
 * builds a default instance itself.
 */
interface IApiServerOptions {
  /** GPU detector to use; defaults to `new GpuDetector()`. */
  gpuDetector?: GpuDetector;
  /** Request router to use; defaults to an `ApiRouter` built from the constructor arguments. */
  router?: ApiRouter;
  /** Handler for `/_cluster` paths; defaults to `new ClusterHandler(clusterCoordinator)`. */
  clusterHandler?: ClusterHandler;
}

/**
 * API Server for ModelGrid
 *
 * Wraps a `node:http` server and dispatches each request to one of:
 * the cluster handler (`/_cluster*`), the health check (`/health`, `/healthz`),
 * the Prometheus metrics endpoint (`/metrics`), or the API router (everything
 * else, subject to the optional per-minute rate limit).
 */
export class ApiServer {
  /** Length of the fixed rate-limit window, in milliseconds. */
  private static readonly RATE_LIMIT_WINDOW_MS = 60 * 1000;

  private server?: http.Server;
  private config: IApiConfig;
  private router: ApiRouter;
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;
  private gpuDetector: GpuDetector;
  private clusterCoordinator: ClusterCoordinator;
  private clusterHandler: ClusterHandler;
  /** Epoch milliseconds recorded by `start()`; 0 until the first start. */
  private startTime: number = 0;
  /** Request counters keyed by URL path, exposed via `/metrics`. */
  private requestCounts = new Map<string, number>();
  /** Count of 401 responses keyed by URL path. */
  private authFailureCounts = new Map<string, number>();
  /** Count of 5xx responses keyed by URL path. */
  private serverErrorCounts = new Map<string, number>();
  /** Fixed-window rate-limit state keyed by API key or client IP. */
  private rateLimitBuckets = new Map<string, { count: number; windowStart: number }>();
  /** Epoch milliseconds of the last sweep of expired rate-limit buckets. */
  private lastRateLimitSweep = 0;

  /**
   * @param config API configuration (host, port, CORS, API keys, rate limit).
   * @param containerManager Source of container status and available models.
   * @param modelRegistry Model registry handed to the default router.
   * @param modelLoader Model loader handed to the default router.
   * @param clusterCoordinator Coordinator handed to the default router and cluster handler.
   * @param options Optional pre-built collaborators that replace the defaults.
   */
  constructor(
    config: IApiConfig,
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
    modelLoader: ModelLoader,
    clusterCoordinator: ClusterCoordinator,
    options: IApiServerOptions = {},
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.gpuDetector = options.gpuDetector || new GpuDetector();
    this.modelLoader = modelLoader;
    this.clusterCoordinator = clusterCoordinator;
    this.clusterHandler = options.clusterHandler || new ClusterHandler(clusterCoordinator);
    this.router = options.router || new ApiRouter(
      containerManager,
      modelRegistry,
      this.modelLoader,
      clusterCoordinator,
      config.apiKeys,
    );
  }

  /**
   * Start the API server
   *
   * Resolves once the server is listening on the configured host and port.
   * Rejects if the server emits an error before it starts listening; in that
   * case the internal server reference is cleared so `start()` can be retried.
   * Calling `start()` while a server already exists only logs a warning.
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('API server is already running');
      return;
    }

    this.startTime = Date.now();

    const server = http.createServer((req, res) => {
      // handleRequest guards its own dispatch; this is the last line of defence
      // so that a rejected promise never becomes an unhandled rejection.
      this.handleRequest(req, res).catch((error: unknown) => {
        logger.error(`Unhandled request error: ${this.describeError(error)}`);
        res.destroy();
      });
    });
    this.server = server;

    return new Promise<void>((resolve, reject) => {
      let listening = false;

      server.on('error', (error) => {
        logger.error(`API server error: ${error.message}`);
        if (!listening) {
          // Startup failed (e.g. port in use): forget the server so that a
          // later start() is not short-circuited by the "already running" check.
          if (this.server === server) {
            this.server = undefined;
            this.startTime = 0;
          }
          reject(error);
        }
      });

      server.listen(this.config.port, this.config.host, () => {
        listening = true;
        logger.success(`API server started on ${this.config.host}:${this.config.port}`);
        logger.info('OpenAI-compatible API available at:');
        logger.info(`  POST /v1/chat/completions`);
        logger.info(`  GET  /v1/models`);
        logger.info(`  POST /v1/embeddings`);
        resolve();
      });
    });
  }

  /**
   * Stop the API server
   *
   * Resolves when the underlying server's close callback fires. Does nothing
   * if the server was never started.
   */
  public async stop(): Promise<void> {
    const server = this.server;
    if (!server) {
      return;
    }

    return new Promise<void>((resolve) => {
      server.close(() => {
        logger.log('API server stopped');
        this.server = undefined;
        resolve();
      });
    });
  }

  /**
   * Handle incoming HTTP request
   *
   * Order of processing: request id, CORS headers (if enabled), OPTIONS
   * preflight, URL parsing, then dispatch by path. Only requests that reach
   * the router are subject to rate limiting and get an access-log line.
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const startTime = Date.now();
    const requestId = this.ensureRequestId(req, res);

    // Set CORS headers if enabled
    if (this.config.cors) {
      this.setCorsHeaders(req, res);
    }

    // Handle preflight requests
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // Parse URL; the request target and Host header are untrusted input.
    const url = this.parseRequestUrl(req);
    if (!url) {
      this.sendError(res, 400, 'Malformed request URL', 'invalid_request_error');
      return;
    }
    const path = url.pathname;

    if (path.startsWith('/_cluster')) {
      try {
        await this.clusterHandler.handle(req, res, path, url);
      } catch (error) {
        this.handleUnexpectedError(res, error);
      }
      this.recordRequest(path, res.statusCode);
      return;
    }

    // Health check endpoint (no auth required)
    if (path === '/health' || path === '/healthz') {
      await this.handleHealthCheck(res);
      this.recordRequest(path, res.statusCode);
      return;
    }

    // Metrics endpoint (no auth required)
    if (path === '/metrics') {
      await this.handleMetrics(res);
      this.recordRequest(path, res.statusCode);
      return;
    }

    if (!this.isRequestWithinRateLimit(req)) {
      this.sendError(res, 429, 'Rate limit exceeded', 'rate_limit_exceeded');
      this.recordRequest(path, res.statusCode);
      return;
    }

    // Route request
    try {
      await this.router.route(req, res, path);
    } catch (error) {
      this.handleUnexpectedError(res, error);
    }

    // Log request
    const duration = Date.now() - startTime;
    this.recordRequest(path, res.statusCode);
    logger.dim(`[${requestId}] ${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
  }

  /**
   * Parse the request target into a URL.
   *
   * The Host header is tried as the base first; if that combination is not a
   * valid URL (for example a malformed Host value) a fixed localhost base is
   * used instead.
   *
   * @returns The parsed URL, or `undefined` when the target cannot be parsed at all.
   */
  private parseRequestUrl(req: http.IncomingMessage): URL | undefined {
    const target = req.url || '/';
    const bases = [`http://${req.headers.host || 'localhost'}`, 'http://localhost'];

    for (const base of bases) {
      try {
        return new URL(target, base);
      } catch {
        // Invalid with this base; fall through to the next candidate.
      }
    }

    return undefined;
  }

  /**
   * Log an error thrown by a request handler and answer with a generic 500.
   */
  private handleUnexpectedError(res: http.ServerResponse, error: unknown): void {
    logger.error(`Request error: ${this.describeError(error)}`);
    this.sendError(res, 500, 'Internal server error', 'internal_error');
  }

  /**
   * Set CORS headers
   *
   * The request's Origin (or `*` when absent) is echoed back when the
   * configured origin list contains `*` or that exact origin. Because the
   * response depends on the Origin header, `Vary: Origin` is always set so
   * shared caches do not reuse a response across origins.
   */
  private setCorsHeaders(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): void {
    const origin = req.headers.origin || '*';
    const allowedOrigins = this.config.corsOrigins || ['*'];

    res.setHeader('Vary', 'Origin');

    if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
      res.setHeader('Access-Control-Allow-Origin', origin);
    }

    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
    res.setHeader('Access-Control-Max-Age', '86400');
  }

  /**
   * Handle health check
   *
   * Responds 200 when the built snapshot's status is `ok`, 503 otherwise, and
   * 500 with an error payload if gathering the inputs throws.
   */
  private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
    try {
      const statuses = await this.containerManager.getAllStatus();
      const gpus = await this.gpuDetector.detectGpus();
      const models = await this.containerManager.getAllAvailableModels();

      const response: IHealthResponse = buildHealthSnapshot({
        statuses,
        modelCount: models.size,
        gpus,
        startTime: this.startTime,
        version: VERSION,
      });

      res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response, null, 2));
    } catch (error) {
      // NOTE(review): the reason is reported as gpu_detection_failed no matter
      // which of the three lookups above threw; the `error` field carries the
      // actual message.
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({
        status: 'error',
        reasons: ['gpu_detection_failed'],
        error: this.describeError(error),
      }));
    }
  }

  /**
   * Handle metrics endpoint (Prometheus format)
   *
   * Emits gauges for uptime, containers, models and GPUs, followed by the
   * per-path request, auth-failure and server-error counters. Responds 500
   * with a comment line if collecting any value throws.
   */
  private async handleMetrics(res: http.ServerResponse): Promise<void> {
    try {
      const metrics: string[] = [];
      const timestamp = Date.now();

      // Server uptime
      const uptime = Math.floor((timestamp - this.startTime) / 1000);
      this.appendGauge(metrics, 'modelgrid_uptime_seconds', 'Server uptime in seconds', uptime);

      // Container count
      const statuses = await this.containerManager.getAllStatus();
      this.appendGauge(
        metrics,
        'modelgrid_containers_total',
        'Total number of containers',
        statuses.size,
      );

      // Running containers
      const running = Array.from(statuses.values()).filter((s) => s.running).length;
      this.appendGauge(
        metrics,
        'modelgrid_containers_running',
        'Number of running containers',
        running,
      );

      // Available models
      const models = await this.containerManager.getAllAvailableModels();
      this.appendGauge(
        metrics,
        'modelgrid_models_available',
        'Number of available models',
        models.size,
      );

      // GPU count
      const gpus = await this.gpuDetector.detectGpus();
      this.appendGauge(metrics, 'modelgrid_gpus_total', 'Total number of GPUs', gpus.length);

      // Per-path counters.
      // NOTE(review): the label value is the raw request path, so the number of
      // series grows with the number of distinct paths clients request.
      this.appendPathCounter(
        metrics,
        'modelgrid_api_requests_total',
        'Total API requests by path',
        this.requestCounts,
      );
      this.appendPathCounter(
        metrics,
        'modelgrid_api_auth_failures_total',
        'Total authentication failures by path',
        this.authFailureCounts,
      );
      this.appendPathCounter(
        metrics,
        'modelgrid_api_server_errors_total',
        'Total 5xx responses by path',
        this.serverErrorCounts,
      );

      res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
      res.end(metrics.join('\n') + '\n');
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end(`# Error: ${this.describeError(error)}\n`);
    }
  }

  /**
   * Append one unlabelled gauge (HELP, TYPE and sample line) to `metrics`.
   */
  private appendGauge(metrics: string[], name: string, help: string, value: number): void {
    metrics.push(`# HELP ${name} ${help}`);
    metrics.push(`# TYPE ${name} gauge`);
    metrics.push(`${name} ${value}`);
  }

  /**
   * Append a counter family labelled by `path` to `metrics`.
   *
   * HELP and TYPE are written once for the whole family, followed by one
   * sample per path. Nothing is written when `counts` is empty.
   */
  private appendPathCounter(
    metrics: string[],
    name: string,
    help: string,
    counts: Map<string, number>,
  ): void {
    if (counts.size === 0) {
      return;
    }

    metrics.push(`# HELP ${name} ${help}`);
    metrics.push(`# TYPE ${name} counter`);
    for (const [path, count] of counts) {
      metrics.push(`${name}{path="${this.escapeMetricLabel(path)}"} ${count}`);
    }
  }

  /**
   * Send error response
   *
   * Writes a JSON body of the form `{ error: { message, type } }`. If the
   * response headers were already sent the status can no longer be changed,
   * so the response is only ended (when still open).
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      error: {
        message,
        type,
      },
    }));
  }

  /**
   * Get server info
   *
   * @returns Whether a server instance exists, the configured host and port,
   *   and whole seconds elapsed since the recorded start time (0 if never started).
   */
  public getInfo(): {
    running: boolean;
    host: string;
    port: number;
    uptime: number;
  } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
    };
  }

  /**
   * Update the per-path counters for a finished request: always the request
   * count, plus the auth-failure count on 401 and the server-error count on 5xx.
   */
  private recordRequest(path: string, statusCode: number): void {
    this.incrementMetric(this.requestCounts, path);

    if (statusCode === 401) {
      this.incrementMetric(this.authFailureCounts, path);
    }

    if (statusCode >= 500) {
      this.incrementMetric(this.serverErrorCounts, path);
    }
  }

  /**
   * Fixed-window rate limiter.
   *
   * @returns `true` when the request may proceed. Always `true` when no
   *   positive `rateLimit` is configured; otherwise `false` once the caller's
   *   bucket has reached the configured count within the current window.
   */
  private isRequestWithinRateLimit(req: http.IncomingMessage): boolean {
    const configuredLimit = this.config.rateLimit;
    if (!configuredLimit || configuredLimit <= 0) {
      return true;
    }

    const now = Date.now();
    const windowMs = ApiServer.RATE_LIMIT_WINDOW_MS;
    this.pruneExpiredRateLimitBuckets(now, windowMs);

    const key = this.getRateLimitKey(req);
    const bucket = this.rateLimitBuckets.get(key);

    if (!bucket || now - bucket.windowStart >= windowMs) {
      this.rateLimitBuckets.set(key, { count: 1, windowStart: now });
      return true;
    }

    if (bucket.count >= configuredLimit) {
      return false;
    }

    bucket.count += 1;
    return true;
  }

  /**
   * Drop buckets whose window has elapsed, at most once per window.
   *
   * An expired bucket would be replaced on its next hit anyway, so removing it
   * does not change limiting decisions; it only stops the map from growing
   * with every distinct key ever seen.
   */
  private pruneExpiredRateLimitBuckets(now: number, windowMs: number): void {
    if (now - this.lastRateLimitSweep < windowMs) {
      return;
    }

    this.lastRateLimitSweep = now;
    for (const [key, bucket] of this.rateLimitBuckets) {
      if (now - bucket.windowStart >= windowMs) {
        this.rateLimitBuckets.delete(key);
      }
    }
  }

  /**
   * Derive the rate-limit bucket key: the bearer token from the Authorization
   * header when present, otherwise the socket's remote address.
   */
  private getRateLimitKey(req: http.IncomingMessage): string {
    if (typeof req.headers.authorization === 'string') {
      const match = req.headers.authorization.match(/^Bearer\s+(.+)$/i);
      if (match) {
        return `api_key:${match[1]}`;
      }
    }

    return `ip:${req.socket.remoteAddress || 'unknown'}`;
  }

  /** Add one to the counter stored under `path`, starting from zero. */
  private incrementMetric(metric: Map<string, number>, path: string): void {
    metric.set(path, (metric.get(path) || 0) + 1);
  }

  /**
   * Reuse the client's `x-request-id` header when it is a non-empty string,
   * otherwise generate one. The id is written back onto the request headers
   * and echoed in the `X-Request-Id` response header.
   */
  private ensureRequestId(req: http.IncomingMessage, res: http.ServerResponse): string {
    const existing = typeof req.headers['x-request-id'] === 'string'
      ? req.headers['x-request-id']
      : undefined;
    const requestId = existing || this.generateRequestId();
    req.headers['x-request-id'] = requestId;
    res.setHeader('X-Request-Id', requestId);
    return requestId;
  }

  /** Build an id of the form `req-<base36 time>-<random base36 suffix>`. */
  private generateRequestId(): string {
    return `req-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
  }

  /**
   * Escape a Prometheus label value: backslash, double quote and line feed.
   * Backslashes are handled first so later escapes are not doubled.
   */
  private escapeMetricLabel(value: string): string {
    return value
      .replaceAll('\\', '\\\\')
      .replaceAll('"', '\\"')
      .replaceAll('\n', '\\n');
  }

  /** Turn a caught value into a log/response message. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}