/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */

import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { logger } from '../logger.ts';
import { API_SERVER } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
 * API Server for ModelGrid
 *
 * Wraps a `node:http` server. `/health`, `/healthz` and `/metrics` are answered
 * directly by this class; every other path is handed to the `ApiRouter`.
 */
export class ApiServer {
  private server?: http.Server;
  private config: IApiConfig;
  private router: ApiRouter;
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;
  private gpuDetector: GpuDetector;
  /** Epoch milliseconds at which `start()` was last called; 0 while not started. */
  private startTime: number = 0;

  /**
   * @param config API configuration (host, port, CORS settings, API keys)
   * @param containerManager Queried for container status and available models
   * @param modelRegistry Passed on to the model loader and the router
   */
  constructor(
    config: IApiConfig,
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.gpuDetector = new GpuDetector();
    this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
    this.router = new ApiRouter(
      containerManager,
      modelRegistry,
      this.modelLoader,
      config.apiKeys,
    );
  }

  /**
   * Start the API server
   *
   * Resolves once the server is listening on the configured host and port.
   * Rejects if an `error` event fires before that (for example when the port
   * is already in use); in that case the instance is reset so `start()` can be
   * called again. Calling `start()` while a server exists only logs a warning.
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('API server is already running');
      return;
    }

    this.startTime = Date.now();

    const server = http.createServer((req, res) => {
      // Nothing awaits this callback's result, so a rejection must be consumed
      // here or it surfaces as an unhandled promise rejection.
      this.handleRequest(req, res).catch((error: unknown) => {
        logger.error(
          `Request error: ${error instanceof Error ? error.message : String(error)}`,
        );
        this.sendError(res, 500, 'Internal server error', 'internal_error');
      });
    });
    this.server = server;

    return new Promise<void>((resolve, reject) => {
      server.on('error', (error) => {
        logger.error(`API server error: ${error.message}`);

        // An error before the server is listening means startup failed: drop
        // the dead server so the instance does not claim to be running.
        if (!server.listening && this.server === server) {
          this.server = undefined;
          this.startTime = 0;
        }

        // No-op when the promise has already been resolved by `listen`.
        reject(error);
      });

      server.listen(this.config.port, this.config.host, () => {
        logger.success(`API server started on ${this.config.host}:${this.config.port}`);
        logger.info('OpenAI-compatible API available at:');
        logger.info(`  POST /v1/chat/completions`);
        logger.info(`  GET  /v1/models`);
        logger.info(`  POST /v1/embeddings`);
        resolve();
      });
    });
  }

  /**
   * Stop the API server
   *
   * Resolves when `http.Server.close` has completed. A no-op when the server
   * was never started. Afterwards `getInfo()` reports `running: false` and an
   * uptime of 0.
   */
  public async stop(): Promise<void> {
    const server = this.server;
    if (!server) {
      return;
    }

    return new Promise<void>((resolve) => {
      server.close((error) => {
        if (error) {
          // `close` reports an error when the server was not listening.
          logger.warn(`API server close reported: ${error.message}`);
        }
        logger.log('API server stopped');
        if (this.server === server) {
          this.server = undefined;
          this.startTime = 0;
        }
        resolve();
      });
    });
  }

  /**
   * Handle incoming HTTP request
   *
   * Order of handling: CORS headers (when enabled), `OPTIONS` preflight,
   * URL parsing, the unauthenticated `/health`, `/healthz` and `/metrics`
   * endpoints, then the router. Only routed requests are written to the
   * request log.
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const startTime = Date.now();

    // Set CORS headers if enabled
    if (this.config.cors) {
      this.setCorsHeaders(req, res);
    }

    // Handle preflight requests
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // Parse URL. Both the request target and the Host header are
    // client-controlled and `new URL` throws on malformed input, so a bad
    // request is answered with 400 instead of escaping as an exception.
    let path: string;
    try {
      const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
      path = url.pathname;
    } catch {
      this.sendError(res, 400, 'Invalid request URL', 'invalid_request_error');
      return;
    }

    // Health check endpoint (no auth required)
    if (path === '/health' || path === '/healthz') {
      await this.handleHealthCheck(res);
      return;
    }

    // Metrics endpoint (no auth required)
    if (path === '/metrics') {
      await this.handleMetrics(res);
      return;
    }

    // Route request
    try {
      await this.router.route(req, res, path);
    } catch (error) {
      logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
      this.sendError(res, 500, 'Internal server error', 'internal_error');
    }

    // Log request
    const duration = Date.now() - startTime;
    logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
  }

  /**
   * Set CORS headers
   *
   * `Access-Control-Allow-Origin` is set to the request's `Origin` (or `*`
   * when the request has none) if the configured origin list contains `*` or
   * that origin; the list defaults to `['*']`. Because the value depends on
   * the request's `Origin`, `Vary: Origin` is always sent so shared caches do
   * not reuse a response across origins.
   */
  private setCorsHeaders(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): void {
    const origin = req.headers.origin || '*';
    const allowedOrigins = this.config.corsOrigins || ['*'];

    if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
      res.setHeader('Access-Control-Allow-Origin', origin);
    }

    res.setHeader('Vary', 'Origin');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
    res.setHeader('Access-Control-Max-Age', '86400');
  }

  /**
   * Handle health check
   *
   * Responds 200 when every container is running and healthy, 503 when at
   * least one is not (`status: 'degraded'`), and 500 with the error message
   * when gathering the data throws.
   */
  private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
    try {
      const statuses = await this.containerManager.getAllStatus();
      const gpus = await this.gpuDetector.detectGpus();
      const models = await this.containerManager.getAllAvailableModels();

      let status: 'ok' | 'degraded' | 'error' = 'ok';
      const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
      const gpuStatus: Record<string, 'available'> = {};

      // Check container health
      for (const [id, containerStatus] of statuses) {
        if (containerStatus.running && containerStatus.health === 'healthy') {
          containerHealth[id] = 'healthy';
        } else {
          containerHealth[id] = 'unhealthy';
          status = 'degraded';
        }
      }

      // Check GPU status: every detected GPU is reported as available
      for (const gpu of gpus) {
        gpuStatus[gpu.id] = 'available';
      }

      const response: IHealthResponse = {
        status,
        version: '1.0.0', // TODO: Get from config
        uptime: Math.floor((Date.now() - this.startTime) / 1000),
        containers: statuses.size,
        models: models.size,
        gpus: gpus.length,
        details: {
          containers: containerHealth,
          gpus: gpuStatus,
        },
      };

      res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response, null, 2));
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({
        status: 'error',
        error: error instanceof Error ? error.message : String(error),
      }));
    }
  }

  /**
   * Handle metrics endpoint (Prometheus format)
   *
   * Emits gauges for uptime, total and running containers, available models
   * and GPU count as plain text. Responds 500 with a `# Error:` comment line
   * when gathering the data throws.
   */
  private async handleMetrics(res: http.ServerResponse): Promise<void> {
    try {
      const metrics: string[] = [];
      const timestamp = Date.now();

      // Server uptime
      const uptime = Math.floor((timestamp - this.startTime) / 1000);
      metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`);
      metrics.push(`# TYPE modelgrid_uptime_seconds gauge`);
      metrics.push(`modelgrid_uptime_seconds ${uptime}`);

      // Container count
      const statuses = await this.containerManager.getAllStatus();
      metrics.push(`# HELP modelgrid_containers_total Total number of containers`);
      metrics.push(`# TYPE modelgrid_containers_total gauge`);
      metrics.push(`modelgrid_containers_total ${statuses.size}`);

      // Running containers
      const running = Array.from(statuses.values()).filter((s) => s.running).length;
      metrics.push(`# HELP modelgrid_containers_running Number of running containers`);
      metrics.push(`# TYPE modelgrid_containers_running gauge`);
      metrics.push(`modelgrid_containers_running ${running}`);

      // Available models
      const models = await this.containerManager.getAllAvailableModels();
      metrics.push(`# HELP modelgrid_models_available Number of available models`);
      metrics.push(`# TYPE modelgrid_models_available gauge`);
      metrics.push(`modelgrid_models_available ${models.size}`);

      // GPU count
      const gpus = await this.gpuDetector.detectGpus();
      metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`);
      metrics.push(`# TYPE modelgrid_gpus_total gauge`);
      metrics.push(`modelgrid_gpus_total ${gpus.length}`);

      res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
      res.end(metrics.join('\n') + '\n');
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
    }
  }

  /**
   * Send error response
   *
   * Writes a JSON body of the form `{ error: { message, type, code: null } }`.
   * If the response headers have already been sent the status can no longer
   * be changed, so the response is only terminated (when still open) rather
   * than calling `writeHead` a second time, which would throw.
   *
   * @param res Response to write to
   * @param statusCode HTTP status code
   * @param message Human-readable error message
   * @param type Error type identifier placed in the body
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      error: {
        message,
        type,
        code: null,
      },
    }));
  }

  /**
   * Get server info
   *
   * @returns Whether a server instance exists, the configured host and port,
   * and the uptime in whole seconds (0 when the server has not been started).
   */
  public getInfo(): {
    running: boolean;
    host: string;
    port: number;
    uptime: number;
  } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
    };
  }
}