// modelgrid/ts/api/server.ts
/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */

import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { logger } from '../logger.ts';
import { API_SERVER } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
 * API Server for ModelGrid
 */
export class ApiServer {
  private server?: http.Server;
  private config: IApiConfig;
  private router: ApiRouter;
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;
  private gpuDetector: GpuDetector;
  private startTime: number = 0;

  constructor(
    config: IApiConfig,
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.gpuDetector = new GpuDetector();
    this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
    this.router = new ApiRouter(
      containerManager,
      modelRegistry,
      this.modelLoader,
      config.apiKeys,
    );
  }

  /**
   * Start the API server
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('API server is already running');
      return;
    }

    this.startTime = Date.now();
    this.server = http.createServer(async (req, res) => {
      await this.handleRequest(req, res);
    });

    return new Promise((resolve, reject) => {
      this.server!.listen(this.config.port, this.config.host, () => {
        logger.success(`API server started on ${this.config.host}:${this.config.port}`);
        logger.info('OpenAI-compatible API available at:');
        logger.info(` POST /v1/chat/completions`);
        logger.info(` GET /v1/models`);
        logger.info(` POST /v1/embeddings`);
        resolve();
      });
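      // The 'error' handler below rejects startup failures (e.g. EADDRINUSE).
      // Once the promise has resolved, a later reject() is a no-op, so
      // subsequent server errors are only logged.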
      this.server!.on('error', (error) => {
        logger.error(`API server error: ${error.message}`);
        reject(error);
      });
    });
  }

  /**
   * Stop the API server
   */
  public async stop(): Promise<void> {
    if (!this.server) {
      return;
    }

    return new Promise((resolve) => {
      this.server!.close(() => {
        logger.log('API server stopped');
        this.server = undefined;
        resolve();
      });
    });
  }

  /**
   * Handle incoming HTTP request
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const startTime = Date.now();

    // Set CORS headers if enabled
    if (this.config.cors) {
      this.setCorsHeaders(req, res);
    }

    // Handle preflight requests
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // Parse URL
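    // req.url on a server request holds only the path and query string, so a
    // base URL (built from the Host header) is required by the URL constructor.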
    const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    const path = url.pathname;

    // Health check endpoint (no auth required)
    if (path === '/health' || path === '/healthz') {
      await this.handleHealthCheck(res);
      return;
    }

    // Metrics endpoint (no auth required)
    if (path === '/metrics') {
      await this.handleMetrics(res);
      return;
    }

    // Route request
    try {
      await this.router.route(req, res, path);
    } catch (error) {
      logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
      this.sendError(res, 500, 'Internal server error', 'internal_error');
    }

    // Log request
    const duration = Date.now() - startTime;
    logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
  }

  /**
   * Set CORS headers
   */
  private setCorsHeaders(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): void {
    const origin = req.headers.origin || '*';
    const allowedOrigins = this.config.corsOrigins || ['*'];
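    // Echo the request origin back only when it is explicitly allowed or a
    // wildcard is configured; otherwise no Allow-Origin header is set and the
    // browser blocks the cross-origin response.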
    if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
      res.setHeader('Access-Control-Allow-Origin', origin);
    }
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
    res.setHeader('Access-Control-Max-Age', '86400');
  }

  /**
   * Handle health check
   */
  private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
    try {
      const statuses = await this.containerManager.getAllStatus();
      const gpus = await this.gpuDetector.detectGpus();
      const models = await this.containerManager.getAllAvailableModels();

      let status: 'ok' | 'degraded' | 'error' = 'ok';
      const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
      const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};

      // Check container health
      for (const [id, containerStatus] of statuses) {
        if (containerStatus.running && containerStatus.health === 'healthy') {
          containerHealth[id] = 'healthy';
        } else {
          containerHealth[id] = 'unhealthy';
          status = 'degraded';
        }
      }

      // Check GPU status
      for (const gpu of gpus) {
        gpuStatus[gpu.id] = 'available';
      }
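
      // Example body (values illustrative), serialized from IHealthResponse:
      //   { "status": "ok", "version": "1.0.0", "uptime": 42, "containers": 2,
      //     "models": 5, "gpus": 1, "details": { "containers": { "c1": "healthy" },
      //     "gpus": { "gpu0": "available" } } }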
      const response: IHealthResponse = {
        status,
        version: '1.0.0', // TODO: Get from config
        uptime: Math.floor((Date.now() - this.startTime) / 1000),
        containers: statuses.size,
        models: models.size,
        gpus: gpus.length,
        details: {
          containers: containerHealth,
          gpus: gpuStatus,
        },
      };

      res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response, null, 2));
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({
        status: 'error',
        error: error instanceof Error ? error.message : String(error),
      }));
    }
  }

  /**
   * Handle metrics endpoint (Prometheus format)
   */
  private async handleMetrics(res: http.ServerResponse): Promise<void> {
    try {
      const metrics: string[] = [];
      const timestamp = Date.now();
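      // Each metric is emitted in Prometheus text exposition format, e.g.:
      //   # HELP modelgrid_uptime_seconds Server uptime in seconds
      //   # TYPE modelgrid_uptime_seconds gauge
      //   modelgrid_uptime_seconds 42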

      // Server uptime
      const uptime = Math.floor((timestamp - this.startTime) / 1000);
      metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`);
      metrics.push(`# TYPE modelgrid_uptime_seconds gauge`);
      metrics.push(`modelgrid_uptime_seconds ${uptime}`);

      // Container count
      const statuses = await this.containerManager.getAllStatus();
      metrics.push(`# HELP modelgrid_containers_total Total number of containers`);
      metrics.push(`# TYPE modelgrid_containers_total gauge`);
      metrics.push(`modelgrid_containers_total ${statuses.size}`);

      // Running containers
      const running = Array.from(statuses.values()).filter((s) => s.running).length;
      metrics.push(`# HELP modelgrid_containers_running Number of running containers`);
      metrics.push(`# TYPE modelgrid_containers_running gauge`);
      metrics.push(`modelgrid_containers_running ${running}`);

      // Available models
      const models = await this.containerManager.getAllAvailableModels();
      metrics.push(`# HELP modelgrid_models_available Number of available models`);
      metrics.push(`# TYPE modelgrid_models_available gauge`);
      metrics.push(`modelgrid_models_available ${models.size}`);

      // GPU count
      const gpus = await this.gpuDetector.detectGpus();
      metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`);
      metrics.push(`# TYPE modelgrid_gpus_total gauge`);
      metrics.push(`modelgrid_gpus_total ${gpus.length}`);

      res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
      res.end(metrics.join('\n') + '\n');
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
    }
  }

  /**
   * Send error response
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
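    // OpenAI-style error envelope, e.g. { "error": { "message": "...", "type": "internal_error", "code": null } }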
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      error: {
        message,
        type,
        code: null,
      },
    }));
  }

  /**
   * Get server info
   */
  public getInfo(): {
    running: boolean;
    host: string;
    port: number;
    uptime: number;
  } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
    };
  }
}
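
/*
 * Usage sketch (illustrative only): how this server is typically wired together.
 * The construction of ContainerManager, ModelRegistry, and the IApiConfig literal
 * below is assumed, not taken from this module; adapt it to the real entry point.
 *
 *   const containerManager = new ContainerManager();
 *   const modelRegistry = new ModelRegistry();
 *   const config: IApiConfig = { host: '127.0.0.1', port: 8080, cors: true, apiKeys: [] };
 *
 *   const server = new ApiServer(config, containerManager, modelRegistry);
 *   await server.start();
 *   // ... later, on shutdown:
 *   await server.stop();
 */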