Files
modelgrid/ts/api/server.ts
T

405 lines
12 KiB
TypeScript

/**
* API Server
*
* HTTP server for the OpenAI-compatible API gateway.
*/
import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { ClusterCoordinator } from '../cluster/coordinator.ts';
import { logger } from '../logger.ts';
import { VERSION } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';
import { ClusterHandler } from './handlers/cluster.ts';
import { buildHealthSnapshot } from '../helpers/health.ts';
/**
 * Optional collaborators that may be injected into ApiServer.
 *
 * Every member is optional; the ApiServer constructor creates a default
 * instance for each one that is not supplied.
 */
interface IApiServerOptions {
  /** Router that receives every request not served by a built-in endpoint. */
  router?: ApiRouter;
  /** Handler for requests whose path starts with `/_cluster`. */
  clusterHandler?: ClusterHandler;
  /** GPU detector queried by the health and metrics endpoints. */
  gpuDetector?: GpuDetector;
}
/**
 * API Server for ModelGrid.
 *
 * Wraps a Node `http.Server` and dispatches every request to one of:
 * the cluster handler (`/_cluster*`), the built-in health (`/health`,
 * `/healthz`) and metrics (`/metrics`) endpoints, or - after the rate-limit
 * check - the API router. It also keeps in-memory per-path counters that are
 * exposed through the metrics endpoint.
 */
export class ApiServer {
  private server?: http.Server;
  private config: IApiConfig;
  private router: ApiRouter;
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;
  private gpuDetector: GpuDetector;
  private clusterCoordinator: ClusterCoordinator;
  private clusterHandler: ClusterHandler;
  /** Epoch milliseconds of the last `start()` call; 0 until the server is started. */
  private startTime: number = 0;
  private requestCounts = new Map<string, number>();
  private authFailureCounts = new Map<string, number>();
  private serverErrorCounts = new Map<string, number>();
  private rateLimitBuckets = new Map<string, { count: number; windowStart: number }>();
  /** Epoch milliseconds of the last sweep that removed expired rate-limit buckets. */
  private lastRateLimitSweep = 0;

  /**
   * @param config API configuration (host, port, CORS, rate limit, API keys).
   * @param containerManager Source of container status and available models.
   * @param modelRegistry Passed through to the default router.
   * @param modelLoader Passed through to the default router.
   * @param clusterCoordinator Passed to the default cluster handler and router.
   * @param options Optional pre-built collaborators; defaults are created for any that are missing.
   */
  constructor(
    config: IApiConfig,
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
    modelLoader: ModelLoader,
    clusterCoordinator: ClusterCoordinator,
    options: IApiServerOptions = {},
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.gpuDetector = options.gpuDetector ?? new GpuDetector();
    this.modelLoader = modelLoader;
    this.clusterCoordinator = clusterCoordinator;
    this.clusterHandler = options.clusterHandler ?? new ClusterHandler(clusterCoordinator);
    this.router = options.router ?? new ApiRouter(
      containerManager,
      modelRegistry,
      this.modelLoader,
      clusterCoordinator,
      config.apiKeys,
    );
  }

  /**
   * Start the API server.
   *
   * Resolves once the socket is listening and rejects if binding fails. Calling
   * it while a server is already active only logs a warning.
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('API server is already running');
      return;
    }
    this.startTime = Date.now();
    const server = http.createServer((req, res) => {
      // handleRequest catches its own errors; this is a last line of defence so a
      // rejected promise can never surface as an unhandled rejection.
      this.handleRequest(req, res).catch((error: unknown) => {
        logger.error(
          `Unhandled request error: ${error instanceof Error ? error.message : String(error)}`,
        );
        res.destroy();
      });
    });
    this.server = server;
    return new Promise<void>((resolve, reject) => {
      const onStartupError = (error: Error): void => {
        logger.error(`API server error: ${error.message}`);
        // A failed listen() must not leave a dead server behind, otherwise every
        // later start() would bail out with "already running".
        if (this.server === server) {
          this.server = undefined;
        }
        reject(error);
      };
      server.once('error', onStartupError);
      server.listen(this.config.port, this.config.host, () => {
        server.removeListener('error', onStartupError);
        // After startup, errors are only logged; the start() promise is already settled.
        server.on('error', (error) => {
          logger.error(`API server error: ${error.message}`);
        });
        logger.success(`API server started on ${this.config.host}:${this.config.port}`);
        logger.info('OpenAI-compatible API available at:');
        logger.info(` POST /v1/chat/completions`);
        logger.info(` GET /v1/models`);
        logger.info(` POST /v1/embeddings`);
        resolve();
      });
    });
  }

  /**
   * Stop the API server.
   *
   * Resolves once `http.Server.close` has invoked its callback; a no-op when the
   * server is not running.
   */
  public async stop(): Promise<void> {
    const server = this.server;
    if (!server) {
      return;
    }
    return new Promise<void>((resolve) => {
      server.close(() => {
        logger.log('API server stopped');
        this.server = undefined;
        resolve();
      });
    });
  }

  /**
   * Handle one incoming HTTP request.
   *
   * Order of processing: request id, CORS headers, `OPTIONS` preflight, URL
   * parsing, then dispatch. Any error thrown while dispatching is converted into
   * a 500 response. Only requests that reach the router produce a log line.
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const startTime = Date.now();
    const requestId = this.ensureRequestId(req, res);

    // Set CORS headers if enabled
    if (this.config.cors) {
      this.setCorsHeaders(req, res);
    }

    // Handle preflight requests
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // The Host header and request target are client-controlled; a malformed value
    // makes `new URL` throw, so reject it explicitly instead of crashing.
    const url = this.parseRequestUrl(req);
    if (!url) {
      this.sendError(res, 400, 'Invalid request URL or Host header', 'invalid_request_error');
      return;
    }
    const path = url.pathname;

    let routed = false;
    try {
      if (path.startsWith('/_cluster')) {
        await this.clusterHandler.handle(req, res, path, url);
      } else if (path === '/health' || path === '/healthz') {
        // Health check endpoint (no auth required)
        await this.handleHealthCheck(res);
      } else if (path === '/metrics') {
        // Metrics endpoint (no auth required)
        await this.handleMetrics(res);
      } else if (!this.isRequestWithinRateLimit(req)) {
        this.sendError(res, 429, 'Rate limit exceeded', 'rate_limit_exceeded');
      } else {
        routed = true;
        await this.router.route(req, res, path);
      }
    } catch (error) {
      logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
      this.sendError(res, 500, 'Internal server error', 'internal_error');
    }

    this.recordRequest(path, res.statusCode);
    if (routed) {
      const duration = Date.now() - startTime;
      logger.dim(`[${requestId}] ${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
    }
  }

  /**
   * Build the request URL from the request target and Host header.
   *
   * @returns the parsed URL, or `undefined` when either part is malformed.
   */
  private parseRequestUrl(req: http.IncomingMessage): URL | undefined {
    try {
      return new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    } catch {
      return undefined;
    }
  }

  /**
   * Set CORS headers.
   *
   * The request's `Origin` is echoed back when it is allowed by
   * `config.corsOrigins` (default: any origin).
   */
  private setCorsHeaders(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): void {
    const origin = req.headers.origin || '*';
    const allowedOrigins = this.config.corsOrigins || ['*'];
    if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
      res.setHeader('Access-Control-Allow-Origin', origin);
    }
    // The Allow-Origin value depends on the request's Origin, so shared caches
    // must key on it.
    res.setHeader('Vary', 'Origin');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
    res.setHeader('Access-Control-Max-Age', '86400');
  }

  /**
   * Handle health check.
   *
   * Responds 200 when the snapshot status is `ok`, 503 otherwise, and 500 when
   * gathering the snapshot throws.
   */
  private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
    try {
      const statuses = await this.containerManager.getAllStatus();
      const gpus = await this.gpuDetector.detectGpus();
      const models = await this.containerManager.getAllAvailableModels();
      const response: IHealthResponse = buildHealthSnapshot({
        statuses,
        modelCount: models.size,
        gpus,
        startTime: this.startTime,
        version: VERSION,
      });
      res.writeHead(response.status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response, null, 2));
    } catch (error) {
      // NOTE(review): the reason is reported as gpu_detection_failed even when the
      // container manager was the call that threw; kept as-is for API compatibility.
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({
        status: 'error',
        reasons: ['gpu_detection_failed'],
        error: error instanceof Error ? error.message : String(error),
      }));
    }
  }

  /**
   * Handle metrics endpoint (Prometheus text format).
   *
   * Emits gauges for uptime, containers, models and GPUs, followed by the
   * per-path request, auth-failure and server-error counters.
   */
  private async handleMetrics(res: http.ServerResponse): Promise<void> {
    try {
      const metrics: string[] = [];

      // Server uptime
      const uptime = Math.floor((Date.now() - this.startTime) / 1000);
      this.appendGauge(metrics, 'modelgrid_uptime_seconds', 'Server uptime in seconds', uptime);

      // Container count and running containers
      const statuses = await this.containerManager.getAllStatus();
      this.appendGauge(
        metrics,
        'modelgrid_containers_total',
        'Total number of containers',
        statuses.size,
      );
      const running = Array.from(statuses.values()).filter((s) => s.running).length;
      this.appendGauge(
        metrics,
        'modelgrid_containers_running',
        'Number of running containers',
        running,
      );

      // Available models
      const models = await this.containerManager.getAllAvailableModels();
      this.appendGauge(
        metrics,
        'modelgrid_models_available',
        'Number of available models',
        models.size,
      );

      // GPU count
      const gpus = await this.gpuDetector.detectGpus();
      this.appendGauge(metrics, 'modelgrid_gpus_total', 'Total number of GPUs', gpus.length);

      this.appendCounterFamily(
        metrics,
        'modelgrid_api_requests_total',
        'Total API requests by path',
        this.requestCounts,
      );
      this.appendCounterFamily(
        metrics,
        'modelgrid_api_auth_failures_total',
        'Total authentication failures by path',
        this.authFailureCounts,
      );
      this.appendCounterFamily(
        metrics,
        'modelgrid_api_server_errors_total',
        'Total 5xx responses by path',
        this.serverErrorCounts,
      );

      res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
      res.end(metrics.join('\n') + '\n');
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
    }
  }

  /** Append the HELP, TYPE and sample lines of a single unlabelled gauge. */
  private appendGauge(lines: string[], name: string, help: string, value: number): void {
    lines.push(`# HELP ${name} ${help}`, `# TYPE ${name} gauge`, `${name} ${value}`);
  }

  /**
   * Append one counter family labelled by path.
   *
   * HELP and TYPE are written once for the family, not once per sample, because
   * the exposition format allows only one of each per metric name. Nothing is
   * written when there are no samples.
   */
  private appendCounterFamily(
    lines: string[],
    name: string,
    help: string,
    counts: Map<string, number>,
  ): void {
    if (counts.size === 0) {
      return;
    }
    lines.push(`# HELP ${name} ${help}`, `# TYPE ${name} counter`);
    for (const [path, count] of counts) {
      lines.push(`${name}{path="${this.escapeMetricLabel(path)}"} ${count}`);
    }
  }

  /**
   * Send a JSON error response of the form `{ error: { message, type } }`.
   *
   * If the response has already started, the status can no longer be changed,
   * so the response is just ended instead of calling `writeHead` again (which
   * would throw).
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      error: {
        message,
        type,
      },
    }));
  }

  /**
   * Get server info.
   *
   * @returns whether a server instance exists, the configured host and port, and
   * the uptime in whole seconds (0 if the server was never started).
   */
  public getInfo(): {
    running: boolean;
    host: string;
    port: number;
    uptime: number;
  } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
    };
  }

  /**
   * Update the per-path counters for a finished request: every request is
   * counted, 401 responses also count as auth failures and 5xx responses as
   * server errors.
   */
  private recordRequest(path: string, statusCode: number): void {
    // NOTE(review): counters are keyed by the raw request path, so label
    // cardinality is unbounded if clients probe arbitrary paths - consider
    // normalising to known routes.
    this.incrementMetric(this.requestCounts, path);
    if (statusCode === 401) {
      this.incrementMetric(this.authFailureCounts, path);
    }
    if (statusCode >= 500) {
      this.incrementMetric(this.serverErrorCounts, path);
    }
  }

  /**
   * Fixed-window rate limiter: at most `config.rateLimit` requests per key per
   * 60 seconds. A missing or non-positive limit disables limiting.
   *
   * @returns `true` when the request may proceed.
   */
  private isRequestWithinRateLimit(req: http.IncomingMessage): boolean {
    const configuredLimit = this.config.rateLimit;
    if (!configuredLimit || configuredLimit <= 0) {
      return true;
    }
    const now = Date.now();
    const windowMs = 60 * 1000;
    this.sweepExpiredRateLimitBuckets(now, windowMs);

    const key = this.getRateLimitKey(req);
    const bucket = this.rateLimitBuckets.get(key);
    if (!bucket || now - bucket.windowStart >= windowMs) {
      this.rateLimitBuckets.set(key, { count: 1, windowStart: now });
      return true;
    }
    if (bucket.count >= configuredLimit) {
      return false;
    }
    bucket.count += 1;
    return true;
  }

  /**
   * Drop buckets whose window has elapsed so the map cannot grow without bound.
   *
   * An expired bucket is equivalent to a missing one (both start a new window),
   * so removal never changes a limiting decision. The sweep runs at most once
   * per window to keep the per-request cost low.
   */
  private sweepExpiredRateLimitBuckets(now: number, windowMs: number): void {
    if (now - this.lastRateLimitSweep < windowMs) {
      return;
    }
    this.lastRateLimitSweep = now;
    for (const [key, bucket] of this.rateLimitBuckets) {
      if (now - bucket.windowStart >= windowMs) {
        this.rateLimitBuckets.delete(key);
      }
    }
  }

  /**
   * Derive the rate-limit key: the bearer token when an `Authorization: Bearer`
   * header is present, otherwise the remote socket address.
   */
  private getRateLimitKey(req: http.IncomingMessage): string {
    // NOTE(review): this runs before the router, so the token has presumably not
    // been validated yet; a client could rotate arbitrary tokens to obtain fresh
    // buckets - confirm whether keying by IP for unknown tokens is preferable.
    if (typeof req.headers.authorization === 'string') {
      const match = req.headers.authorization.match(/^Bearer\s+(.+)$/i);
      if (match) {
        return `api_key:${match[1]}`;
      }
    }
    return `ip:${req.socket.remoteAddress || 'unknown'}`;
  }

  /** Increment the counter stored under `path`, starting from 0 when absent. */
  private incrementMetric(metric: Map<string, number>, path: string): void {
    metric.set(path, (metric.get(path) || 0) + 1);
  }

  /**
   * Reuse the client's `x-request-id` header or generate a new id, then expose
   * it on both the request headers and the `X-Request-Id` response header.
   */
  private ensureRequestId(req: http.IncomingMessage, res: http.ServerResponse): string {
    const existing = typeof req.headers['x-request-id'] === 'string'
      ? req.headers['x-request-id']
      : undefined;
    const requestId = existing || this.generateRequestId();
    req.headers['x-request-id'] = requestId;
    res.setHeader('X-Request-Id', requestId);
    return requestId;
  }

  /** Generate a `req-<time>-<random>` identifier (base-36 parts, not cryptographically secure). */
  private generateRequestId(): string {
    return `req-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
  }

  /**
   * Escape a value for use inside a Prometheus label: backslash, double quote
   * and line feed. Backslashes are escaped first so later escapes are not doubled.
   */
  private escapeMetricLabel(value: string): string {
    return value
      .replace(/\\/g, '\\\\')
      .replace(/"/g, '\\"')
      .replace(/\n/g, '\\n');
  }
}