/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */

import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { logger } from '../logger.ts';
import { API_SERVER } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
|
||
|
|
* API Server for ModelGrid
|
||
|
|
*/
|
||
|
|
export class ApiServer {
|
||
|
|
private server?: http.Server;
|
||
|
|
private config: IApiConfig;
|
||
|
|
private router: ApiRouter;
|
||
|
|
private containerManager: ContainerManager;
|
||
|
|
private modelRegistry: ModelRegistry;
|
||
|
|
private modelLoader: ModelLoader;
|
||
|
|
private gpuDetector: GpuDetector;
|
||
|
|
private startTime: number = 0;
|
||
|
|
|
||
|
|
constructor(
|
||
|
|
config: IApiConfig,
|
||
|
|
containerManager: ContainerManager,
|
||
|
|
modelRegistry: ModelRegistry,
|
||
|
|
) {
|
||
|
|
this.config = config;
|
||
|
|
this.containerManager = containerManager;
|
||
|
|
this.modelRegistry = modelRegistry;
|
||
|
|
this.gpuDetector = new GpuDetector();
|
||
|
|
this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
|
||
|
|
this.router = new ApiRouter(
|
||
|
|
containerManager,
|
||
|
|
modelRegistry,
|
||
|
|
this.modelLoader,
|
||
|
|
config.apiKeys,
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Start the API server
|
||
|
|
*/
|
||
|
|
public async start(): Promise<void> {
|
||
|
|
if (this.server) {
|
||
|
|
logger.warn('API server is already running');
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
this.startTime = Date.now();
|
||
|
|
|
||
|
|
this.server = http.createServer(async (req, res) => {
|
||
|
|
await this.handleRequest(req, res);
|
||
|
|
});
|
||
|
|
|
||
|
|
return new Promise((resolve, reject) => {
|
||
|
|
this.server!.listen(this.config.port, this.config.host, () => {
|
||
|
|
logger.success(`API server started on ${this.config.host}:${this.config.port}`);
|
||
|
|
logger.info('OpenAI-compatible API available at:');
|
||
|
|
logger.info(` POST /v1/chat/completions`);
|
||
|
|
logger.info(` GET /v1/models`);
|
||
|
|
logger.info(` POST /v1/embeddings`);
|
||
|
|
resolve();
|
||
|
|
});
|
||
|
|
|
||
|
|
this.server!.on('error', (error) => {
|
||
|
|
logger.error(`API server error: ${error.message}`);
|
||
|
|
reject(error);
|
||
|
|
});
|
||
|
|
});
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Stop the API server
|
||
|
|
*/
|
||
|
|
public async stop(): Promise<void> {
|
||
|
|
if (!this.server) {
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
return new Promise((resolve) => {
|
||
|
|
this.server!.close(() => {
|
||
|
|
logger.log('API server stopped');
|
||
|
|
this.server = undefined;
|
||
|
|
resolve();
|
||
|
|
});
|
||
|
|
});
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Handle incoming HTTP request
|
||
|
|
*/
|
||
|
|
private async handleRequest(
|
||
|
|
req: http.IncomingMessage,
|
||
|
|
res: http.ServerResponse,
|
||
|
|
): Promise<void> {
|
||
|
|
const startTime = Date.now();
|
||
|
|
|
||
|
|
// Set CORS headers if enabled
|
||
|
|
if (this.config.cors) {
|
||
|
|
this.setCorsHeaders(req, res);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Handle preflight requests
|
||
|
|
if (req.method === 'OPTIONS') {
|
||
|
|
res.writeHead(204);
|
||
|
|
res.end();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Parse URL
|
||
|
|
const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
|
||
|
|
const path = url.pathname;
|
||
|
|
|
||
|
|
// Health check endpoint (no auth required)
|
||
|
|
if (path === '/health' || path === '/healthz') {
|
||
|
|
await this.handleHealthCheck(res);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Metrics endpoint (no auth required)
|
||
|
|
if (path === '/metrics') {
|
||
|
|
await this.handleMetrics(res);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Route request
|
||
|
|
try {
|
||
|
|
await this.router.route(req, res, path);
|
||
|
|
} catch (error) {
|
||
|
|
logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
|
||
|
|
this.sendError(res, 500, 'Internal server error', 'internal_error');
|
||
|
|
}
|
||
|
|
|
||
|
|
// Log request
|
||
|
|
const duration = Date.now() - startTime;
|
||
|
|
logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Set CORS headers
|
||
|
|
*/
|
||
|
|
private setCorsHeaders(
|
||
|
|
req: http.IncomingMessage,
|
||
|
|
res: http.ServerResponse,
|
||
|
|
): void {
|
||
|
|
const origin = req.headers.origin || '*';
|
||
|
|
const allowedOrigins = this.config.corsOrigins || ['*'];
|
||
|
|
|
||
|
|
if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
|
||
|
|
res.setHeader('Access-Control-Allow-Origin', origin);
|
||
|
|
}
|
||
|
|
|
||
|
|
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
|
||
|
|
res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
|
||
|
|
res.setHeader('Access-Control-Max-Age', '86400');
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Handle health check
|
||
|
|
*/
|
||
|
|
private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
|
||
|
|
try {
|
||
|
|
const statuses = await this.containerManager.getAllStatus();
|
||
|
|
const gpus = await this.gpuDetector.detectGpus();
|
||
|
|
const models = await this.containerManager.getAllAvailableModels();
|
||
|
|
|
||
|
|
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||
|
|
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||
|
|
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||
|
|
|
||
|
|
// Check container health
|
||
|
|
for (const [id, containerStatus] of statuses) {
|
||
|
|
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||
|
|
containerHealth[id] = 'healthy';
|
||
|
|
} else {
|
||
|
|
containerHealth[id] = 'unhealthy';
|
||
|
|
status = 'degraded';
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Check GPU status
|
||
|
|
for (const gpu of gpus) {
|
||
|
|
gpuStatus[gpu.id] = 'available';
|
||
|
|
}
|
||
|
|
|
||
|
|
const response: IHealthResponse = {
|
||
|
|
status,
|
||
|
|
version: '1.0.0', // TODO: Get from config
|
||
|
|
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||
|
|
containers: statuses.size,
|
||
|
|
models: models.size,
|
||
|
|
gpus: gpus.length,
|
||
|
|
details: {
|
||
|
|
containers: containerHealth,
|
||
|
|
gpus: gpuStatus,
|
||
|
|
},
|
||
|
|
};
|
||
|
|
|
||
|
|
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||
|
|
res.end(JSON.stringify(response, null, 2));
|
||
|
|
} catch (error) {
|
||
|
|
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||
|
|
res.end(JSON.stringify({
|
||
|
|
status: 'error',
|
||
|
|
error: error instanceof Error ? error.message : String(error),
|
||
|
|
}));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Handle metrics endpoint (Prometheus format)
|
||
|
|
*/
|
||
|
|
private async handleMetrics(res: http.ServerResponse): Promise<void> {
|
||
|
|
try {
|
||
|
|
const metrics: string[] = [];
|
||
|
|
const timestamp = Date.now();
|
||
|
|
|
||
|
|
// Server uptime
|
||
|
|
const uptime = Math.floor((timestamp - this.startTime) / 1000);
|
||
|
|
metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`);
|
||
|
|
metrics.push(`# TYPE modelgrid_uptime_seconds gauge`);
|
||
|
|
metrics.push(`modelgrid_uptime_seconds ${uptime}`);
|
||
|
|
|
||
|
|
// Container count
|
||
|
|
const statuses = await this.containerManager.getAllStatus();
|
||
|
|
metrics.push(`# HELP modelgrid_containers_total Total number of containers`);
|
||
|
|
metrics.push(`# TYPE modelgrid_containers_total gauge`);
|
||
|
|
metrics.push(`modelgrid_containers_total ${statuses.size}`);
|
||
|
|
|
||
|
|
// Running containers
|
||
|
|
const running = Array.from(statuses.values()).filter((s) => s.running).length;
|
||
|
|
metrics.push(`# HELP modelgrid_containers_running Number of running containers`);
|
||
|
|
metrics.push(`# TYPE modelgrid_containers_running gauge`);
|
||
|
|
metrics.push(`modelgrid_containers_running ${running}`);
|
||
|
|
|
||
|
|
// Available models
|
||
|
|
const models = await this.containerManager.getAllAvailableModels();
|
||
|
|
metrics.push(`# HELP modelgrid_models_available Number of available models`);
|
||
|
|
metrics.push(`# TYPE modelgrid_models_available gauge`);
|
||
|
|
metrics.push(`modelgrid_models_available ${models.size}`);
|
||
|
|
|
||
|
|
// GPU count
|
||
|
|
const gpus = await this.gpuDetector.detectGpus();
|
||
|
|
metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`);
|
||
|
|
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
|
||
|
|
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
|
||
|
|
|
||
|
|
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
|
||
|
|
res.end(metrics.join('\n') + '\n');
|
||
|
|
} catch (error) {
|
||
|
|
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||
|
|
res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Send error response
|
||
|
|
*/
|
||
|
|
private sendError(
|
||
|
|
res: http.ServerResponse,
|
||
|
|
statusCode: number,
|
||
|
|
message: string,
|
||
|
|
type: string,
|
||
|
|
): void {
|
||
|
|
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||
|
|
res.end(JSON.stringify({
|
||
|
|
error: {
|
||
|
|
message,
|
||
|
|
type,
|
||
|
|
code: null,
|
||
|
|
},
|
||
|
|
}));
|
||
|
|
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Get server info
|
||
|
|
*/
|
||
|
|
public getInfo(): {
|
||
|
|
running: boolean;
|
||
|
|
host: string;
|
||
|
|
port: number;
|
||
|
|
uptime: number;
|
||
|
|
} {
|
||
|
|
return {
|
||
|
|
running: !!this.server,
|
||
|
|
host: this.config.host,
|
||
|
|
port: this.config.port,
|
||
|
|
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
|
||
|
|
};
|
||
|
|
}
|
||
|
|
}
|