initial
This commit is contained in:
150
ts/api/handlers/chat.ts
Normal file
150
ts/api/handlers/chat.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
/**
|
||||
* Chat Completions Handler
|
||||
*
|
||||
* Handles /v1/chat/completions and /v1/completions endpoints.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IChatCompletionRequest,
|
||||
IChatCompletionResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { ModelLoader } from '../../models/loader.ts';
|
||||
|
||||
/**
|
||||
* Handler for chat completion requests
|
||||
*/
|
||||
export class ChatHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private modelLoader: ModelLoader;
|
||||
|
||||
constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelLoader = modelLoader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/chat/completions
|
||||
*/
|
||||
public async handleChatCompletion(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
const modelName = body.model;
|
||||
const isStream = body.stream === true;
|
||||
|
||||
logger.dim(`Chat completion request for model: ${modelName}`);
|
||||
|
||||
try {
|
||||
// Find or load the model
|
||||
const container = await this.findOrLoadModel(modelName);
|
||||
if (!container) {
|
||||
this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
|
||||
return;
|
||||
}
|
||||
|
||||
// Route to streaming or non-streaming handler
|
||||
if (isStream) {
|
||||
await this.handleStreamingCompletion(res, container, body);
|
||||
} else {
|
||||
await this.handleNonStreamingCompletion(res, container, body);
|
||||
}
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Chat completion error: ${message}`);
|
||||
this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find container with model or attempt to load it
|
||||
*/
|
||||
private async findOrLoadModel(
|
||||
modelName: string,
|
||||
): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
|
||||
// First, check if model is already loaded
|
||||
const container = await this.containerManager.findContainerForModel(modelName);
|
||||
if (container) {
|
||||
return container;
|
||||
}
|
||||
|
||||
// Try to load the model
|
||||
logger.info(`Model ${modelName} not loaded, attempting to load...`);
|
||||
const loadResult = await this.modelLoader.loadModel(modelName);
|
||||
|
||||
if (!loadResult.success) {
|
||||
logger.error(`Failed to load model: ${loadResult.error}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Find the container again after loading
|
||||
return this.containerManager.findContainerForModel(modelName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle non-streaming chat completion
|
||||
*/
|
||||
private async handleNonStreamingCompletion(
|
||||
res: http.ServerResponse,
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
const response = await container.chatCompletion(body);
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response));
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle streaming chat completion
|
||||
*/
|
||||
private async handleStreamingCompletion(
|
||||
res: http.ServerResponse,
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
body: IChatCompletionRequest,
|
||||
): Promise<void> {
|
||||
// Set SSE headers
|
||||
res.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'X-Accel-Buffering': 'no',
|
||||
});
|
||||
|
||||
// Stream chunks to client
|
||||
await container.chatCompletionStream(body, (chunk) => {
|
||||
res.write(`data: ${chunk}\n\n`);
|
||||
});
|
||||
|
||||
// Send final done message
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
235
ts/api/handlers/embeddings.ts
Normal file
235
ts/api/handlers/embeddings.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* Embeddings Handler
|
||||
*
|
||||
* Handles /v1/embeddings endpoint.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IEmbeddingsRequest,
|
||||
IEmbeddingsResponse,
|
||||
IEmbeddingData,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
|
||||
/**
|
||||
* Handler for embeddings requests
|
||||
*/
|
||||
export class EmbeddingsHandler {
|
||||
private containerManager: ContainerManager;
|
||||
|
||||
constructor(containerManager: ContainerManager) {
|
||||
this.containerManager = containerManager;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/embeddings
|
||||
*/
|
||||
public async handleEmbeddings(
|
||||
res: http.ServerResponse,
|
||||
body: IEmbeddingsRequest,
|
||||
): Promise<void> {
|
||||
const modelName = body.model;
|
||||
|
||||
logger.dim(`Embeddings request for model: ${modelName}`);
|
||||
|
||||
try {
|
||||
// Find container with the embedding model
|
||||
const container = await this.containerManager.findContainerForModel(modelName);
|
||||
if (!container) {
|
||||
this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found');
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate embeddings
|
||||
const response = await this.generateEmbeddings(container, body);
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response));
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Embeddings error: ${message}`);
|
||||
this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings from container
|
||||
*/
|
||||
private async generateEmbeddings(
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
request: IEmbeddingsRequest,
|
||||
): Promise<IEmbeddingsResponse> {
|
||||
const inputs = Array.isArray(request.input) ? request.input : [request.input];
|
||||
const embeddings: IEmbeddingData[] = [];
|
||||
let totalTokens = 0;
|
||||
|
||||
// Generate embeddings for each input
|
||||
for (let i = 0; i < inputs.length; i++) {
|
||||
const input = inputs[i];
|
||||
const embedding = await this.getEmbeddingFromContainer(container, request.model, input);
|
||||
|
||||
embeddings.push({
|
||||
object: 'embedding',
|
||||
embedding: embedding.vector,
|
||||
index: i,
|
||||
});
|
||||
|
||||
totalTokens += embedding.tokenCount;
|
||||
}
|
||||
|
||||
return {
|
||||
object: 'list',
|
||||
data: embeddings,
|
||||
model: request.model,
|
||||
usage: {
|
||||
prompt_tokens: totalTokens,
|
||||
total_tokens: totalTokens,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from container (container-specific implementation)
|
||||
*/
|
||||
private async getEmbeddingFromContainer(
|
||||
container: import('../../containers/base-container.ts').BaseContainer,
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const endpoint = container.getEndpoint();
|
||||
const containerType = container.type;
|
||||
|
||||
// Route to container-specific embedding endpoint
|
||||
if (containerType === 'ollama') {
|
||||
return this.getOllamaEmbedding(endpoint, model, input);
|
||||
} else if (containerType === 'vllm') {
|
||||
return this.getVllmEmbedding(endpoint, model, input);
|
||||
} else if (containerType === 'tgi') {
|
||||
return this.getTgiEmbedding(endpoint, model, input);
|
||||
}
|
||||
|
||||
throw new Error(`Container type ${containerType} does not support embeddings`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from Ollama
|
||||
*/
|
||||
private async getOllamaEmbedding(
|
||||
endpoint: string,
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const response = await fetch(`${endpoint}/api/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
prompt: input,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`Ollama embedding error: ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as { embedding: number[] };
|
||||
|
||||
// Estimate token count (rough approximation: ~4 chars per token)
|
||||
const tokenCount = Math.ceil(input.length / 4);
|
||||
|
||||
return {
|
||||
vector: result.embedding,
|
||||
tokenCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from vLLM (OpenAI-compatible)
|
||||
*/
|
||||
private async getVllmEmbedding(
|
||||
endpoint: string,
|
||||
model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
const response = await fetch(`${endpoint}/v1/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
input,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`vLLM embedding error: ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as IEmbeddingsResponse;
|
||||
|
||||
return {
|
||||
vector: result.data[0].embedding,
|
||||
tokenCount: result.usage.total_tokens,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding from TGI
|
||||
*/
|
||||
private async getTgiEmbedding(
|
||||
endpoint: string,
|
||||
_model: string,
|
||||
input: string,
|
||||
): Promise<{ vector: number[]; tokenCount: number }> {
|
||||
// TGI uses /embed endpoint
|
||||
const response = await fetch(`${endpoint}/embed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
inputs: input,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`TGI embedding error: ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as number[][];
|
||||
|
||||
// Estimate token count
|
||||
const tokenCount = Math.ceil(input.length / 4);
|
||||
|
||||
return {
|
||||
vector: result[0],
|
||||
tokenCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
9
ts/api/handlers/index.ts
Normal file
9
ts/api/handlers/index.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
/**
|
||||
* API Handlers
|
||||
*
|
||||
* OpenAI-compatible request handlers.
|
||||
*/
|
||||
|
||||
export { ChatHandler } from './chat.ts';
|
||||
export { ModelsHandler } from './models.ts';
|
||||
export { EmbeddingsHandler } from './embeddings.ts';
|
||||
136
ts/api/handlers/models.ts
Normal file
136
ts/api/handlers/models.ts
Normal file
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Models Handler
|
||||
*
|
||||
* Handles /v1/models endpoints.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type {
|
||||
IModelInfo,
|
||||
IListModelsResponse,
|
||||
IApiError,
|
||||
} from '../../interfaces/api.ts';
|
||||
import { logger } from '../../logger.ts';
|
||||
import { ContainerManager } from '../../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
/**
|
||||
* Handler for model-related requests
|
||||
*/
|
||||
export class ModelsHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
|
||||
constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models
|
||||
*/
|
||||
public async handleListModels(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const models = await this.getAvailableModels();
|
||||
|
||||
const response: IListModelsResponse = {
|
||||
object: 'list',
|
||||
data: models,
|
||||
};
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response));
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Failed to list models: ${message}`);
|
||||
this.sendError(res, 500, `Failed to list models: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models/:model
|
||||
*/
|
||||
public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
|
||||
try {
|
||||
const models = await this.getAvailableModels();
|
||||
const model = models.find((m) => m.id === modelId);
|
||||
|
||||
if (!model) {
|
||||
this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
|
||||
return;
|
||||
}
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(model));
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error(`Failed to get model info: ${message}`);
|
||||
this.sendError(res, 500, `Failed to get model info: ${message}`, 'server_error');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all available models from containers and greenlist
|
||||
*/
|
||||
private async getAvailableModels(): Promise<IModelInfo[]> {
|
||||
const models: IModelInfo[] = [];
|
||||
const seen = new Set<string>();
|
||||
const timestamp = Math.floor(Date.now() / 1000);
|
||||
|
||||
// Get models from running containers
|
||||
const containerModels = await this.containerManager.getAllAvailableModels();
|
||||
for (const [modelId, modelInfo] of containerModels) {
|
||||
if (!seen.has(modelId)) {
|
||||
seen.add(modelId);
|
||||
models.push({
|
||||
id: modelId,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${modelInfo.container}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add greenlit models that aren't loaded yet
|
||||
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
|
||||
for (const greenlit of greenlitModels) {
|
||||
if (!seen.has(greenlit.name)) {
|
||||
seen.add(greenlit.name);
|
||||
models.push({
|
||||
id: greenlit.name,
|
||||
object: 'model',
|
||||
created: timestamp,
|
||||
owned_by: `modelgrid-${greenlit.container}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Sort alphabetically
|
||||
models.sort((a, b) => a.id.localeCompare(b.id));
|
||||
|
||||
return models;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
10
ts/api/index.ts
Normal file
10
ts/api/index.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* API Gateway Module
|
||||
*
|
||||
* OpenAI-compatible API server for ModelGrid.
|
||||
*/
|
||||
|
||||
export { ApiServer } from './server.ts';
|
||||
export { ApiRouter } from './router.ts';
|
||||
export * from './handlers/index.ts';
|
||||
export * from './middleware/index.ts';
|
||||
105
ts/api/middleware/auth.ts
Normal file
105
ts/api/middleware/auth.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
/**
|
||||
* Authentication Middleware
|
||||
*
|
||||
* Validates API keys for incoming requests.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import { logger } from '../../logger.ts';
|
||||
|
||||
/**
|
||||
* Authentication middleware for API key validation
|
||||
*/
|
||||
export class AuthMiddleware {
|
||||
private apiKeys: Set<string>;
|
||||
private allowNoAuth: boolean;
|
||||
|
||||
constructor(apiKeys: string[], allowNoAuth: boolean = false) {
|
||||
this.apiKeys = new Set(apiKeys);
|
||||
this.allowNoAuth = allowNoAuth;
|
||||
|
||||
if (this.apiKeys.size === 0 && !allowNoAuth) {
|
||||
logger.warn('No API keys configured - authentication will fail for all requests');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Authenticate a request
|
||||
*/
|
||||
public authenticate(req: http.IncomingMessage): boolean {
|
||||
// If no keys configured and allowNoAuth is true, allow all requests
|
||||
if (this.apiKeys.size === 0 && this.allowNoAuth) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const authHeader = req.headers.authorization;
|
||||
|
||||
if (!authHeader) {
|
||||
logger.dim('Request rejected: No Authorization header');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extract Bearer token
|
||||
const match = authHeader.match(/^Bearer\s+(.+)$/i);
|
||||
if (!match) {
|
||||
logger.dim('Request rejected: Invalid Authorization header format');
|
||||
return false;
|
||||
}
|
||||
|
||||
const apiKey = match[1];
|
||||
|
||||
// Check if key is valid
|
||||
if (!this.apiKeys.has(apiKey)) {
|
||||
logger.dim('Request rejected: Invalid API key');
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get API key from request (if authenticated)
|
||||
*/
|
||||
public getApiKey(req: http.IncomingMessage): string | null {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const match = authHeader.match(/^Bearer\s+(.+)$/i);
|
||||
return match ? match[1] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an API key
|
||||
*/
|
||||
public addApiKey(key: string): void {
|
||||
this.apiKeys.add(key);
|
||||
logger.info('API key added');
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove an API key
|
||||
*/
|
||||
public removeApiKey(key: string): boolean {
|
||||
const removed = this.apiKeys.delete(key);
|
||||
if (removed) {
|
||||
logger.info('API key removed');
|
||||
}
|
||||
return removed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get count of configured API keys
|
||||
*/
|
||||
public getKeyCount(): number {
|
||||
return this.apiKeys.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if authentication is required
|
||||
*/
|
||||
public isAuthRequired(): boolean {
|
||||
return !this.allowNoAuth || this.apiKeys.size > 0;
|
||||
}
|
||||
}
|
||||
7
ts/api/middleware/index.ts
Normal file
7
ts/api/middleware/index.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
/**
|
||||
* API Middleware
|
||||
*/
|
||||
|
||||
export { AuthMiddleware } from './auth.ts';
|
||||
export { SanityMiddleware } from './sanity.ts';
|
||||
export type { IValidationResult } from './sanity.ts';
|
||||
254
ts/api/middleware/sanity.ts
Normal file
254
ts/api/middleware/sanity.ts
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* Sanity Middleware
|
||||
*
|
||||
* Validates request structure and parameters.
|
||||
*/
|
||||
|
||||
import type { IChatCompletionRequest, IEmbeddingsRequest } from '../../interfaces/api.ts';
|
||||
import { ModelRegistry } from '../../models/registry.ts';
|
||||
|
||||
/**
 * Validation result returned by every SanityMiddleware check.
 */
export interface IValidationResult {
  // True when the request passed all checks.
  valid: boolean;
  // Human-readable description of the first failed check (set when valid is false).
  error?: string;
  // Name/path of the offending request field, when one can be identified.
  param?: string;
}
|
||||
|
||||
/**
|
||||
* Request validation middleware
|
||||
*/
|
||||
export class SanityMiddleware {
|
||||
private modelRegistry: ModelRegistry;
|
||||
|
||||
constructor(modelRegistry: ModelRegistry) {
|
||||
this.modelRegistry = modelRegistry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate chat completion request
|
||||
*/
|
||||
public validateChatRequest(body: unknown): IValidationResult {
|
||||
if (!body || typeof body !== 'object') {
|
||||
return { valid: false, error: 'Request body must be a JSON object' };
|
||||
}
|
||||
|
||||
const request = body as Record<string, unknown>;
|
||||
|
||||
// Validate model
|
||||
if (!request.model || typeof request.model !== 'string') {
|
||||
return { valid: false, error: 'Missing or invalid "model" field', param: 'model' };
|
||||
}
|
||||
|
||||
// Validate messages
|
||||
if (!Array.isArray(request.messages)) {
|
||||
return { valid: false, error: 'Missing or invalid "messages" field', param: 'messages' };
|
||||
}
|
||||
|
||||
if (request.messages.length === 0) {
|
||||
return { valid: false, error: '"messages" array cannot be empty', param: 'messages' };
|
||||
}
|
||||
|
||||
// Validate each message
|
||||
for (let i = 0; i < request.messages.length; i++) {
|
||||
const msg = request.messages[i] as Record<string, unknown>;
|
||||
const msgValidation = this.validateMessage(msg, i);
|
||||
if (!msgValidation.valid) {
|
||||
return msgValidation;
|
||||
}
|
||||
}
|
||||
|
||||
// Validate optional parameters
|
||||
if (request.temperature !== undefined) {
|
||||
const temp = request.temperature as number;
|
||||
if (typeof temp !== 'number' || temp < 0 || temp > 2) {
|
||||
return { valid: false, error: '"temperature" must be between 0 and 2', param: 'temperature' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.top_p !== undefined) {
|
||||
const topP = request.top_p as number;
|
||||
if (typeof topP !== 'number' || topP < 0 || topP > 1) {
|
||||
return { valid: false, error: '"top_p" must be between 0 and 1', param: 'top_p' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.max_tokens !== undefined) {
|
||||
const maxTokens = request.max_tokens as number;
|
||||
if (typeof maxTokens !== 'number' || maxTokens < 1) {
|
||||
return { valid: false, error: '"max_tokens" must be a positive integer', param: 'max_tokens' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.n !== undefined) {
|
||||
const n = request.n as number;
|
||||
if (typeof n !== 'number' || n < 1 || n > 10) {
|
||||
return { valid: false, error: '"n" must be between 1 and 10', param: 'n' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.stream !== undefined && typeof request.stream !== 'boolean') {
|
||||
return { valid: false, error: '"stream" must be a boolean', param: 'stream' };
|
||||
}
|
||||
|
||||
if (request.presence_penalty !== undefined) {
|
||||
const pp = request.presence_penalty as number;
|
||||
if (typeof pp !== 'number' || pp < -2 || pp > 2) {
|
||||
return { valid: false, error: '"presence_penalty" must be between -2 and 2', param: 'presence_penalty' };
|
||||
}
|
||||
}
|
||||
|
||||
if (request.frequency_penalty !== undefined) {
|
||||
const fp = request.frequency_penalty as number;
|
||||
if (typeof fp !== 'number' || fp < -2 || fp > 2) {
|
||||
return { valid: false, error: '"frequency_penalty" must be between -2 and 2', param: 'frequency_penalty' };
|
||||
}
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate a single message in the chat request
|
||||
*/
|
||||
private validateMessage(msg: Record<string, unknown>, index: number): IValidationResult {
|
||||
if (!msg || typeof msg !== 'object') {
|
||||
return { valid: false, error: `Message at index ${index} must be an object`, param: `messages[${index}]` };
|
||||
}
|
||||
|
||||
// Validate role
|
||||
const validRoles = ['system', 'user', 'assistant', 'tool'];
|
||||
if (!msg.role || !validRoles.includes(msg.role as string)) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Invalid role at index ${index}. Must be one of: ${validRoles.join(', ')}`,
|
||||
param: `messages[${index}].role`,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate content (can be null for assistant with tool_calls)
|
||||
if (msg.role === 'assistant' && msg.tool_calls) {
|
||||
// Content can be null/undefined when tool_calls present
|
||||
} else if (msg.content === undefined || msg.content === null) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Missing content at index ${index}`,
|
||||
param: `messages[${index}].content`,
|
||||
};
|
||||
} else if (typeof msg.content !== 'string') {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Content at index ${index} must be a string`,
|
||||
param: `messages[${index}].content`,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate tool response message
|
||||
if (msg.role === 'tool' && !msg.tool_call_id) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Tool message at index ${index} requires tool_call_id`,
|
||||
param: `messages[${index}].tool_call_id`,
|
||||
};
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate embeddings request
|
||||
*/
|
||||
public validateEmbeddingsRequest(body: unknown): IValidationResult {
|
||||
if (!body || typeof body !== 'object') {
|
||||
return { valid: false, error: 'Request body must be a JSON object' };
|
||||
}
|
||||
|
||||
const request = body as Record<string, unknown>;
|
||||
|
||||
// Validate model
|
||||
if (!request.model || typeof request.model !== 'string') {
|
||||
return { valid: false, error: 'Missing or invalid "model" field', param: 'model' };
|
||||
}
|
||||
|
||||
// Validate input
|
||||
if (request.input === undefined || request.input === null) {
|
||||
return { valid: false, error: 'Missing "input" field', param: 'input' };
|
||||
}
|
||||
|
||||
const input = request.input;
|
||||
if (typeof input !== 'string' && !Array.isArray(input)) {
|
||||
return { valid: false, error: '"input" must be a string or array of strings', param: 'input' };
|
||||
}
|
||||
|
||||
if (Array.isArray(input)) {
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
if (typeof input[i] !== 'string') {
|
||||
return { valid: false, error: `"input[${i}]" must be a string`, param: `input[${i}]` };
|
||||
}
|
||||
}
|
||||
|
||||
if (input.length === 0) {
|
||||
return { valid: false, error: '"input" array cannot be empty', param: 'input' };
|
||||
}
|
||||
}
|
||||
|
||||
// Validate encoding_format
|
||||
if (request.encoding_format !== undefined) {
|
||||
const format = request.encoding_format as string;
|
||||
if (format !== 'float' && format !== 'base64') {
|
||||
return { valid: false, error: '"encoding_format" must be "float" or "base64"', param: 'encoding_format' };
|
||||
}
|
||||
}
|
||||
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if model is in greenlist (async validation)
|
||||
*/
|
||||
public async validateModelGreenlist(modelName: string): Promise<IValidationResult> {
|
||||
const isGreenlit = await this.modelRegistry.isModelGreenlit(modelName);
|
||||
if (!isGreenlit) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Model "${modelName}" is not greenlit. Contact administrator to add it to the greenlist.`,
|
||||
param: 'model',
|
||||
};
|
||||
}
|
||||
return { valid: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize request body by removing unknown fields
|
||||
*/
|
||||
public sanitizeChatRequest(body: Record<string, unknown>): IChatCompletionRequest {
|
||||
return {
|
||||
model: body.model as string,
|
||||
messages: body.messages as IChatCompletionRequest['messages'],
|
||||
max_tokens: body.max_tokens as number | undefined,
|
||||
temperature: body.temperature as number | undefined,
|
||||
top_p: body.top_p as number | undefined,
|
||||
n: body.n as number | undefined,
|
||||
stream: body.stream as boolean | undefined,
|
||||
stop: body.stop as string | string[] | undefined,
|
||||
presence_penalty: body.presence_penalty as number | undefined,
|
||||
frequency_penalty: body.frequency_penalty as number | undefined,
|
||||
user: body.user as string | undefined,
|
||||
tools: body.tools as IChatCompletionRequest['tools'],
|
||||
tool_choice: body.tool_choice as IChatCompletionRequest['tool_choice'],
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize embeddings request
|
||||
*/
|
||||
public sanitizeEmbeddingsRequest(body: Record<string, unknown>): IEmbeddingsRequest {
|
||||
return {
|
||||
model: body.model as string,
|
||||
input: body.input as string | string[],
|
||||
user: body.user as string | undefined,
|
||||
encoding_format: body.encoding_format as 'float' | 'base64' | undefined,
|
||||
};
|
||||
}
|
||||
}
|
||||
300
ts/api/router.ts
Normal file
300
ts/api/router.ts
Normal file
@@ -0,0 +1,300 @@
|
||||
/**
|
||||
* API Router
|
||||
*
|
||||
* Routes incoming requests to appropriate handlers.
|
||||
*/
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type { IApiError } from '../interfaces/api.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { ChatHandler } from './handlers/chat.ts';
|
||||
import { ModelsHandler } from './handlers/models.ts';
|
||||
import { EmbeddingsHandler } from './handlers/embeddings.ts';
|
||||
import { AuthMiddleware } from './middleware/auth.ts';
|
||||
import { SanityMiddleware } from './middleware/sanity.ts';
|
||||
|
||||
/**
|
||||
* API Router - routes requests to handlers
|
||||
*/
|
||||
export class ApiRouter {
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
private chatHandler: ChatHandler;
|
||||
private modelsHandler: ModelsHandler;
|
||||
private embeddingsHandler: EmbeddingsHandler;
|
||||
private authMiddleware: AuthMiddleware;
|
||||
private sanityMiddleware: SanityMiddleware;
|
||||
|
||||
constructor(
  containerManager: ContainerManager,
  modelRegistry: ModelRegistry,
  modelLoader: ModelLoader,
  apiKeys: string[],
) {
  // Core dependencies
  this.containerManager = containerManager;
  this.modelRegistry = modelRegistry;
  this.modelLoader = modelLoader;

  // Request handlers, one per endpoint family
  this.chatHandler = new ChatHandler(containerManager, modelLoader);
  this.modelsHandler = new ModelsHandler(containerManager, modelRegistry);
  this.embeddingsHandler = new EmbeddingsHandler(containerManager);

  // Cross-cutting middleware: authentication and request validation
  this.authMiddleware = new AuthMiddleware(apiKeys);
  this.sanityMiddleware = new SanityMiddleware(modelRegistry);
}
|
||||
|
||||
/**
|
||||
* Route a request to the appropriate handler
|
||||
*/
|
||||
public async route(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
path: string,
|
||||
): Promise<void> {
|
||||
// OpenAI API endpoints
|
||||
if (path === '/v1/chat/completions') {
|
||||
await this.handleChatCompletions(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === '/v1/completions') {
|
||||
await this.handleCompletions(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === '/v1/models' || path === '/v1/models/') {
|
||||
await this.handleModels(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path.startsWith('/v1/models/')) {
|
||||
await this.handleModelInfo(req, res, path);
|
||||
return;
|
||||
}
|
||||
|
||||
if (path === '/v1/embeddings') {
|
||||
await this.handleEmbeddings(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Not found
|
||||
this.sendError(res, 404, `Endpoint not found: ${path}`, 'invalid_request_error');
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/chat/completions
|
||||
*/
|
||||
private async handleChatCompletions(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'POST') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate request
|
||||
const validation = this.sanityMiddleware.validateChatRequest(body);
|
||||
if (!validation.valid) {
|
||||
this.sendError(res, 400, validation.error || 'Invalid request', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Handle request
|
||||
await this.chatHandler.handleChatCompletion(req, res, body);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/completions (legacy endpoint)
|
||||
*/
|
||||
private async handleCompletions(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'POST') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Convert to chat format and handle
|
||||
const chatBody = this.convertCompletionToChat(body);
|
||||
await this.chatHandler.handleChatCompletion(req, res, chatBody);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models
|
||||
*/
|
||||
private async handleModels(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'GET') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
await this.modelsHandler.handleListModels(res);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle GET /v1/models/:model
|
||||
*/
|
||||
private async handleModelInfo(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
path: string,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'GET') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
const modelId = path.replace('/v1/models/', '');
|
||||
await this.modelsHandler.handleGetModel(res, modelId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle POST /v1/embeddings
|
||||
*/
|
||||
private async handleEmbeddings(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
if (req.method !== 'POST') {
|
||||
this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Authenticate
|
||||
if (!this.authMiddleware.authenticate(req)) {
|
||||
this.sendError(res, 401, 'Invalid API key', 'authentication_error');
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse body
|
||||
const body = await this.parseRequestBody(req);
|
||||
if (!body) {
|
||||
this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
|
||||
return;
|
||||
}
|
||||
|
||||
await this.embeddingsHandler.handleEmbeddings(res, body);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse request body
|
||||
*/
|
||||
private async parseRequestBody(req: http.IncomingMessage): Promise<unknown | null> {
|
||||
return new Promise((resolve) => {
|
||||
let body = '';
|
||||
|
||||
req.on('data', (chunk) => {
|
||||
body += chunk.toString();
|
||||
// Limit body size
|
||||
if (body.length > 10 * 1024 * 1024) {
|
||||
resolve(null);
|
||||
}
|
||||
});
|
||||
|
||||
req.on('end', () => {
|
||||
try {
|
||||
resolve(JSON.parse(body));
|
||||
} catch {
|
||||
resolve(null);
|
||||
}
|
||||
});
|
||||
|
||||
req.on('error', () => {
|
||||
resolve(null);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert legacy completion request to chat format
|
||||
*/
|
||||
private convertCompletionToChat(body: Record<string, unknown>): Record<string, unknown> {
|
||||
const prompt = body.prompt as string | string[];
|
||||
const promptText = Array.isArray(prompt) ? prompt.join('\n') : prompt;
|
||||
|
||||
return {
|
||||
model: body.model,
|
||||
messages: [
|
||||
{ role: 'user', content: promptText },
|
||||
],
|
||||
max_tokens: body.max_tokens,
|
||||
temperature: body.temperature,
|
||||
top_p: body.top_p,
|
||||
n: body.n,
|
||||
stream: body.stream,
|
||||
stop: body.stop,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
param?: string,
|
||||
): void {
|
||||
const error: IApiError = {
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
param,
|
||||
code: null,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(error));
|
||||
}
|
||||
}
|
||||
300
ts/api/server.ts
Normal file
300
ts/api/server.ts
Normal file
@@ -0,0 +1,300 @@
|
||||
/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */
|
||||
|
||||
import * as http from 'node:http';
|
||||
import type { IApiConfig } from '../interfaces/config.ts';
|
||||
import type { IHealthResponse } from '../interfaces/api.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { API_SERVER } from '../constants.ts';
|
||||
import { ApiRouter } from './router.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
||||
|
||||
/**
|
||||
* API Server for ModelGrid
|
||||
*/
|
||||
export class ApiServer {
|
||||
private server?: http.Server;
|
||||
private config: IApiConfig;
|
||||
private router: ApiRouter;
|
||||
private containerManager: ContainerManager;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
private gpuDetector: GpuDetector;
|
||||
private startTime: number = 0;
|
||||
|
||||
constructor(
|
||||
config: IApiConfig,
|
||||
containerManager: ContainerManager,
|
||||
modelRegistry: ModelRegistry,
|
||||
) {
|
||||
this.config = config;
|
||||
this.containerManager = containerManager;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.gpuDetector = new GpuDetector();
|
||||
this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
|
||||
this.router = new ApiRouter(
|
||||
containerManager,
|
||||
modelRegistry,
|
||||
this.modelLoader,
|
||||
config.apiKeys,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the API server
|
||||
*/
|
||||
public async start(): Promise<void> {
|
||||
if (this.server) {
|
||||
logger.warn('API server is already running');
|
||||
return;
|
||||
}
|
||||
|
||||
this.startTime = Date.now();
|
||||
|
||||
this.server = http.createServer(async (req, res) => {
|
||||
await this.handleRequest(req, res);
|
||||
});
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
this.server!.listen(this.config.port, this.config.host, () => {
|
||||
logger.success(`API server started on ${this.config.host}:${this.config.port}`);
|
||||
logger.info('OpenAI-compatible API available at:');
|
||||
logger.info(` POST /v1/chat/completions`);
|
||||
logger.info(` GET /v1/models`);
|
||||
logger.info(` POST /v1/embeddings`);
|
||||
resolve();
|
||||
});
|
||||
|
||||
this.server!.on('error', (error) => {
|
||||
logger.error(`API server error: ${error.message}`);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the API server
|
||||
*/
|
||||
public async stop(): Promise<void> {
|
||||
if (!this.server) {
|
||||
return;
|
||||
}
|
||||
|
||||
return new Promise((resolve) => {
|
||||
this.server!.close(() => {
|
||||
logger.log('API server stopped');
|
||||
this.server = undefined;
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle incoming HTTP request
|
||||
*/
|
||||
private async handleRequest(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): Promise<void> {
|
||||
const startTime = Date.now();
|
||||
|
||||
// Set CORS headers if enabled
|
||||
if (this.config.cors) {
|
||||
this.setCorsHeaders(req, res);
|
||||
}
|
||||
|
||||
// Handle preflight requests
|
||||
if (req.method === 'OPTIONS') {
|
||||
res.writeHead(204);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse URL
|
||||
const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
|
||||
const path = url.pathname;
|
||||
|
||||
// Health check endpoint (no auth required)
|
||||
if (path === '/health' || path === '/healthz') {
|
||||
await this.handleHealthCheck(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Metrics endpoint (no auth required)
|
||||
if (path === '/metrics') {
|
||||
await this.handleMetrics(res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Route request
|
||||
try {
|
||||
await this.router.route(req, res, path);
|
||||
} catch (error) {
|
||||
logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
|
||||
this.sendError(res, 500, 'Internal server error', 'internal_error');
|
||||
}
|
||||
|
||||
// Log request
|
||||
const duration = Date.now() - startTime;
|
||||
logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set CORS headers
|
||||
*/
|
||||
private setCorsHeaders(
|
||||
req: http.IncomingMessage,
|
||||
res: http.ServerResponse,
|
||||
): void {
|
||||
const origin = req.headers.origin || '*';
|
||||
const allowedOrigins = this.config.corsOrigins || ['*'];
|
||||
|
||||
if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
|
||||
res.setHeader('Access-Control-Allow-Origin', origin);
|
||||
}
|
||||
|
||||
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
|
||||
res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
|
||||
res.setHeader('Access-Control-Max-Age', '86400');
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle health check
|
||||
*/
|
||||
private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const statuses = await this.containerManager.getAllStatus();
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
|
||||
let status: 'ok' | 'degraded' | 'error' = 'ok';
|
||||
const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
|
||||
const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};
|
||||
|
||||
// Check container health
|
||||
for (const [id, containerStatus] of statuses) {
|
||||
if (containerStatus.running && containerStatus.health === 'healthy') {
|
||||
containerHealth[id] = 'healthy';
|
||||
} else {
|
||||
containerHealth[id] = 'unhealthy';
|
||||
status = 'degraded';
|
||||
}
|
||||
}
|
||||
|
||||
// Check GPU status
|
||||
for (const gpu of gpus) {
|
||||
gpuStatus[gpu.id] = 'available';
|
||||
}
|
||||
|
||||
const response: IHealthResponse = {
|
||||
status,
|
||||
version: '1.0.0', // TODO: Get from config
|
||||
uptime: Math.floor((Date.now() - this.startTime) / 1000),
|
||||
containers: statuses.size,
|
||||
models: models.size,
|
||||
gpus: gpus.length,
|
||||
details: {
|
||||
containers: containerHealth,
|
||||
gpus: gpuStatus,
|
||||
},
|
||||
};
|
||||
|
||||
res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(response, null, 2));
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
status: 'error',
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle metrics endpoint (Prometheus format)
|
||||
*/
|
||||
private async handleMetrics(res: http.ServerResponse): Promise<void> {
|
||||
try {
|
||||
const metrics: string[] = [];
|
||||
const timestamp = Date.now();
|
||||
|
||||
// Server uptime
|
||||
const uptime = Math.floor((timestamp - this.startTime) / 1000);
|
||||
metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`);
|
||||
metrics.push(`# TYPE modelgrid_uptime_seconds gauge`);
|
||||
metrics.push(`modelgrid_uptime_seconds ${uptime}`);
|
||||
|
||||
// Container count
|
||||
const statuses = await this.containerManager.getAllStatus();
|
||||
metrics.push(`# HELP modelgrid_containers_total Total number of containers`);
|
||||
metrics.push(`# TYPE modelgrid_containers_total gauge`);
|
||||
metrics.push(`modelgrid_containers_total ${statuses.size}`);
|
||||
|
||||
// Running containers
|
||||
const running = Array.from(statuses.values()).filter((s) => s.running).length;
|
||||
metrics.push(`# HELP modelgrid_containers_running Number of running containers`);
|
||||
metrics.push(`# TYPE modelgrid_containers_running gauge`);
|
||||
metrics.push(`modelgrid_containers_running ${running}`);
|
||||
|
||||
// Available models
|
||||
const models = await this.containerManager.getAllAvailableModels();
|
||||
metrics.push(`# HELP modelgrid_models_available Number of available models`);
|
||||
metrics.push(`# TYPE modelgrid_models_available gauge`);
|
||||
metrics.push(`modelgrid_models_available ${models.size}`);
|
||||
|
||||
// GPU count
|
||||
const gpus = await this.gpuDetector.detectGpus();
|
||||
metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`);
|
||||
metrics.push(`# TYPE modelgrid_gpus_total gauge`);
|
||||
metrics.push(`modelgrid_gpus_total ${gpus.length}`);
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
|
||||
res.end(metrics.join('\n') + '\n');
|
||||
} catch (error) {
|
||||
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||||
res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send error response
|
||||
*/
|
||||
private sendError(
|
||||
res: http.ServerResponse,
|
||||
statusCode: number,
|
||||
message: string,
|
||||
type: string,
|
||||
): void {
|
||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
error: {
|
||||
message,
|
||||
type,
|
||||
code: null,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get server info
|
||||
*/
|
||||
public getInfo(): {
|
||||
running: boolean;
|
||||
host: string;
|
||||
port: number;
|
||||
uptime: number;
|
||||
} {
|
||||
return {
|
||||
running: !!this.server,
|
||||
host: this.config.host,
|
||||
port: this.config.port,
|
||||
uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user