/**
 * Embeddings handler.
 */

import * as http from 'node:http';
import type {
  IApiError,
  IEmbeddingData,
  IEmbeddingsRequest,
  IEmbeddingsResponse,
} from '../../interfaces/api.ts';
import { ClusterCoordinator } from '../../cluster/coordinator.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { logger } from '../../logger.ts';
import { ModelRegistry } from '../../models/registry.ts';

/** Container type accepted by the embedding helpers (type-only reference). */
type EmbeddingContainer = import('../../containers/base-container.ts').BaseContainer;

/** A single embedding vector plus the token count attributed to its input. */
type EmbeddingResult = { vector: number[]; tokenCount: number };

/**
 * Serves embeddings requests.
 *
 * A request is answered from a container found by the container manager when
 * one exists for the model; otherwise the cluster coordinator is asked to
 * ensure the model, and the request is either served locally or forwarded to
 * the endpoint the coordinator reports.
 */
export class EmbeddingsHandler {
  private readonly containerManager: ContainerManager;
  private readonly modelRegistry: ModelRegistry;
  private readonly clusterCoordinator: ClusterCoordinator;

  constructor(
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
    clusterCoordinator: ClusterCoordinator,
  ) {
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.clusterCoordinator = clusterCoordinator;
  }

  /**
   * Handles one embeddings request and always writes a response to `res`.
   *
   * Responses:
   * - 200 with the embeddings payload when a container for the model is found;
   * - 404 (`model_not_found`) when the coordinator cannot ensure the model;
   * - 503 (`server_error`) when the model is placed on this node but no
   *   container is found for it;
   * - the remote node's status and body when the request is forwarded;
   * - 500 (`server_error`) on any thrown error, including a model-registry
   *   lookup failure.
   *
   * @param req - Incoming request; only its `authorization` and `x-request-id`
   *   headers are read, and only when forwarding.
   * @param res - Response the result or error is written to.
   * @param body - Parsed embeddings request body.
   */
  public async handleEmbeddings(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    body: IEmbeddingsRequest,
  ): Promise<void> {
    try {
      // Resolved inside the try block so a registry failure produces a JSON
      // 500 instead of a rejected promise with no response written.
      const canonicalModel = await this.resolveCanonicalModel(body.model);
      const requestBody: IEmbeddingsRequest = {
        ...body,
        model: canonicalModel,
      };

      logger.dim(`Embeddings request for model: ${canonicalModel}`);

      const container = await this.containerManager.findContainerForModel(canonicalModel);
      if (container) {
        this.sendJson(res, 200, await this.generateEmbeddings(container, requestBody));
        return;
      }

      const ensured = await this.clusterCoordinator.ensureModelViaControlPlane(canonicalModel);
      if (!ensured) {
        this.sendError(
          res,
          404,
          `Embedding model "${canonicalModel}" not found`,
          'model_not_found',
        );
        return;
      }

      if (ensured.location.nodeName === this.clusterCoordinator.getLocalNodeName()) {
        // The model was placed on this node: look the container up again.
        const localContainer = await this.containerManager.findContainerForModel(canonicalModel);
        if (!localContainer) {
          this.sendError(
            res,
            503,
            `Embedding model "${canonicalModel}" is not ready`,
            'server_error',
          );
          return;
        }

        this.sendJson(res, 200, await this.generateEmbeddings(localContainer, requestBody));
        return;
      }

      // The model lives on another node: relay the request and mirror the
      // remote status, content type and body back to the caller.
      const forwarded = await fetch(`${ensured.location.endpoint}/v1/embeddings`, {
        method: 'POST',
        headers: this.buildForwardHeaders(req),
        body: JSON.stringify(requestBody),
      });

      const text = await forwarded.text();
      res.writeHead(forwarded.status, {
        'Content-Type': forwarded.headers.get('content-type') || 'application/json',
      });
      res.end(text);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Embeddings error: ${message}`);

      if (res.headersSent) {
        // A status line is already on the wire; writing another head would
        // throw, so just terminate the response.
        if (!res.writableEnded) {
          res.end();
        }
        return;
      }

      this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
    }
  }

  /**
   * Maps a requested model name to the registry's model id.
   *
   * @param modelName - Model name as supplied by the client.
   * @returns The registry entry's `id`, or `modelName` unchanged when the
   *   registry has no entry (or the entry has an empty id).
   */
  private async resolveCanonicalModel(modelName: string): Promise<string> {
    const model = await this.modelRegistry.getModel(modelName);
    return model?.id || modelName;
  }

  /**
   * Builds the embeddings response for every input in the request.
   *
   * Inputs are embedded one at a time, in order, so `data[i].index` matches
   * the position of the input in the request.
   *
   * @param container - Container that serves the model.
   * @param request - Embeddings request; `input` may be a single value or an array.
   * @returns List-shaped response whose usage totals sum the per-input token counts.
   * @throws Error when the container type is unsupported or a backend call fails.
   */
  private async generateEmbeddings(
    container: EmbeddingContainer,
    request: IEmbeddingsRequest,
  ): Promise<IEmbeddingsResponse> {
    const inputs = Array.isArray(request.input) ? request.input : [request.input];
    const embeddings: IEmbeddingData[] = [];
    let totalTokens = 0;

    for (const [index, input] of inputs.entries()) {
      const embedding = await this.getEmbeddingFromContainer(container, request.model, input);
      embeddings.push({
        object: 'embedding',
        embedding: embedding.vector,
        index,
      });
      totalTokens += embedding.tokenCount;
    }

    return {
      object: 'list',
      data: embeddings,
      model: request.model,
      usage: {
        prompt_tokens: totalTokens,
        total_tokens: totalTokens,
      },
    };
  }

  /**
   * Dispatches a single input to the backend matching the container type.
   *
   * @param container - Container providing the endpoint and its `type`.
   * @param model - Model name passed through to the backend.
   * @param input - Text to embed.
   * @throws Error when the container type is neither `vllm` nor `tgi`.
   */
  private async getEmbeddingFromContainer(
    container: EmbeddingContainer,
    model: string,
    input: string,
  ): Promise<EmbeddingResult> {
    const endpoint = container.getEndpoint();
    const containerType = container.type;

    if (containerType === 'vllm') {
      return this.getVllmEmbedding(endpoint, model, input);
    }

    if (containerType === 'tgi') {
      return this.getTgiEmbedding(endpoint, model, input);
    }

    throw new Error(`Container type ${containerType} does not support embeddings`);
  }

  /**
   * Requests one embedding from a vLLM container via `POST /v1/embeddings`.
   *
   * @returns The first embedding in the reply and the reply's `usage.total_tokens`.
   * @throws Error on a non-2xx status or when the reply carries no embedding.
   */
  private async getVllmEmbedding(
    endpoint: string,
    model: string,
    input: string,
  ): Promise<EmbeddingResult> {
    const response = await fetch(`${endpoint}/v1/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, input }),
    });

    if (!response.ok) {
      throw new Error(`vLLM embedding error: ${await response.text()}`);
    }

    const result = (await response.json()) as IEmbeddingsResponse;
    const first = result.data?.[0];
    if (!first || !Array.isArray(first.embedding)) {
      // Fail loudly rather than surface an opaque TypeError.
      throw new Error('vLLM embedding error: response contained no embedding data');
    }

    return {
      vector: first.embedding,
      tokenCount: result.usage.total_tokens,
    };
  }

  /**
   * Requests one embedding from a TGI container via `POST /embed`.
   *
   * The reply is read as an array of vectors with no usage information, so the
   * token count is approximated as one token per four input characters.
   *
   * @returns The first vector in the reply and the estimated token count.
   * @throws Error on a non-2xx status or when the reply carries no vector.
   */
  private async getTgiEmbedding(
    endpoint: string,
    _model: string,
    input: string,
  ): Promise<EmbeddingResult> {
    const response = await fetch(`${endpoint}/embed`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ inputs: input }),
    });

    if (!response.ok) {
      throw new Error(`TGI embedding error: ${await response.text()}`);
    }

    const result = (await response.json()) as number[][];
    const vector = Array.isArray(result) ? result[0] : undefined;
    if (!Array.isArray(vector)) {
      // Without this check an empty reply would be returned to the client as
      // a 200 whose embedding is undefined.
      throw new Error('TGI embedding error: response contained no embedding vectors');
    }

    return {
      vector,
      tokenCount: Math.ceil(input.length / 4),
    };
  }

  /**
   * Builds the headers sent when forwarding a request to another node.
   *
   * Always sets a JSON content type; copies `authorization` and
   * `x-request-id` from the incoming request when each is a single string.
   */
  private buildForwardHeaders(req: http.IncomingMessage): Record<string, string> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
    };

    const { authorization } = req.headers;
    if (typeof authorization === 'string') {
      headers.Authorization = authorization;
    }

    const requestId = req.headers['x-request-id'];
    if (typeof requestId === 'string') {
      headers['X-Request-Id'] = requestId;
    }

    return headers;
  }

  /**
   * Writes a JSON response.
   *
   * The payload is serialised before any header is written, so a
   * serialisation failure leaves the response untouched for the error path.
   */
  private sendJson(res: http.ServerResponse, statusCode: number, payload: unknown): void {
    const serialized = JSON.stringify(payload);
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(serialized);
  }

  /**
   * Writes an error response in the API error envelope.
   *
   * @param res - Response to write to.
   * @param statusCode - HTTP status code.
   * @param message - Human-readable error message.
   * @param type - Error type string placed in the envelope.
   * @param param - Optional name of the offending request parameter.
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
        param,
      },
    };

    this.sendJson(res, statusCode, error);
  }
}