diff --git a/ts/api/handlers/chat.ts b/ts/api/handlers/chat.ts index ede95a9..4d566d9 100644 --- a/ts/api/handlers/chat.ts +++ b/ts/api/handlers/chat.ts @@ -6,6 +6,7 @@ import * as http from 'node:http'; import type { IApiError, IChatCompletionRequest } from '../../interfaces/api.ts'; import { ClusterCoordinator } from '../../cluster/coordinator.ts'; import { ContainerManager } from '../../containers/container-manager.ts'; +import { API_SERVER } from '../../constants.ts'; import { logger } from '../../logger.ts'; import { ModelRegistry } from '../../models/registry.ts'; import { ModelLoader } from '../../models/loader.ts'; @@ -158,11 +159,14 @@ export class ChatHandler { targetEndpoint: string, body: IChatCompletionRequest, ): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS); const response = await fetch(`${targetEndpoint}/v1/chat/completions`, { method: 'POST', headers: this.buildForwardHeaders(req), body: JSON.stringify(body), - }); + signal: controller.signal, + }).finally(() => clearTimeout(timeout)); if (body.stream) { res.writeHead(response.status, { diff --git a/ts/api/handlers/embeddings.ts b/ts/api/handlers/embeddings.ts index 9873c50..5c90a76 100644 --- a/ts/api/handlers/embeddings.ts +++ b/ts/api/handlers/embeddings.ts @@ -11,6 +11,7 @@ import type { } from '../../interfaces/api.ts'; import { ClusterCoordinator } from '../../cluster/coordinator.ts'; import { ContainerManager } from '../../containers/container-manager.ts'; +import { API_SERVER } from '../../constants.ts'; import { logger } from '../../logger.ts'; import { ModelRegistry } from '../../models/registry.ts'; @@ -80,7 +81,7 @@ export class EmbeddingsHandler { return; } - const response = await fetch(`${ensured.location.endpoint}/v1/embeddings`, { + const response = await this.fetchWithTimeout(`${ensured.location.endpoint}/v1/embeddings`, { method: 'POST', headers: this.buildForwardHeaders(req), body: JSON.stringify(requestBody), @@ -159,7 +160,7 @@ export class EmbeddingsHandler { model: string, input: string, ): Promise<{ vector: number[]; tokenCount: number }> { - const response = await fetch(`${endpoint}/v1/embeddings`, { + const response = await this.fetchWithTimeout(`${endpoint}/v1/embeddings`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model, input }), @@ -181,7 +182,7 @@ export class EmbeddingsHandler { _model: string, input: string, ): Promise<{ vector: number[]; tokenCount: number }> { - const response = await fetch(`${endpoint}/embed`, { + const response = await this.fetchWithTimeout(`${endpoint}/embed`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ inputs: input }), @@ -214,6 +215,20 @@ export class EmbeddingsHandler { return headers; } + private async fetchWithTimeout(url: string, init: RequestInit): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS); + + try { + return await fetch(url, { + ...init, + signal: controller.signal, + }); + } finally { + clearTimeout(timeout); + } + } + private sendError( res: http.ServerResponse, statusCode: number, diff --git a/ts/containers/base-container.ts b/ts/containers/base-container.ts index d3e68d3..eb676a1 100644 --- a/ts/containers/base-container.ts +++ b/ts/containers/base-container.ts @@ -11,6 +11,7 @@ import type { TContainerType, } from '../interfaces/container.ts'; import type { IChatCompletionRequest, IChatCompletionResponse } from '../interfaces/api.ts'; +import { API_SERVER } from '../constants.ts'; import { ContainerRuntime } from '../docker/container-runtime.ts'; import { logger } from '../logger.ts'; @@ -165,7 +166,7 @@ export abstract class BaseContainer { const url = `${endpoint}${path}`; const controller = new AbortController(); - const timeout = options.timeout || 30000; + const timeout = options.timeout || API_SERVER.REQUEST_TIMEOUT_MS; const timeoutId = setTimeout(() => controller.abort(), timeout); try { diff --git a/ts/models/registry.ts b/ts/models/registry.ts index 9e68629..46c7e4e 100644 --- a/ts/models/registry.ts +++ b/ts/models/registry.ts @@ -4,7 +4,7 @@ import * as fs from 'node:fs/promises'; import type { IModelCatalog, IModelCatalogEntry } from '../interfaces/catalog.ts'; -import { MODEL_REGISTRY, TIMING } from '../constants.ts'; +import { API_SERVER, MODEL_REGISTRY, TIMING } from '../constants.ts'; import { logger } from '../logger.ts'; export class ModelRegistry { @@ -167,7 +167,7 @@ export class ModelRegistry { } const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), 30000); + const timeout = setTimeout(() => controller.abort(), API_SERVER.REQUEST_TIMEOUT_MS); try { const response = await fetch(source, {