feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+15 -9
View File
@@ -4,15 +4,11 @@
* Manages HuggingFace Text Generation Inference containers.
*/
import type { IContainerConfig, ILoadedModel, TContainerType } from '../interfaces/container.ts';
import type {
IContainerConfig,
ILoadedModel,
TContainerType,
} from '../interfaces/container.ts';
import type {
IChatCompletionChoice,
IChatCompletionRequest,
IChatCompletionResponse,
IChatCompletionChoice,
IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
@@ -161,7 +157,9 @@ export class TgiContainer extends BaseContainer {
const info = await this.fetchJson<ITgiInfoResponse>('/info');
return [info.model_id];
} catch (error) {
logger.warn(`Failed to get TGI info: ${error instanceof Error ? error.message : String(error)}`);
logger.warn(
`Failed to get TGI info: ${error instanceof Error ? error.message : String(error)}`,
);
return this.config.models || [];
}
}
@@ -232,7 +230,11 @@ export class TgiContainer extends BaseContainer {
temperature: request.temperature,
top_p: request.top_p,
max_new_tokens: request.max_tokens || 1024,
stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
stop: Array.isArray(request.stop)
? request.stop
: request.stop
? [request.stop]
: undefined,
do_sample: (request.temperature || 0) > 0,
return_full_text: false,
},
@@ -288,7 +290,11 @@ export class TgiContainer extends BaseContainer {
temperature: request.temperature,
top_p: request.top_p,
max_new_tokens: request.max_tokens || 1024,
stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
stop: Array.isArray(request.stop)
? request.stop
: request.stop
? [request.stop]
: undefined,
do_sample: (request.temperature || 0) > 0,
},
},