feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+15
-9
@@ -4,15 +4,11 @@
|
||||
* Manages HuggingFace Text Generation Inference containers.
|
||||
*/
|
||||
|
||||
import type { IContainerConfig, ILoadedModel, TContainerType } from '../interfaces/container.ts';
|
||||
import type {
|
||||
IContainerConfig,
|
||||
ILoadedModel,
|
||||
TContainerType,
|
||||
} from '../interfaces/container.ts';
|
||||
import type {
|
||||
IChatCompletionChoice,
|
||||
IChatCompletionRequest,
|
||||
IChatCompletionResponse,
|
||||
IChatCompletionChoice,
|
||||
IChatMessage,
|
||||
} from '../interfaces/api.ts';
|
||||
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
|
||||
@@ -161,7 +157,9 @@ export class TgiContainer extends BaseContainer {
|
||||
const info = await this.fetchJson<ITgiInfoResponse>('/info');
|
||||
return [info.model_id];
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to get TGI info: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.warn(
|
||||
`Failed to get TGI info: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return this.config.models || [];
|
||||
}
|
||||
}
|
||||
@@ -232,7 +230,11 @@ export class TgiContainer extends BaseContainer {
|
||||
temperature: request.temperature,
|
||||
top_p: request.top_p,
|
||||
max_new_tokens: request.max_tokens || 1024,
|
||||
stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
|
||||
stop: Array.isArray(request.stop)
|
||||
? request.stop
|
||||
: request.stop
|
||||
? [request.stop]
|
||||
: undefined,
|
||||
do_sample: (request.temperature || 0) > 0,
|
||||
return_full_text: false,
|
||||
},
|
||||
@@ -288,7 +290,11 @@ export class TgiContainer extends BaseContainer {
|
||||
temperature: request.temperature,
|
||||
top_p: request.top_p,
|
||||
max_new_tokens: request.max_tokens || 1024,
|
||||
stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
|
||||
stop: Array.isArray(request.stop)
|
||||
? request.stop
|
||||
: request.stop
|
||||
? [request.stop]
|
||||
: undefined,
|
||||
do_sample: (request.temperature || 0) > 0,
|
||||
},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user