feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+10
-5
@@ -141,8 +141,8 @@ const containerColumns: ITableColumn[] = [
|
||||
];
|
||||
|
||||
const containerData = [
|
||||
{ id: 'ollama-1', type: 'ollama', status: 'Running', gpu: 'gpu-0', models: '3' },
|
||||
{ id: 'vllm-1', type: 'vllm', status: 'Running', gpu: 'gpu-1', models: '1' },
|
||||
{ id: 'vllm-qwen', type: 'vllm', status: 'Running', gpu: 'gpu-0', models: '1' },
|
||||
{ id: 'vllm-llama', type: 'vllm', status: 'Running', gpu: 'gpu-1', models: '1' },
|
||||
];
|
||||
|
||||
logger.logTable(containerColumns, containerData, 'AI Containers');
|
||||
@@ -183,9 +183,14 @@ const modelColumns: ITableColumn[] = [
|
||||
];
|
||||
|
||||
const modelData = [
|
||||
{ name: 'llama3:8b', container: 'ollama-1', size: '4.7 GB', status: 'Loaded' },
|
||||
{ name: 'mistral:7b', container: 'ollama-1', size: '4.1 GB', status: 'Loaded' },
|
||||
{ name: 'llama3:70b', container: 'vllm-1', size: '40 GB', status: 'Loaded' },
|
||||
{ name: 'Qwen/Qwen2.5-7B-Instruct', container: 'vllm-qwen', size: '15 GB', status: 'Loaded' },
|
||||
{
|
||||
name: 'meta-llama/Llama-3.1-8B-Instruct',
|
||||
container: 'vllm-llama',
|
||||
size: '16 GB',
|
||||
status: 'Loaded',
|
||||
},
|
||||
{ name: 'BAAI/bge-m3', container: 'vllm-embed', size: '5 GB', status: 'Loaded' },
|
||||
];
|
||||
|
||||
logger.logTable(modelColumns, modelData, 'Loaded Models');
|
||||
|
||||
Reference in New Issue
Block a user