feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+10 -5
View File
@@ -141,8 +141,8 @@ const containerColumns: ITableColumn[] = [
];
const containerData = [
{ id: 'ollama-1', type: 'ollama', status: 'Running', gpu: 'gpu-0', models: '3' },
{ id: 'vllm-1', type: 'vllm', status: 'Running', gpu: 'gpu-1', models: '1' },
{ id: 'vllm-qwen', type: 'vllm', status: 'Running', gpu: 'gpu-0', models: '1' },
{ id: 'vllm-llama', type: 'vllm', status: 'Running', gpu: 'gpu-1', models: '1' },
];
logger.logTable(containerColumns, containerData, 'AI Containers');
@@ -183,9 +183,14 @@ const modelColumns: ITableColumn[] = [
];
const modelData = [
{ name: 'llama3:8b', container: 'ollama-1', size: '4.7 GB', status: 'Loaded' },
{ name: 'mistral:7b', container: 'ollama-1', size: '4.1 GB', status: 'Loaded' },
{ name: 'llama3:70b', container: 'vllm-1', size: '40 GB', status: 'Loaded' },
{ name: 'Qwen/Qwen2.5-7B-Instruct', container: 'vllm-qwen', size: '15 GB', status: 'Loaded' },
{
name: 'meta-llama/Llama-3.1-8B-Instruct',
container: 'vllm-llama',
size: '16 GB',
status: 'Loaded',
},
{ name: 'BAAI/bge-m3', container: 'vllm-embed', size: '5 GB', status: 'Loaded' },
];
logger.logTable(modelColumns, modelData, 'Loaded Models');