feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+11 -4
View File
@@ -103,7 +103,6 @@ modelgrid/
│ │ └── container-runtime.ts
│ ├── containers/ # Container orchestration
│ │ ├── index.ts
-│ │ ├── ollama.ts # Ollama container
│ │ ├── vllm.ts # vLLM container
│ │ ├── tgi.ts # TGI container
│ │ └── base-container.ts # Abstract container class
@@ -184,9 +183,17 @@ Models are controlled via a remote greenlist to prevent arbitrary downloads:
{
"version": "1.0",
"models": [
-{ "name": "llama3:8b", "container": "ollama", "minVram": 8 },
-{ "name": "mistral:7b", "container": "ollama", "minVram": 8 },
-{ "name": "llama3:70b", "container": "vllm", "minVram": 48 }
+{ "id": "Qwen/Qwen2.5-7B-Instruct", "engine": "vllm", "requirements": { "minVramGb": 16 } },
+{
+"id": "meta-llama/Llama-3.1-8B-Instruct",
+"engine": "vllm",
+"requirements": { "minVramGb": 18 }
+},
+{
+"id": "meta-llama/Llama-3.1-70B-Instruct",
+"engine": "vllm",
+"requirements": { "minVramGb": 48 }
+}
]
}
```