feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
+11 -4
@@ -103,7 +103,6 @@ modelgrid/
│   │   └── container-runtime.ts
│   ├── containers/               # Container orchestration
│   │   ├── index.ts
│   │   ├── ollama.ts             # Ollama container
│   │   ├── vllm.ts               # vLLM container
│   │   ├── tgi.ts                # TGI container
│   │   └── base-container.ts     # Abstract container class
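The containers/ layout above implies a common contract shared by the Ollama, vLLM, and TGI backends. Below is a minimal sketch of what the abstract class in base-container.ts might look like; every type and method name is an assumption inferred from the file names and comments in the tree, not the actual modelgrid API.

```ts
// Hypothetical sketch only: names are inferred from the directory layout
// (containers/ollama.ts, vllm.ts, tgi.ts presumably extend base-container.ts).
export interface ContainerSpec {
  image: string;       // OCI image to run for this engine
  modelId: string;     // model the container should serve
  gpuVramGb: number;   // VRAM the scheduler must reserve for this container
}

export abstract class BaseContainer {
  constructor(protected readonly spec: ContainerSpec) {}

  // Each engine-specific subclass (ollama.ts, vllm.ts, tgi.ts) implements
  // its own startup, shutdown, and health-probe logic.
  abstract start(): Promise<void>;
  abstract stop(): Promise<void>;
  abstract healthy(): Promise<boolean>;
}
```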
@@ -184,9 +183,17 @@ Models are controlled via a remote greenlist to prevent arbitrary downloads:
 {
   "version": "1.0",
   "models": [
-    { "name": "llama3:8b", "container": "ollama", "minVram": 8 },
-    { "name": "mistral:7b", "container": "ollama", "minVram": 8 },
-    { "name": "llama3:70b", "container": "vllm", "minVram": 48 }
+    { "id": "Qwen/Qwen2.5-7B-Instruct", "engine": "vllm", "requirements": { "minVramGb": 16 } },
+    {
+      "id": "meta-llama/Llama-3.1-8B-Instruct",
+      "engine": "vllm",
+      "requirements": { "minVramGb": 18 }
+    },
+    {
+      "id": "meta-llama/Llama-3.1-70B-Instruct",
+      "engine": "vllm",
+      "requirements": { "minVramGb": 48 }
+    }
   ]
 }
 ```
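To make the new catalog entries concrete, here is a minimal sketch of how a node could fetch this greenlist and keep only the models its free VRAM can support. The ModelEntry and Greenlist types, the URL, and filterDeployable are illustrative assumptions, not the actual modelgrid API; only the field names mirror the example above.

```ts
// Hypothetical helper: field names match the greenlist example above,
// but the function and type names are assumptions for illustration.
interface ModelEntry {
  id: string;
  engine: "vllm" | "ollama" | "tgi";
  requirements: { minVramGb: number };
}

interface Greenlist {
  version: string;
  models: ModelEntry[];
}

// Fetch the remote greenlist and return only the entries this node can
// host, given its free VRAM in GB.
async function filterDeployable(url: string, freeVramGb: number): Promise<ModelEntry[]> {
  const res = await fetch(url);
  if (!res.ok) throw new Error(`greenlist fetch failed: ${res.status}`);
  const list = (await res.json()) as Greenlist;
  return list.models.filter((m) => m.requirements.minVramGb <= freeVramGb);
}

// Example: a 24 GB GPU node keeps the 16 GB and 18 GB entries above
// but skips Llama-3.1-70B-Instruct (48 GB).
filterDeployable("https://example.com/greenlist.json", 24)
  .then((models) => console.log(models.map((m) => m.id)));
```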