feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+9
-6
@@ -2,7 +2,8 @@
|
||||
|
||||
## Project Overview
|
||||
|
||||
ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers (Ollama, vLLM, TGI) with an OpenAI-compatible API interface.
|
||||
ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers
|
||||
(Ollama, vLLM, TGI) with an OpenAI-compatible API interface.
|
||||
|
||||
## Architecture
|
||||
|
||||
@@ -52,7 +53,6 @@ ts/
|
||||
│ ├── docker-manager.ts # Docker setup
|
||||
│ └── container-runtime.ts # Container lifecycle
|
||||
├── containers/ # AI container management
|
||||
│ ├── ollama.ts # Ollama container
|
||||
│ ├── vllm.ts # vLLM container
|
||||
│ ├── tgi.ts # TGI container
|
||||
│ └── container-manager.ts # Orchestrator
|
||||
@@ -83,16 +83,19 @@ ts/
|
||||
## Key Concepts
|
||||
|
||||
### Greenlit Model System
|
||||
|
||||
- For security, only pre-approved models can be auto-pulled
|
||||
- The greenlist is fetched from a configurable remote URL
|
||||
- VRAM requirements are checked before a model is loaded
|
||||
|
||||
### Container Types
|
||||
|
||||
- **Ollama**: Easy to use; its native API is converted to the OpenAI format
|
||||
- **vLLM**: High performance, natively OpenAI-compatible
|
||||
- **TGI**: HuggingFace Text Generation Inference
|
||||
|
||||
### GPU Support
|
||||
|
||||
- NVIDIA: nvidia-smi, CUDA, nvidia-docker2
|
||||
- AMD: rocm-smi, ROCm
|
||||
- Intel Arc: xpu-smi, oneAPI
|
||||
@@ -105,14 +108,14 @@ Config file: `/etc/modelgrid/config.json`
|
||||
interface IModelGridConfig {
|
||||
version: string;
|
||||
api: {
|
||||
port: number; // Default: 8080
|
||||
host: string; // Default: '0.0.0.0'
|
||||
apiKeys: string[]; // Valid API keys
|
||||
port: number; // Default: 8080
|
||||
host: string; // Default: '0.0.0.0'
|
||||
apiKeys: string[]; // Valid API keys
|
||||
cors: boolean;
|
||||
corsOrigins: string[];
|
||||
};
|
||||
docker: {
|
||||
networkName: string; // Default: 'modelgrid'
|
||||
networkName: string; // Default: 'modelgrid'
|
||||
runtime: 'docker' | 'podman';
|
||||
};
|
||||
gpus: {
|
||||
|
||||
Reference in New Issue
Block a user