feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
@@ -2,7 +2,8 @@

 ## Project Overview

-ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers (Ollama, vLLM, TGI) with an OpenAI-compatible API interface.
+ModelGrid is a root-level daemon that manages GPU infrastructure, Docker, and AI model containers
+(Ollama, vLLM, TGI) with an OpenAI-compatible API interface.

 ## Architecture

@@ -52,7 +53,6 @@ ts/
 │   ├── docker-manager.ts # Docker setup
 │   └── container-runtime.ts # Container lifecycle
 ├── containers/           # AI container management
-│   ├── ollama.ts         # Ollama container
 │   ├── vllm.ts           # vLLM container
 │   ├── tgi.ts            # TGI container
 │   └── container-manager.ts # Orchestrator
@@ -83,16 +83,19 @@ ts/
 ## Key Concepts

 ### Greenlit Model System
+
 - For security, only pre-approved models can be auto-pulled
 - The greenlist is fetched from a configurable remote URL
 - VRAM requirements checked before loading

 ### Container Types
+
 - **Ollama**: Easy to use, native API converted to OpenAI format
 - **vLLM**: High performance, natively OpenAI-compatible
 - **TGI**: HuggingFace Text Generation Inference

 ### GPU Support
+
 - NVIDIA: nvidia-smi, CUDA, nvidia-docker2
 - AMD: rocm-smi, ROCm
 - Intel Arc: xpu-smi, oneAPI
@@ -105,14 +108,14 @@ Config file: `/etc/modelgrid/config.json`
 interface IModelGridConfig {
  version: string;
  api: {
-    port: number;           // Default: 8080
-    host: string;           // Default: '0.0.0.0'
-    apiKeys: string[];      // Valid API keys
+    port: number; // Default: 8080
+    host: string; // Default: '0.0.0.0'
+    apiKeys: string[]; // Valid API keys
    cors: boolean;
    corsOrigins: string[];
  };
  docker: {
-    networkName: string;    // Default: 'modelgrid'
+    networkName: string; // Default: 'modelgrid'
    runtime: 'docker' | 'podman';
  };
  gpus: {