feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+88 -13
View File
@@ -5,6 +5,8 @@
* This makes configuration easier and code more self-documenting.
*/
/** Published package version (semantic-version string) — NOTE(review): confirm this is kept in sync with package.json */
export const VERSION = '1.0.1';
/**
* Default timing values in milliseconds
*/
@@ -106,9 +108,6 @@ export const CONTAINER_PORTS = {
* Container image defaults
*/
export const CONTAINER_IMAGES = {
/** Ollama official image */
OLLAMA: 'ollama/ollama:latest',
/** vLLM official image */
VLLM: 'vllm/vllm-openai:latest',
@@ -120,20 +119,96 @@ export const CONTAINER_IMAGES = {
* Model registry constants
*/
export const MODEL_REGISTRY = {
/** Default greenlit models URL */
DEFAULT_GREENLIST_URL:
'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',
/** Default public catalog URL */
DEFAULT_CATALOG_URL: 'https://list.modelgrid.com/catalog/models.json',
/** Fallback greenlist if remote fetch fails */
FALLBACK_GREENLIST: [
{ name: 'llama3.2:1b', container: 'ollama', minVram: 4 },
{ name: 'llama3.2:3b', container: 'ollama', minVram: 6 },
{ name: 'llama3:8b', container: 'ollama', minVram: 8 },
{ name: 'mistral:7b', container: 'ollama', minVram: 8 },
{ name: 'codellama:7b', container: 'ollama', minVram: 8 },
/** Fallback catalog if remote fetch fails */
FALLBACK_CATALOG: [
{
id: 'Qwen/Qwen2.5-7B-Instruct',
aliases: ['qwen2.5-7b-instruct'],
engine: 'vllm',
source: {
repo: 'Qwen/Qwen2.5-7B-Instruct',
license: 'apache-2.0',
},
capabilities: {
chat: true,
completions: true,
tools: true,
},
requirements: {
minVramGb: 16,
recommendedVramGb: 24,
minGpuCount: 1,
},
metadata: {
family: 'Qwen2.5',
parameterCount: '7B',
contextWindow: 131072,
summary: 'General purpose instruct model for chat and tool use.',
tags: ['chat', 'tool-use', 'instruct'],
},
},
{
id: 'meta-llama/Llama-3.1-8B-Instruct',
aliases: ['llama-3.1-8b-instruct'],
engine: 'vllm',
source: {
repo: 'meta-llama/Llama-3.1-8B-Instruct',
license: 'llama3.1',
},
capabilities: {
chat: true,
completions: true,
tools: true,
},
requirements: {
minVramGb: 18,
recommendedVramGb: 24,
minGpuCount: 1,
},
metadata: {
family: 'Llama 3.1',
parameterCount: '8B',
contextWindow: 131072,
summary: 'High quality instruct model with good ecosystem support.',
tags: ['chat', 'tool-use', 'instruct'],
},
},
{
id: 'BAAI/bge-m3',
aliases: ['bge-m3'],
engine: 'vllm',
source: {
repo: 'BAAI/bge-m3',
license: 'mit',
},
capabilities: {
embeddings: true,
},
requirements: {
minVramGb: 8,
recommendedVramGb: 12,
minGpuCount: 1,
},
metadata: {
family: 'BGE',
summary: 'Multilingual embedding model for retrieval workloads.',
tags: ['embeddings', 'retrieval', 'multilingual'],
},
},
],
} as const;
/**
 * Cluster networking defaults
 */
export const CLUSTER = {
  /** Default bind address — 0.0.0.0 listens on all interfaces */
  DEFAULT_BIND_HOST: '0.0.0.0',
  /** Default port for cluster gossip traffic — NOTE(review): 7946 matches the Serf/memberlist convention; confirm intentional */
  DEFAULT_GOSSIP_PORT: 7946,
  /** Default interval between heartbeats (5 s) */
  DEFAULT_HEARTBEAT_INTERVAL_MS: 5000,
  /** Age after which a node is treated as stale (20 s = 4 missed default heartbeats) */
  NODE_STALE_AFTER_MS: 20000,
  /** HTTP header that carries the cluster secret — presumably a shared secret checked on inter-node requests; verify against callers */
  AUTH_HEADER_NAME: 'x-modelgrid-cluster-secret',
} as const;
/**
* Configuration paths
*/