feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
@@ -0,0 +1,56 @@
+/**
+ * Model catalog interfaces for list.modelgrid.com.
+ */
+
+export interface IModelCapabilitySet { // Set of optional boolean capability flags attached to a catalog entry.
+  chat?: boolean; // NOTE(review): presumably chat-style request support — confirm against the request router
+  completions?: boolean; // NOTE(review): presumably plain text-completion support — confirm
+  embeddings?: boolean; // NOTE(review): presumably embedding-generation support — confirm
+  tools?: boolean; // NOTE(review): presumably tool/function-calling support — confirm
+} // Every flag is optional; whether an omitted flag is treated as false is not visible here — TODO confirm with consumers
+
+export interface IVllmLaunchProfile { // Launch settings bag; every field is optional. NOTE(review): names appear to mirror vLLM engine options — confirm how they are translated
+  replicas?: number; // NOTE(review): presumably the number of instances to start — confirm
+  tensorParallelSize?: number; // NOTE(review): looks like vLLM's tensor-parallel size — confirm
+  pipelineParallelSize?: number; // NOTE(review): looks like vLLM's pipeline-parallel size — confirm
+  maxModelLen?: number; // NOTE(review): presumably a context-length cap in tokens — confirm unit
+  gpuMemoryUtilization?: number; // NOTE(review): presumably a fraction in (0, 1]; range is not enforced by this type — confirm
+  quantization?: string; // free-form string; accepted values are not constrained by this type
+  dtype?: string; // free-form string; accepted values are not constrained by this type
+  generationConfig?: 'auto' | 'vllm'; // only these two literals type-check. NOTE(review): if this feeds vLLM's generation-config option, a custom path cannot be expressed — confirm intent
+  extraArgs?: string[]; // NOTE(review): presumably additional arguments passed through at launch — confirm ordering/escaping with the launcher
+  env?: Record<string, string>; // string-to-string map. NOTE(review): presumably extra environment variables for the launched process — confirm
+}
+
+export interface IModelCatalogEntry { // One model record in the catalog: identity, source, capabilities, hardware requirements, optional launch defaults and metadata.
+  id: string; // required identifier. NOTE(review): uniqueness within a catalog is not enforced by this type — confirm
+  aliases?: string[]; // optional alternative names. NOTE(review): presumably resolved to `id` during lookup — confirm
+  engine: 'vllm'; // required; 'vllm' is the only value the type accepts
+  source: { // required block; only `repo` is mandatory inside it
+    repo: string; // NOTE(review): presumably a model repository identifier — format not visible here, confirm
+    revision?: string; // NOTE(review): presumably a branch, tag, or commit of `repo` — confirm
+    tokenizer?: string; // NOTE(review): presumably a tokenizer override — confirm
+    license?: string; // free-form string; no fixed license vocabulary is enforced by this type
+    homepage?: string; // NOTE(review): presumably a URL; not validated by this type
+  };
+  capabilities: IModelCapabilitySet; // required, although each flag inside it is optional
+  requirements: { // required block; only `minVramGb` is mandatory inside it
+    minVramGb: number; // NOTE(review): name suggests gigabytes of VRAM; per-GPU vs. total is not visible here — confirm
+    recommendedVramGb?: number; // NOTE(review): same unit question as minVramGb — confirm
+    minGpuCount?: number; // NOTE(review): behavior when omitted is not visible here — confirm
+  };
+  launchDefaults?: IVllmLaunchProfile; // optional launch profile. NOTE(review): presumably defaults that a deployment may override — confirm
+  metadata?: { // optional descriptive block; every field inside it is optional
+    family?: string; // NOTE(review): presumably the model family name — confirm
+    parameterCount?: string; // typed as a string, not a number. NOTE(review): presumably a display value — confirm
+    contextWindow?: number; // NOTE(review): presumably measured in tokens — confirm
+    summary?: string; // NOTE(review): presumably a short human-readable description — confirm
+    tags?: string[]; // free-form labels; no fixed vocabulary is enforced by this type
+  };
+}
+
+export interface IModelCatalog { // Top-level catalog document: a version, a generation stamp, and the list of model entries. All three fields are required.
+  version: string; // NOTE(review): versioning scheme is not visible here — confirm
+  generatedAt: string; // typed as a plain string. NOTE(review): presumably a timestamp; format is not visible here — confirm
+  models: IModelCatalogEntry[]; // may be empty as far as the type is concerned
+}