feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
@@ -0,0 +1,56 @@
+/**
+ * Model catalog interfaces for list.modelgrid.com.
+ */
+
+export interface IModelCapabilitySet { // Optional boolean capability flags for a model; every flag may be omitted.
+  chat?: boolean; // How an omitted flag is treated versus an explicit `false` is not defined by this type.
+  completions?: boolean;
+  embeddings?: boolean;
+  tools?: boolean; // NOTE(review): presumably tool/function-calling support — confirm with the consumer.
+}
+
+export interface IVllmLaunchProfile { // Launch settings for a vLLM deployment; every field is optional.
+  replicas?: number;
+  tensorParallelSize?: number; // NOTE(review): field names resemble vLLM engine arguments — confirm the mapping in the launcher.
+  pipelineParallelSize?: number;
+  maxModelLen?: number;
+  gpuMemoryUtilization?: number; // NOTE(review): presumably a fraction in (0, 1]; the type enforces no range.
+  quantization?: string;
+  dtype?: string;
+  generationConfig?: 'auto' | 'vllm'; // Restricted to exactly these two literals.
+  extraArgs?: string[]; // NOTE(review): presumably passed through to the engine command line — verify.
+  env?: Record<string, string>; // String-to-string map; NOTE(review): presumably environment variables for the engine process — verify.
+}
+
+export interface IModelCatalogEntry { // One catalog model: identity, source, capabilities, hardware requirements and optional launch defaults/metadata.
+  id: string;
+  aliases?: string[]; // NOTE(review): presumably alternative names accepted when resolving a model — confirm.
+  engine: 'vllm'; // 'vllm' is the only engine literal this type admits.
+  source: {
+    repo: string; // The only required field of `source`.
+    revision?: string;
+    tokenizer?: string;
+    license?: string;
+    homepage?: string;
+  };
+  capabilities: IModelCapabilitySet;
+  requirements: {
+    minVramGb: number; // The only required requirement; NOTE(review): unit presumably GB, going by the name.
+    recommendedVramGb?: number;
+    minGpuCount?: number;
+  };
+  launchDefaults?: IVllmLaunchProfile;
+  metadata?: { // Entirely optional descriptive data; every member is optional as well.
+    family?: string;
+    parameterCount?: string; // Typed as a string, not a number.
+    contextWindow?: number;
+    summary?: string;
+    tags?: string[];
+  };
+}
+
+export interface IModelCatalog { // Top-level catalog document: a version, a generation timestamp and the model entries.
+  version: string;
+  generatedAt: string; // Timestamp carried as a string; NOTE(review): presumably ISO 8601 — confirm.
+  models: IModelCatalogEntry[];
+}
@@ -0,0 +1,91 @@
+/**
+ * Cluster and deployment interfaces.
+ */
+
+export type TClusterRole = 'standalone' | 'control-plane' | 'worker'; // Roles a node can be configured with (see IClusterConfig.role).
+export type TClusterNodeSchedulerState = 'active' | 'cordoned' | 'draining'; // NOTE(review): exact scheduling semantics of each state are not defined here — confirm in the scheduler.
+
+export interface IClusterConfig { // Cluster settings for a node; enabled, nodeName, role, bindHost and gossipPort are required.
+  enabled: boolean;
+  nodeName: string;
+  role: TClusterRole;
+  bindHost: string;
+  gossipPort: number;
+  sharedSecret?: string; // NOTE(review): optional, so the type permits a cluster config with no secret — confirm that is intended.
+  advertiseUrl?: string;
+  controlPlaneUrl?: string; // NOTE(review): presumably only meaningful for role 'worker'; the type does not tie it to the role.
+  heartbeatIntervalMs?: number; // Milliseconds, per the name; any default is applied elsewhere.
+  seedNodes?: string[];
+}
+
+export interface IClusterNodeStatus { // Basic identity and health of a node; extended by IClusterNodeHeartbeat.
+  nodeName: string;
+  role: TClusterRole;
+  endpoint?: string; // Optional here; IClusterNodeHeartbeat redeclares it as required.
+  healthy: boolean;
+  schedulerState?: TClusterNodeSchedulerState;
+}
+
+export interface IClusterNodeResources { // GPU/VRAM totals and deployment count reported for a node; all fields are required.
+  gpuCount: number;
+  totalVramGb: number; // NOTE(review): unit presumably GB, going by the name.
+  availableVramGb: number;
+  maxSingleGpuVramGb: number;
+  largestGpuGroupCount: number; // NOTE(review): presumably derived from topologyGroups — the type does not enforce consistency.
+  largestGpuGroupVramGb: number;
+  deploymentCount: number;
+  topologyGroups: IClusterGpuTopologyGroup[]; // Element type is declared further down in this file.
+}
+
+export interface IClusterGpuTopologyGroup { // A group of GPUs on a node, with vendor and aggregate VRAM figures.
+  id: string;
+  vendor: 'nvidia' | 'amd' | 'intel' | 'unknown'; // Closed set of vendor literals; 'unknown' is an explicit member.
+  gpuIds: string[];
+  gpuCount: number; // Stored alongside gpuIds; the type does not force gpuCount === gpuIds.length.
+  totalVramGb: number;
+  maxSingleGpuVramGb: number;
+  busNumbers: number[]; // NOTE(review): presumably PCI bus numbers of the member GPUs — confirm with the producer.
+}
+
+export interface IClusterDeploymentAdvertisement { // One deployment as listed in a node heartbeat (IClusterNodeHeartbeat.deployments).
+  modelId: string;
+  engine: 'vllm'; // 'vllm' is the only engine literal this type admits.
+  endpoint: string;
+  healthy: boolean;
+  containerId?: string;
+}
+
+export interface IClusterNodeHeartbeat extends IClusterNodeStatus { // Node status plus resources, advertised deployments and a last-seen timestamp.
+  endpoint: string; // Narrows the optional IClusterNodeStatus.endpoint to required.
+  resources: IClusterNodeResources;
+  deployments: IClusterDeploymentAdvertisement[];
+  lastSeenAt: number; // NOTE(review): numeric timestamp — presumably epoch milliseconds; confirm with the writer.
+}
+
+export interface IClusterModelLocation { // Where a model is served: the IClusterDeploymentAdvertisement fields plus the owning nodeName.
+  modelId: string;
+  nodeName: string;
+  endpoint: string;
+  healthy: boolean;
+  engine: 'vllm';
+  containerId?: string;
+}
+
+export interface IClusterEnsureResponse { // Result of an "ensure" request: the model, its resolved location and a created flag.
+  model: string; // NOTE(review): separate from location.modelId — presumably the name as requested (possibly an alias); confirm.
+  location: IClusterModelLocation;
+  created: boolean; // NOTE(review): presumably true when a new deployment was started for this request — confirm.
+}
+
+export interface IClusterDesiredDeployment { // Desired replica count for a model, with an update timestamp.
+  modelId: string;
+  desiredReplicas: number;
+  updatedAt: number; // NOTE(review): numeric timestamp — presumably epoch milliseconds; confirm.
+}
+
+export interface IClusterStatusResponse { // Cluster status payload: local node, all nodes, model locations and desired deployments.
+  localNode: IClusterNodeHeartbeat | null; // Explicitly nullable: the local heartbeat may be absent.
+  nodes: IClusterNodeHeartbeat[];
+  models: Record<string, IClusterModelLocation[]>; // NOTE(review): keys presumably model ids; each maps to a list of locations.
+  desiredDeployments: IClusterDesiredDeployment[];
+}
@@ -1,9 +1,9 @@
 /**
- * ModelGrid Configuration Interfaces
- *
- * Defines the configuration structure for the ModelGrid daemon.
+ * ModelGrid configuration interfaces.
 */

+import type { IModelCatalog, IModelCatalogEntry } from './catalog.ts';
+import type { IClusterConfig } from './cluster.ts';
 import type { IContainerConfig } from './container.ts';

 /**
@@ -50,12 +50,12 @@ export interface IGpuAssignmentConfig {
 * Model management configuration
 */
 export interface IModelConfig {
-  /** URL to fetch greenlit models list */
-  greenlistUrl: string;
-  /** Whether to auto-pull models when requested */
-  autoPull: boolean;
-  /** Default container type for new models */
-  defaultContainer: 'ollama' | 'vllm' | 'tgi';
+  /** URL to fetch the public catalog */
+  registryUrl: string;
+  /** Whether to auto-start a deployment when requested */
+  autoDeploy: boolean;
+  /** Default engine for new deployments */
+  defaultEngine: 'vllm';
  /** Models to auto-load on startup */
  autoLoad: string[];
 }
@@ -76,37 +76,14 @@ export interface IModelGridConfig {
  containers: IContainerConfig[];
  /** Model management configuration */
  models: IModelConfig;
+  /** Cluster configuration */
+  cluster: IClusterConfig;
  /** Health check interval in milliseconds */
  checkInterval: number;
 }

-/**
- * Greenlit model entry from remote list
- */
-export interface IGreenlitModel {
-  /** Model name (e.g., "llama3:8b") */
-  name: string;
-  /** Preferred container type */
-  container: 'ollama' | 'vllm' | 'tgi';
-  /** Minimum VRAM required in GB */
-  minVram: number;
-  /** Optional tags for categorization */
-  tags?: string[];
-  /** Optional description */
-  description?: string;
-}
-
-/**
- * Greenlit models list structure
- */
-export interface IGreenlitModelsList {
-  /** List version */
-  version: string;
-  /** Last updated timestamp */
-  lastUpdated: string;
-  /** List of greenlit models */
-  models: IGreenlitModel[];
-}
+export type IRegistryModel = IModelCatalogEntry; // Plain alias: identical to IModelCatalogEntry from ./catalog.ts.
+export type IRegistryCatalog = IModelCatalog; // Plain alias: identical to IModelCatalog from ./catalog.ts.

 /**
 * Update status information
@@ -1,13 +1,11 @@
 /**
- * ModelGrid Container Interfaces
- *
- * Defines types for container management (Ollama, vLLM, TGI).
+ * ModelGrid container interfaces.
 */

 /**
 * Container type
 */
-export type TContainerType = 'ollama' | 'vllm' | 'tgi' | 'custom';
+export type TContainerType = 'vllm' | 'tgi' | 'custom'; // NOTE(review): 'tgi' stays allowed here although IModelConfig.defaultEngine and IModelCatalogEntry.engine only admit 'vllm' — confirm this is intended.

 /**
 * Container health status
@@ -5,6 +5,8 @@
 */

 export * from './config.ts';
+export * from './catalog.ts';
+export * from './cluster.ts';
 export * from './gpu.ts';
 export * from './container.ts';
 export * from './api.ts';