feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+56
View File
@@ -0,0 +1,56 @@
/**
* Model catalog interfaces for list.modelgrid.com.
*/
/**
 * Capability flags carried by a catalog entry (see `IModelCatalogEntry.capabilities`).
 *
 * Every flag is optional. The declaration does not say how consumers treat an
 * omitted flag.
 */
export interface IModelCapabilitySet {
  /** Flag for the chat capability. */
  chat?: boolean;
  /** Flag for the completions capability. */
  completions?: boolean;
  /** Flag for the embeddings capability. */
  embeddings?: boolean;
  /** Flag for the tools capability. */
  tools?: boolean;
}
/**
 * Optional launch settings for a vLLM-backed deployment.
 *
 * All members are optional.
 * NOTE(review): the member names resemble vLLM engine options; how each one is
 * translated into launch arguments is not visible here — confirm against the
 * code that starts the container.
 */
export interface IVllmLaunchProfile {
  /** Replica count. */
  replicas?: number;
  /** Tensor-parallel size. */
  tensorParallelSize?: number;
  /** Pipeline-parallel size. */
  pipelineParallelSize?: number;
  /** Maximum model length. NOTE(review): presumably measured in tokens — confirm. */
  maxModelLen?: number;
  /** GPU memory utilization. NOTE(review): presumably a 0–1 fraction — confirm. */
  gpuMemoryUtilization?: number;
  /** Quantization scheme name (free-form string). */
  quantization?: string;
  /** Data type name (free-form string). */
  dtype?: string;
  /** Generation-config source; the type admits only `'auto'` or `'vllm'`. */
  generationConfig?: 'auto' | 'vllm';
  /** Additional arguments, as a list of strings. */
  extraArgs?: string[];
  /** String-to-string map of environment entries. */
  env?: Record<string, string>;
}
/**
 * One model record inside the catalog (`IModelCatalog.models`).
 */
export interface IModelCatalogEntry {
  /** Identifier of the model within the catalog. */
  id: string;
  /** Serving engine; the type admits only `'vllm'`. */
  engine: 'vllm';
  /** Alternative names. NOTE(review): presumably accepted in place of `id` — confirm with the resolver. */
  aliases?: string[];
  /** Where the model comes from. */
  source: {
    /** Repository reference. NOTE(review): exact format (e.g. hub repo id vs URL) is not shown here. */
    repo: string;
    /** Optional revision of the repository. */
    revision?: string;
    /** Optional tokenizer reference. */
    tokenizer?: string;
    /** Optional license string. */
    license?: string;
    /** Optional homepage string. */
    homepage?: string;
  };
  /** Capability flags of the model. */
  capabilities: IModelCapabilitySet;
  /** Hardware requirements. Units follow the `Gb` suffix in the member names. */
  requirements: {
    /** Minimum VRAM (required member). */
    minVramGb: number;
    /** Optional recommended VRAM. */
    recommendedVramGb?: number;
    /** Optional minimum number of GPUs. */
    minGpuCount?: number;
  };
  /** Optional default vLLM launch settings for this model. */
  launchDefaults?: IVllmLaunchProfile;
  /** Optional descriptive metadata; every member is optional. */
  metadata?: {
    /** Model family name. */
    family?: string;
    /** Parameter count, kept as a string. */
    parameterCount?: string;
    /** Context window size as a number. */
    contextWindow?: number;
    /** Short summary text. */
    summary?: string;
    /** Free-form tags. */
    tags?: string[];
  };
}
/**
 * Top-level catalog document: a version string, a generation timestamp string
 * and the list of model entries.
 */
export interface IModelCatalog {
  /** Catalog version string. */
  version: string;
  /** Generation time as a string. NOTE(review): presumably ISO-8601 — confirm with the producer. */
  generatedAt: string;
  /** All catalog entries. */
  models: IModelCatalogEntry[];
}
+91
View File
@@ -0,0 +1,91 @@
/**
* Cluster and deployment interfaces.
*/
/** Role a node can hold in the cluster. */
export type TClusterRole =
  | 'standalone'
  | 'control-plane'
  | 'worker';

/** Scheduler state that can be reported for a cluster node. */
export type TClusterNodeSchedulerState =
  | 'active'
  | 'cordoned'
  | 'draining';
/**
 * Cluster section of the ModelGrid configuration.
 *
 * The first five members are required; the remaining ones are optional.
 */
export interface IClusterConfig {
  /** Whether clustering is enabled. */
  enabled: boolean;
  /** Name of this node. */
  nodeName: string;
  /** Role of this node. */
  role: TClusterRole;
  /** Host to bind to. */
  bindHost: string;
  /** Gossip port number. */
  gossipPort: number;
  /** Optional shared secret. NOTE(review): presumably authenticates cluster traffic — confirm usage. */
  sharedSecret?: string;
  /** Optional URL this node advertises. */
  advertiseUrl?: string;
  /** Optional URL of the control plane. */
  controlPlaneUrl?: string;
  /** Optional heartbeat interval; milliseconds per the `Ms` suffix. */
  heartbeatIntervalMs?: number;
  /** Optional list of seed nodes. NOTE(review): entry format (URL vs host:port) is not shown here. */
  seedNodes?: string[];
}
/**
 * Basic identity and health information for a cluster node.
 */
export interface IClusterNodeStatus {
  /** Name of the node. */
  nodeName: string;
  /** Role of the node. */
  role: TClusterRole;
  /** Optional endpoint of the node. */
  endpoint?: string;
  /** Health flag for the node. */
  healthy: boolean;
  /** Optional scheduler state of the node. */
  schedulerState?: TClusterNodeSchedulerState;
}
/**
 * Resource summary of a node. All members are required.
 *
 * VRAM figures are in GB per the `Gb` suffix of the member names.
 */
export interface IClusterNodeResources {
  /** Number of GPUs. */
  gpuCount: number;
  /** Total VRAM. */
  totalVramGb: number;
  /** Available VRAM. */
  availableVramGb: number;
  /** Largest VRAM of any single GPU. */
  maxSingleGpuVramGb: number;
  /** GPU count of the largest GPU group. NOTE(review): presumably refers to `topologyGroups` — confirm. */
  largestGpuGroupCount: number;
  /** VRAM of the largest GPU group. */
  largestGpuGroupVramGb: number;
  /** Number of deployments. */
  deploymentCount: number;
  /** GPU topology groups of the node. */
  topologyGroups: IClusterGpuTopologyGroup[];
}
/**
 * A group of GPUs within a node's topology. All members are required.
 */
export interface IClusterGpuTopologyGroup {
  /** Identifier of the group. */
  id: string;
  /** GPU vendor; one of the four listed literals. */
  vendor:
    | 'nvidia'
    | 'amd'
    | 'intel'
    | 'unknown';
  /** Identifiers of the GPUs in the group. */
  gpuIds: string[];
  /** Number of GPUs in the group. */
  gpuCount: number;
  /** Total VRAM of the group (GB per the name). */
  totalVramGb: number;
  /** Largest VRAM of any single GPU in the group (GB per the name). */
  maxSingleGpuVramGb: number;
  /** Bus numbers. NOTE(review): presumably PCI bus numbers of the member GPUs — confirm. */
  busNumbers: number[];
}
/**
 * Description of one deployment as listed in a node heartbeat
 * (`IClusterNodeHeartbeat.deployments`).
 */
export interface IClusterDeploymentAdvertisement {
  /** Identifier of the deployed model. */
  modelId: string;
  /** Serving engine; the type admits only `'vllm'`. */
  engine: 'vllm';
  /** Endpoint of the deployment. */
  endpoint: string;
  /** Health flag of the deployment. */
  healthy: boolean;
  /** Optional container identifier. */
  containerId?: string;
}
/**
 * Heartbeat record of a node.
 *
 * Extends `IClusterNodeStatus` and redeclares `endpoint` as a required string
 * (it is optional on the base interface).
 */
export interface IClusterNodeHeartbeat extends IClusterNodeStatus {
  /** Endpoint of the node; mandatory on a heartbeat. */
  endpoint: string;
  /** Resource summary of the node. */
  resources: IClusterNodeResources;
  /** Deployments listed by the node. */
  deployments: IClusterDeploymentAdvertisement[];
  /** Last-seen time as a number. NOTE(review): presumably epoch milliseconds — confirm. */
  lastSeenAt: number;
}
/**
 * Where a model is located in the cluster: the node name plus the
 * deployment's endpoint, engine and health.
 */
export interface IClusterModelLocation {
  /** Identifier of the model. */
  modelId: string;
  /** Name of the node associated with this location. */
  nodeName: string;
  /** Endpoint for this location. */
  endpoint: string;
  /** Health flag for this location. */
  healthy: boolean;
  /** Serving engine; the type admits only `'vllm'`. */
  engine: 'vllm';
  /** Optional container identifier. */
  containerId?: string;
}
/**
 * Response shape of a cluster "ensure" operation.
 * NOTE(review): the operation itself is not visible here — presumably it makes
 * sure a model is deployed somewhere in the cluster; confirm with the handler.
 */
export interface IClusterEnsureResponse {
  /** Model the response refers to. */
  model: string;
  /** Location of the model. */
  location: IClusterModelLocation;
  /** Creation flag. NOTE(review): presumably true when a new deployment was started — confirm. */
  created: boolean;
}
/**
 * Desired replica count recorded for a model.
 */
export interface IClusterDesiredDeployment {
  /** Identifier of the model. */
  modelId: string;
  /** Number of replicas desired. */
  desiredReplicas: number;
  /** Update time as a number. NOTE(review): presumably epoch milliseconds — confirm. */
  updatedAt: number;
}
/**
 * Cluster status payload: the local node, all known nodes, model locations
 * and desired deployments.
 */
export interface IClusterStatusResponse {
  /** Heartbeat of the local node, or `null` when there is none. */
  localNode: IClusterNodeHeartbeat | null;
  /** Heartbeats of the nodes. */
  nodes: IClusterNodeHeartbeat[];
  /** Model locations grouped under string keys. NOTE(review): keys are presumably model ids — confirm. */
  models: Record<string, IClusterModelLocation[]>;
  /** Desired deployments. */
  desiredDeployments: IClusterDesiredDeployment[];
}
+13 -36
View File
@@ -1,9 +1,9 @@
/**
* ModelGrid Configuration Interfaces
*
* Defines the configuration structure for the ModelGrid daemon.
* ModelGrid configuration interfaces.
*/
import type { IModelCatalog, IModelCatalogEntry } from './catalog.ts';
import type { IClusterConfig } from './cluster.ts';
import type { IContainerConfig } from './container.ts';
/**
@@ -50,12 +50,12 @@ export interface IGpuAssignmentConfig {
* Model management configuration
*/
export interface IModelConfig {
// NOTE(review): this span appears to interleave the members removed by the
// diff hunk (greenlistUrl, autoPull, defaultContainer) with their
// replacements (registryUrl, autoDeploy, defaultEngine) — confirm which set
// belongs in the final file before relying on this shape.
/** URL to fetch greenlit models list */
greenlistUrl: string;
/** Whether to auto-pull models when requested */
autoPull: boolean;
/** Default container type for new models */
defaultContainer: 'ollama' | 'vllm' | 'tgi';
/** URL to fetch the public catalog */
registryUrl: string;
/** Whether to auto-start a deployment when requested */
autoDeploy: boolean;
/** Default engine for new deployments; the type admits only 'vllm' */
defaultEngine: 'vllm';
/** Models to auto-load on startup */
autoLoad: string[];
}
@@ -76,37 +76,14 @@ export interface IModelGridConfig {
containers: IContainerConfig[];
/** Model management configuration */
models: IModelConfig;
/** Cluster configuration */
cluster: IClusterConfig;
/** Health check interval in milliseconds */
checkInterval: number;
}
/**
 * A single greenlit model as listed in the remote greenlist.
 */
export interface IGreenlitModel {
  /** Name of the model, e.g. "llama3:8b". */
  name: string;
  /** Container type preferred for this model. */
  container: 'ollama' | 'vllm' | 'tgi';
  /** Minimum VRAM the model needs, in GB. */
  minVram: number;
  /** Categorization tags (optional). */
  tags?: string[];
  /** Human-readable description (optional). */
  description?: string;
}
/**
 * Shape of the greenlit models list document.
 */
export interface IGreenlitModelsList {
  /** Version of the list. */
  version: string;
  /** Timestamp of the last update, as a string. */
  lastUpdated: string;
  /** The greenlit models contained in the list. */
  models: IGreenlitModel[];
}
/** Alias of `IModelCatalogEntry` (imported from './catalog.ts') under a registry-oriented name. */
export type IRegistryModel = IModelCatalogEntry;
/** Alias of `IModelCatalog` (imported from './catalog.ts') under a registry-oriented name. */
export type IRegistryCatalog = IModelCatalog;
/**
* Update status information
+2 -4
View File
@@ -1,13 +1,11 @@
/**
* ModelGrid Container Interfaces
*
* Defines types for container management (Ollama, vLLM, TGI).
* ModelGrid container interfaces.
*/
/**
* Container type
*/
export type TContainerType = 'ollama' | 'vllm' | 'tgi' | 'custom';
// NOTE(review): the declaration above and the one below look like the
// before/after lines of a diff hunk (the second drops 'ollama'); two
// declarations of the same alias cannot coexist — confirm which one the final
// file keeps.
export type TContainerType = 'vllm' | 'tgi' | 'custom';
/**
* Container health status
+2
View File
@@ -5,6 +5,8 @@
*/
export * from './config.ts';
export * from './catalog.ts';
export * from './cluster.ts';
export * from './gpu.ts';
export * from './container.ts';
export * from './api.ts';