feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Model catalog interfaces for list.modelgrid.com.
|
||||
*/
|
||||
|
||||
/**
 * Set of optional boolean capability flags: chat, completions, embeddings
 * and tools. Used as the type of `IModelCatalogEntry.capabilities`.
 *
 * Every flag may be omitted; how an omitted flag is interpreted is not
 * defined by this interface.
 * NOTE(review): presumably each flag marks an API feature the model
 * supports — confirm against the code that consumes the catalog.
 */
export interface IModelCapabilitySet {
|
||||
chat?: boolean;
|
||||
completions?: boolean;
|
||||
embeddings?: boolean;
|
||||
tools?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Optional launch settings; every field may be omitted. Used as the type of
 * `IModelCatalogEntry.launchDefaults`.
 *
 * NOTE(review): the field names resemble vLLM engine options (tensor/pipeline
 * parallel size, max model length, GPU memory utilization, quantization,
 * dtype) — presumably they are forwarded to the vLLM launcher; confirm the
 * exact mapping and valid ranges there, since this interface does not
 * constrain them.
 */
export interface IVllmLaunchProfile {
|
||||
replicas?: number;
|
||||
tensorParallelSize?: number;
|
||||
pipelineParallelSize?: number;
|
||||
maxModelLen?: number;
|
||||
// Plain number; the expected range (e.g. a 0..1 fraction) is not enforced here — TODO confirm.
gpuMemoryUtilization?: number;
|
||||
quantization?: string;
|
||||
dtype?: string;
|
||||
// Restricted to the two literals 'auto' and 'vllm'.
generationConfig?: 'auto' | 'vllm';
|
||||
// Free-form list of additional argument strings.
extraArgs?: string[];
|
||||
// String-to-string map; presumably environment variables for the launched process — verify against the launcher.
env?: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
 * One entry of the model catalog (element type of `IModelCatalog.models`).
 *
 * Required: `id`, `engine`, `source.repo`, `capabilities` and
 * `requirements.minVramGb`. Everything else is optional.
 */
export interface IModelCatalogEntry {
|
||||
id: string;
|
||||
// Optional alternative identifiers for the same entry.
aliases?: string[];
|
||||
// Only the literal 'vllm' is accepted.
engine: 'vllm';
|
||||
// Where the model comes from; only `repo` is mandatory.
source: {
|
||||
repo: string;
|
||||
revision?: string;
|
||||
tokenizer?: string;
|
||||
license?: string;
|
||||
homepage?: string;
|
||||
};
|
||||
capabilities: IModelCapabilitySet;
|
||||
// Hardware requirements. The `Gb` suffix suggests gigabytes of VRAM — TODO confirm unit.
requirements: {
|
||||
minVramGb: number;
|
||||
recommendedVramGb?: number;
|
||||
minGpuCount?: number;
|
||||
};
|
||||
// Optional default launch profile for this entry.
launchDefaults?: IVllmLaunchProfile;
|
||||
// Optional descriptive information; every field inside is optional as well.
metadata?: {
|
||||
family?: string;
|
||||
// Note: typed as a string, not a number.
parameterCount?: string;
|
||||
contextWindow?: number;
|
||||
summary?: string;
|
||||
tags?: string[];
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Top-level catalog document: a version string, a generation timestamp
 * string and the list of catalog entries. All three fields are required.
 */
export interface IModelCatalog {
|
||||
version: string;
|
||||
// Carried as a string; the exact timestamp format is not specified here — TODO confirm (ISO 8601?).
generatedAt: string;
|
||||
models: IModelCatalogEntry[];
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
/**
|
||||
* Cluster and deployment interfaces.
|
||||
*/
|
||||
|
||||
/** Role of a node: one of 'standalone', 'control-plane' or 'worker'. */
export type TClusterRole = 'standalone' | 'control-plane' | 'worker';
|
||||
/**
 * Scheduler state of a node: 'active', 'cordoned' or 'draining'.
 * NOTE(review): the scheduling behaviour attached to each state is defined elsewhere — confirm.
 */
export type TClusterNodeSchedulerState = 'active' | 'cordoned' | 'draining';
|
||||
|
||||
/**
 * Cluster configuration. Used as the type of the `cluster` field of the
 * ModelGrid configuration.
 *
 * Required: `enabled`, `nodeName`, `role`, `bindHost`, `gossipPort`.
 * Optional: `sharedSecret`, `advertiseUrl`, `controlPlaneUrl`,
 * `heartbeatIntervalMs`, `seedNodes`.
 */
export interface IClusterConfig {
|
||||
enabled: boolean;
|
||||
nodeName: string;
|
||||
role: TClusterRole;
|
||||
bindHost: string;
|
||||
gossipPort: number;
|
||||
sharedSecret?: string;
|
||||
advertiseUrl?: string;
|
||||
controlPlaneUrl?: string;
|
||||
// The `Ms` suffix suggests milliseconds; any default value is applied elsewhere — TODO confirm.
heartbeatIntervalMs?: number;
|
||||
// List of strings; presumably addresses of peers to contact first — verify expected format.
seedNodes?: string[];
|
||||
}
|
||||
|
||||
/**
 * Basic status of a cluster node: its name, role and health flag, plus an
 * optional endpoint and an optional scheduler state. Base interface of
 * `IClusterNodeHeartbeat`.
 */
export interface IClusterNodeStatus {
|
||||
nodeName: string;
|
||||
role: TClusterRole;
|
||||
// Optional here; `IClusterNodeHeartbeat` redeclares it as required.
endpoint?: string;
|
||||
healthy: boolean;
|
||||
schedulerState?: TClusterNodeSchedulerState;
|
||||
}
|
||||
|
||||
/**
 * Resource summary of a node, carried in `IClusterNodeHeartbeat.resources`.
 * All fields are required: seven numeric figures plus the list of GPU
 * topology groups.
 *
 * NOTE(review): the `Gb` suffix suggests the VRAM figures are in gigabytes,
 * and the `largestGpuGroup*` fields presumably summarise the biggest entry
 * of `topologyGroups` — confirm with the code that fills this structure.
 */
export interface IClusterNodeResources {
|
||||
gpuCount: number;
|
||||
totalVramGb: number;
|
||||
availableVramGb: number;
|
||||
maxSingleGpuVramGb: number;
|
||||
largestGpuGroupCount: number;
|
||||
largestGpuGroupVramGb: number;
|
||||
deploymentCount: number;
|
||||
topologyGroups: IClusterGpuTopologyGroup[];
|
||||
}
|
||||
|
||||
/**
 * A group of GPUs, element type of `IClusterNodeResources.topologyGroups`.
 * All fields are required.
 *
 * NOTE(review): how GPUs are assigned to a group (e.g. by vendor or bus
 * locality) is not visible here — confirm with the producer.
 */
export interface IClusterGpuTopologyGroup {
|
||||
id: string;
|
||||
// One of four literals; 'unknown' is an explicit allowed value.
vendor: 'nvidia' | 'amd' | 'intel' | 'unknown';
|
||||
gpuIds: string[];
|
||||
// Presumably equals gpuIds.length — not enforced by the type.
gpuCount: number;
|
||||
totalVramGb: number;
|
||||
maxSingleGpuVramGb: number;
|
||||
// Numeric bus identifiers; presumably PCI bus numbers — TODO confirm.
busNumbers: number[];
|
||||
}
|
||||
|
||||
/**
 * Description of one deployment as listed in
 * `IClusterNodeHeartbeat.deployments`: model id, engine, endpoint and
 * health flag, plus an optional container id.
 */
export interface IClusterDeploymentAdvertisement {
|
||||
modelId: string;
|
||||
// Only the literal 'vllm' is accepted.
engine: 'vllm';
|
||||
endpoint: string;
|
||||
healthy: boolean;
|
||||
containerId?: string;
|
||||
}
|
||||
|
||||
/**
 * Node heartbeat: everything in `IClusterNodeStatus` plus resources, the
 * list of advertised deployments and a last-seen timestamp.
 */
export interface IClusterNodeHeartbeat extends IClusterNodeStatus {
|
||||
// Narrows the optional `endpoint` of the base interface to a required string.
endpoint: string;
|
||||
resources: IClusterNodeResources;
|
||||
deployments: IClusterDeploymentAdvertisement[];
|
||||
// Numeric timestamp; presumably epoch milliseconds — TODO confirm unit.
lastSeenAt: number;
|
||||
}
|
||||
|
||||
/**
 * Location of a model in the cluster: carries the same fields as
 * `IClusterDeploymentAdvertisement` plus the name of the node. Used in
 * `IClusterEnsureResponse.location` and as the value element type of
 * `IClusterStatusResponse.models`.
 */
export interface IClusterModelLocation {
|
||||
modelId: string;
|
||||
nodeName: string;
|
||||
endpoint: string;
|
||||
healthy: boolean;
|
||||
// Only the literal 'vllm' is accepted.
engine: 'vllm';
|
||||
containerId?: string;
|
||||
}
|
||||
|
||||
/**
 * Response shape pairing a model name with a location and a `created`
 * flag. All fields are required.
 * NOTE(review): presumably returned by an "ensure deployment" operation,
 * with `created` telling whether a new deployment was started — confirm
 * against the handler that builds it.
 */
export interface IClusterEnsureResponse {
|
||||
model: string;
|
||||
location: IClusterModelLocation;
|
||||
created: boolean;
|
||||
}
|
||||
|
||||
/**
 * Desired replica count for a model, with an update timestamp. Element
 * type of `IClusterStatusResponse.desiredDeployments`.
 */
export interface IClusterDesiredDeployment {
|
||||
modelId: string;
|
||||
desiredReplicas: number;
|
||||
// Numeric timestamp; presumably epoch milliseconds — TODO confirm unit.
updatedAt: number;
|
||||
}
|
||||
|
||||
/**
 * Cluster status snapshot: the local node's heartbeat (or null), the
 * heartbeats of the known nodes, the model locations and the desired
 * deployments.
 */
export interface IClusterStatusResponse {
|
||||
// Explicitly nullable; the condition under which it is null is decided by the producer.
localNode: IClusterNodeHeartbeat | null;
|
||||
nodes: IClusterNodeHeartbeat[];
|
||||
// Map from a string key to a list of locations; presumably keyed by model id — verify against the producer.
models: Record<string, IClusterModelLocation[]>;
|
||||
desiredDeployments: IClusterDesiredDeployment[];
|
||||
}
|
||||
+13
-36
@@ -1,9 +1,9 @@
|
||||
/**
|
||||
* ModelGrid Configuration Interfaces
|
||||
*
|
||||
* Defines the configuration structure for the ModelGrid daemon.
|
||||
* ModelGrid configuration interfaces.
|
||||
*/
|
||||
|
||||
import type { IModelCatalog, IModelCatalogEntry } from './catalog.ts';
|
||||
import type { IClusterConfig } from './cluster.ts';
|
||||
import type { IContainerConfig } from './container.ts';
|
||||
|
||||
/**
|
||||
@@ -50,12 +50,12 @@ export interface IGpuAssignmentConfig {
|
||||
* Model management configuration
|
||||
*/
|
||||
export interface IModelConfig {
|
||||
/** URL to fetch greenlit models list */
|
||||
greenlistUrl: string;
|
||||
/** Whether to auto-pull models when requested */
|
||||
autoPull: boolean;
|
||||
/** Default container type for new models */
|
||||
defaultContainer: 'ollama' | 'vllm' | 'tgi';
|
||||
/** URL to fetch the public catalog */
|
||||
registryUrl: string;
|
||||
/** Whether to auto-start a deployment when requested */
|
||||
autoDeploy: boolean;
|
||||
/** Default engine for new deployments */
|
||||
defaultEngine: 'vllm';
|
||||
/** Models to auto-load on startup */
|
||||
autoLoad: string[];
|
||||
}
|
||||
@@ -76,37 +76,14 @@ export interface IModelGridConfig {
|
||||
containers: IContainerConfig[];
|
||||
/** Model management configuration */
|
||||
models: IModelConfig;
|
||||
/** Cluster configuration */
|
||||
cluster: IClusterConfig;
|
||||
/** Health check interval in milliseconds */
|
||||
checkInterval: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Greenlit model entry from remote list
|
||||
*/
|
||||
export interface IGreenlitModel {
|
||||
/** Model name (e.g., "llama3:8b") */
|
||||
name: string;
|
||||
/** Preferred container type */
|
||||
container: 'ollama' | 'vllm' | 'tgi';
|
||||
/** Minimum VRAM required in GB */
|
||||
minVram: number;
|
||||
/** Optional tags for categorization */
|
||||
tags?: string[];
|
||||
/** Optional description */
|
||||
description?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Greenlit models list structure
|
||||
*/
|
||||
export interface IGreenlitModelsList {
|
||||
/** List version */
|
||||
version: string;
|
||||
/** Last updated timestamp */
|
||||
lastUpdated: string;
|
||||
/** List of greenlit models */
|
||||
models: IGreenlitModel[];
|
||||
}
|
||||
/** Alias of `IModelCatalogEntry` (imported from './catalog.ts'). */
export type IRegistryModel = IModelCatalogEntry;
|
||||
/** Alias of `IModelCatalog` (imported from './catalog.ts'). */
export type IRegistryCatalog = IModelCatalog;
|
||||
|
||||
/**
|
||||
* Update status information
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
/**
|
||||
* ModelGrid Container Interfaces
|
||||
*
|
||||
* Defines types for container management (Ollama, vLLM, TGI).
|
||||
* ModelGrid container interfaces.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Container type
|
||||
*/
|
||||
export type TContainerType = 'ollama' | 'vllm' | 'tgi' | 'custom';
|
||||
export type TContainerType = 'vllm' | 'tgi' | 'custom';
|
||||
|
||||
/**
|
||||
* Container health status
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
*/
|
||||
|
||||
export * from './config.ts';
|
||||
export * from './catalog.ts';
|
||||
export * from './cluster.ts';
|
||||
export * from './gpu.ts';
|
||||
export * from './container.ts';
|
||||
export * from './api.ts';
|
||||
|
||||
Reference in New Issue
Block a user