feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -0,0 +1,91 @@
|
||||
/**
 * Cluster and deployment interfaces.
 */
|
||||
|
||||
/**
 * Role a node can take in a cluster: one of the three string literals below.
 */
export type TClusterRole =
  | 'standalone'
  | 'control-plane'
  | 'worker';
|
||||
/**
 * Scheduler state of a cluster node: one of the three string literals below.
 */
export type TClusterNodeSchedulerState =
  | 'active'
  | 'cordoned'
  | 'draining';
|
||||
|
||||
/**
 * Cluster settings for a node.
 *
 * `enabled`, `nodeName`, `role`, `bindHost` and `gossipPort` are required;
 * every other field is optional.
 *
 * NOTE(review): the per-field descriptions are inferred from the field names
 * only — confirm them against the code that consumes this configuration.
 */
export interface IClusterConfig {
  /** Presumably toggles cluster participation on or off. */
  enabled: boolean;
  /** Name identifying this node. */
  nodeName: string;
  /** Role of this node, see {@link TClusterRole}. */
  role: TClusterRole;
  /** Presumably the host/interface the node binds to. */
  bindHost: string;
  /** Presumably the port used for cluster gossip traffic. */
  gossipPort: number;
  /** Optional shared secret; how it is used is not visible here. */
  sharedSecret?: string;
  /** Optional URL; presumably the address advertised to other nodes. */
  advertiseUrl?: string;
  /** Optional URL; presumably points at the control-plane node. */
  controlPlaneUrl?: string;
  /** Optional heartbeat interval; the `Ms` suffix suggests milliseconds. */
  heartbeatIntervalMs?: number;
  /** Optional list of seed nodes; the string format is not visible here. */
  seedNodes?: string[];
}
|
||||
|
||||
/**
 * Basic status record for a cluster node.
 *
 * `endpoint` and `schedulerState` are optional here; `IClusterNodeHeartbeat`
 * extends this interface and redeclares `endpoint` as required.
 */
export interface IClusterNodeStatus {
  /** Name identifying the node. */
  nodeName: string;
  /** Role of the node, see {@link TClusterRole}. */
  role: TClusterRole;
  /** Optional endpoint string; its exact format is not visible here. */
  endpoint?: string;
  /** Health flag for the node. */
  healthy: boolean;
  /** Optional scheduler state, see {@link TClusterNodeSchedulerState}. */
  schedulerState?: TClusterNodeSchedulerState;
}
|
||||
|
||||
/**
 * Resource summary for a cluster node. All fields are required.
 *
 * NOTE(review): the `Gb` suffix suggests VRAM figures are in gigabytes, and
 * the "largest group" fields presumably summarise `topologyGroups`; neither
 * is established by this file — confirm against the producer.
 */
export interface IClusterNodeResources {
  /** Number of GPUs. */
  gpuCount: number;
  /** Total VRAM (unit presumably GB). */
  totalVramGb: number;
  /** Available VRAM (unit presumably GB). */
  availableVramGb: number;
  /** Largest VRAM of any single GPU (unit presumably GB). */
  maxSingleGpuVramGb: number;
  /** GPU count of the largest GPU group. */
  largestGpuGroupCount: number;
  /** VRAM of the largest GPU group (unit presumably GB). */
  largestGpuGroupVramGb: number;
  /** Number of deployments. */
  deploymentCount: number;
  /** GPU topology groups, see {@link IClusterGpuTopologyGroup}. */
  topologyGroups: IClusterGpuTopologyGroup[];
}
|
||||
|
||||
/**
 * One group of GPUs within a node's topology. All fields are required.
 *
 * NOTE(review): how GPUs are assigned to a group, and what `busNumbers`
 * refers to (presumably PCI bus numbers), is not visible in this file.
 */
export interface IClusterGpuTopologyGroup {
  /** Identifier of the group. */
  id: string;
  /** GPU vendor; restricted to the four literals listed. */
  vendor: 'nvidia' | 'amd' | 'intel' | 'unknown';
  /** Identifiers of the GPUs in this group. */
  gpuIds: string[];
  /** Number of GPUs in this group. */
  gpuCount: number;
  /** Total VRAM of the group (unit presumably GB). */
  totalVramGb: number;
  /** Largest VRAM of any single GPU in the group (unit presumably GB). */
  maxSingleGpuVramGb: number;
  /** Bus numbers associated with the group. */
  busNumbers: number[];
}
|
||||
|
||||
/**
 * Description of a single model deployment, as carried in the `deployments`
 * list of `IClusterNodeHeartbeat`.
 */
export interface IClusterDeploymentAdvertisement {
  /** Identifier of the deployed model. */
  modelId: string;
  /** Serving engine; the only permitted value is `'vllm'`. */
  engine: 'vllm';
  /** Endpoint string for the deployment; exact format not visible here. */
  endpoint: string;
  /** Health flag for the deployment. */
  healthy: boolean;
  /** Optional container identifier. */
  containerId?: string;
}
|
||||
|
||||
/**
 * Node status extended with resources, deployments and a last-seen value.
 *
 * Inherits every field of `IClusterNodeStatus`.
 */
export interface IClusterNodeHeartbeat extends IClusterNodeStatus {
  /** Redeclared as required; it is optional on the base interface. */
  endpoint: string;
  /** Resource summary for the node. */
  resources: IClusterNodeResources;
  /** Deployments advertised by the node. */
  deployments: IClusterDeploymentAdvertisement[];
  /** Numeric last-seen value — presumably an epoch timestamp; TODO confirm unit. */
  lastSeenAt: number;
}
|
||||
|
||||
/**
 * Where a model is located in the cluster.
 *
 * Has the same fields as `IClusterDeploymentAdvertisement` plus `nodeName`.
 */
export interface IClusterModelLocation {
  /** Identifier of the model. */
  modelId: string;
  /** Name of the node associated with this location. */
  nodeName: string;
  /** Endpoint string for the model; exact format not visible here. */
  endpoint: string;
  /** Health flag for this location. */
  healthy: boolean;
  /** Serving engine; the only permitted value is `'vllm'`. */
  engine: 'vllm';
  /** Optional container identifier. */
  containerId?: string;
}
|
||||
|
||||
/**
 * Response shape pairing a model with its location and a `created` flag.
 *
 * NOTE(review): by its name this is the result of an "ensure" operation, and
 * `created` presumably tells a fresh deployment apart from a reused one —
 * the producer is not visible here, so confirm.
 */
export interface IClusterEnsureResponse {
  /** Model string; whether it equals `location.modelId` is not visible here. */
  model: string;
  /** Location of the model. */
  location: IClusterModelLocation;
  /** Creation flag (see note above). */
  created: boolean;
}
|
||||
|
||||
/**
 * Desired replica count for a model. All fields are required.
 */
export interface IClusterDesiredDeployment {
  /** Identifier of the model. */
  modelId: string;
  /** Desired number of replicas. */
  desiredReplicas: number;
  /** Numeric update marker — presumably an epoch timestamp; TODO confirm unit. */
  updatedAt: number;
}
|
||||
|
||||
/**
 * Aggregate cluster status: the local node, all nodes, model locations and
 * desired deployments.
 */
export interface IClusterStatusResponse {
  /** Heartbeat of the local node, or `null` when there is none. */
  localNode: IClusterNodeHeartbeat | null;
  /** Heartbeats of the nodes in the cluster. */
  nodes: IClusterNodeHeartbeat[];
  /** Model locations keyed by string — presumably the model id; TODO confirm. */
  models: Record<string, IClusterModelLocation[]>;
  /** Desired deployments. */
  desiredDeployments: IClusterDesiredDeployment[];
}
|
||||
Reference in New Issue
Block a user