Files
modelgrid/ts/interfaces/cluster.ts
T

92 lines
2.1 KiB
TypeScript

/**
* Cluster and deployment interfaces.
*/
/**
 * Role a cluster node can hold. Exactly one of the three string literals
 * below; used by `IClusterConfig.role` and `IClusterNodeStatus.role`.
 */
export type TClusterRole =
  | 'standalone'
  | 'control-plane'
  | 'worker';
/**
 * Scheduler-facing state of a node, as carried by
 * `IClusterNodeStatus.schedulerState`.
 * NOTE(review): the names suggest cordon/drain semantics similar to other
 * orchestrators — the actual scheduling rules are not visible in this file.
 */
export type TClusterNodeSchedulerState =
  | 'active'
  | 'cordoned'
  | 'draining';
/**
 * Configuration shape for a node's cluster settings.
 *
 * `enabled`, `nodeName`, `role`, `bindHost` and `gossipPort` are required;
 * all remaining fields may be omitted.
 * NOTE(review): defaults and validation are applied elsewhere — the field
 * notes below are read off the names and should be confirmed against the
 * code that consumes this config.
 */
export interface IClusterConfig {
  /** Whether clustering is switched on. */
  enabled: boolean;
  /** Name of this node. */
  nodeName: string;
  /** Role assigned to this node; one of the `TClusterRole` literals. */
  role: TClusterRole;
  /** Presumably the host/interface the cluster listener binds to — TODO confirm. */
  bindHost: string;
  /** Presumably the port used for gossip traffic — TODO confirm. */
  gossipPort: number;
  /** Optional secret; presumably shared between nodes for authentication — TODO confirm. */
  sharedSecret?: string;
  /** Optional URL; presumably what this node announces to peers — TODO confirm. */
  advertiseUrl?: string;
  /** Optional URL; presumably where the control plane is reached — TODO confirm. */
  controlPlaneUrl?: string;
  /** Optional heartbeat interval; the `Ms` suffix suggests milliseconds. */
  heartbeatIntervalMs?: number;
  /** Optional list of seed nodes; entry format (name vs. URL) is not visible here. */
  seedNodes?: string[];
}
/**
 * Basic status record for a single cluster node.
 *
 * `endpoint` and `schedulerState` are optional on this interface.
 */
export interface IClusterNodeStatus {
  /** Name of the node this status describes. */
  nodeName: string;
  /** Role of the node; one of the `TClusterRole` literals. */
  role: TClusterRole;
  /** Optional endpoint string for the node (format not specified in this file). */
  endpoint?: string;
  /** Health flag for the node. */
  healthy: boolean;
  /** Optional scheduler state; one of the `TClusterNodeSchedulerState` literals. */
  schedulerState?: TClusterNodeSchedulerState;
}
/**
 * Resource summary for a node: GPU counts, VRAM figures, deployment count and
 * the node's GPU topology groups.
 *
 * NOTE(review): the `Gb` suffix suggests the VRAM figures are in gigabytes;
 * how each aggregate is computed is not visible in this file.
 */
export interface IClusterNodeResources {
  /** Number of GPUs on the node. */
  gpuCount: number;
  /** Total VRAM across the node. */
  totalVramGb: number;
  /** VRAM currently available on the node. */
  availableVramGb: number;
  /** VRAM of the largest single GPU. */
  maxSingleGpuVramGb: number;
  /** GPU count of the largest GPU group (presumably a topology group — confirm). */
  largestGpuGroupCount: number;
  /** VRAM of the largest GPU group (presumably a topology group — confirm). */
  largestGpuGroupVramGb: number;
  /** Number of deployments on the node. */
  deploymentCount: number;
  /** GPU topology groups reported for the node. */
  topologyGroups: IClusterGpuTopologyGroup[];
}
/**
 * One group of GPUs within a node's topology.
 *
 * NOTE(review): the grouping criterion (e.g. interconnect or bus locality)
 * is decided elsewhere and is not visible in this file.
 */
export interface IClusterGpuTopologyGroup {
  /** Identifier of the group. */
  id: string;
  /** GPU vendor for the group; `'unknown'` is an allowed value. */
  vendor: 'nvidia' | 'amd' | 'intel' | 'unknown';
  /** Identifiers of the GPUs in the group. */
  gpuIds: string[];
  /** Number of GPUs in the group. */
  gpuCount: number;
  /** Total VRAM of the group (the `Gb` suffix suggests gigabytes). */
  totalVramGb: number;
  /** VRAM of the largest single GPU in the group. */
  maxSingleGpuVramGb: number;
  /** Bus numbers associated with the group (presumably PCI bus numbers — confirm). */
  busNumbers: number[];
}
/**
 * Description of a single model deployment, as listed in
 * `IClusterNodeHeartbeat.deployments`.
 */
export interface IClusterDeploymentAdvertisement {
  /** Identifier of the deployed model. */
  modelId: string;
  /** Serving engine; the type currently admits only `'vllm'`. */
  engine: 'vllm';
  /** Endpoint string for the deployment (format not specified in this file). */
  endpoint: string;
  /** Health flag for the deployment. */
  healthy: boolean;
  /** Optional container identifier. */
  containerId?: string;
}
/**
 * Heartbeat record for a node.
 *
 * Builds on `IClusterNodeStatus` and tightens it: `endpoint`, optional on the
 * base interface, is mandatory here. Adds the node's resources, its
 * deployments and a last-seen value.
 */
export interface IClusterNodeHeartbeat extends IClusterNodeStatus {
  /** Endpoint string for the node; required on heartbeats. */
  endpoint: string;
  /** Resource summary for the node. */
  resources: IClusterNodeResources;
  /** Deployments listed for the node. */
  deployments: IClusterDeploymentAdvertisement[];
  /** Last-seen marker (presumably an epoch timestamp; unit not visible here — confirm). */
  lastSeenAt: number;
}
/**
 * Where a model can be found: the node name plus the deployment details.
 *
 * Carries the same fields as `IClusterDeploymentAdvertisement` with
 * `nodeName` added; the two shapes are declared independently.
 */
export interface IClusterModelLocation {
  /** Identifier of the model. */
  modelId: string;
  /** Name of the node associated with this location. */
  nodeName: string;
  /** Endpoint string for this location (format not specified in this file). */
  endpoint: string;
  /** Health flag for this location. */
  healthy: boolean;
  /** Serving engine; the type currently admits only `'vllm'`. */
  engine: 'vllm';
  /** Optional container identifier. */
  containerId?: string;
}
/**
 * Response shape for an "ensure" operation on a model.
 *
 * NOTE(review): this interface names its field `model`, whereas the other
 * interfaces in this file use `modelId`; whether both hold the same
 * identifier is not visible here — confirm against the producer.
 */
export interface IClusterEnsureResponse {
  /** Model string for the request/response. */
  model: string;
  /** Location of the model. */
  location: IClusterModelLocation;
  /** Creation flag (presumably true when a new deployment was started — confirm). */
  created: boolean;
}
/**
 * Desired deployment target for one model: the model, how many replicas are
 * wanted, and an update marker.
 */
export interface IClusterDesiredDeployment {
  /** Identifier of the model. */
  modelId: string;
  /** Number of replicas wanted for the model. */
  desiredReplicas: number;
  /** Update marker (presumably an epoch timestamp; unit not visible here — confirm). */
  updatedAt: number;
}
/**
 * Cluster-wide status response: the local node's heartbeat, all node
 * heartbeats, model locations and the desired deployments.
 */
export interface IClusterStatusResponse {
  /** Heartbeat of the local node; the type allows `null`. */
  localNode: IClusterNodeHeartbeat | null;
  /** Heartbeats of the nodes in the cluster. */
  nodes: IClusterNodeHeartbeat[];
  /** Locations grouped under string keys (presumably model ids — confirm). */
  models: Record<string, IClusterModelLocation[]>;
  /** Desired deployment targets. */
  desiredDeployments: IClusterDesiredDeployment[];
}