/**
 * ModelGrid GPU Interfaces
 *
 * Defines types for GPU detection and management.
 */

/**
 * GPU vendor type.
 *
 * Closed set of vendor tags used by the GPU interfaces in this module;
 * `'unknown'` is the member for anything that is not one of the three
 * named vendors.
 */
export type TGpuVendor = 'nvidia' | 'amd' | 'intel' | 'unknown';

/**
 * GPU information detected from the system.
 *
 * Describes one GPU: identity, vendor, model, memory size and the
 * vendor-specific software versions. Vendor-specific fields are optional
 * and documented with the vendor they apply to.
 */
export interface IGpuInfo {
  /** Unique identifier for this GPU */
  id: string;
  /** GPU vendor */
  vendor: TGpuVendor;
  /** GPU model name (e.g., "NVIDIA GeForce RTX 4090") */
  model: string;
  /** Total VRAM in MB */
  vram: number;
  /** Driver version (if available) */
  driverVersion?: string;
  /** CUDA version (NVIDIA only) */
  cudaVersion?: string;
  /** Compute capability (NVIDIA only, e.g., "8.9") */
  computeCapability?: string;
  /** ROCm version (AMD only) */
  rocmVersion?: string;
  /** oneAPI version (Intel only) */
  oneApiVersion?: string;
  /** PCI slot identifier */
  pciSlot: string;
  /** PCI bus ID (e.g., "0000:01:00.0") */
  pciBusId?: string;
  /** GPU index in the system */
  index: number;
}

/**
 * Real-time GPU status.
 *
 * A point-in-time snapshot of one GPU's load, memory, thermals and power.
 * Fan and clock readings are optional.
 */
export interface IGpuStatus {
  /** GPU identifier */
  id: string;
  /** Current GPU utilization percentage (0-100) */
  utilization: number;
  /** Current memory usage in MB */
  memoryUsed: number;
  /** Total memory in MB */
  memoryTotal: number;
  /**
   * Memory usage percentage.
   * NOTE(review): presumably derived from memoryUsed / memoryTotal — confirm with the producer.
   */
  memoryPercent: number;
  /** Current temperature in Celsius */
  temperature: number;
  /** Current power usage in Watts */
  powerUsage: number;
  /** Power limit in Watts */
  powerLimit: number;
  /** Fan speed percentage (if available) */
  fanSpeed?: number;
  /** GPU clock speed in MHz */
  gpuClock?: number;
  /** Memory clock speed in MHz */
  memoryClock?: number;
  /**
   * Last update timestamp.
   * NOTE(review): unit is not stated here (likely epoch milliseconds) — confirm with the producer.
   */
  lastUpdate: number;
}

/**
 * Combined GPU information and status.
 *
 * Extends the detected GPU description with a nested real-time status
 * snapshot plus container-assignment and health fields.
 */
export interface IGpuFullStatus extends IGpuInfo {
  /** Real-time status */
  status: IGpuStatus;
  /** Container ID assigned to this GPU (if any) */
  assignedContainer?: string;
  /** Whether the GPU is available for use */
  available: boolean;
  /** Health status */
  health: 'healthy' | 'warning' | 'error' | 'unknown';
  /** Health message (if warning or error) */
  healthMessage?: string;
}

/**
 * System information including all GPUs.
 *
 * Host-level facts (hostname, CPU, RAM, OS and kernel) together with the
 * list of detected GPUs and the optional container tooling versions.
 */
export interface ISystemInfo {
  /** System hostname */
  hostname: string;
  /** CPU model name */
  cpuModel: string;
  /** Number of CPU cores */
  cpuCores: number;
  /** Total RAM in MB */
  ramTotal: number;
  /** Available RAM in MB */
  ramAvailable: number;
  /** Operating system */
  os: string;
  /** Kernel version */
  kernelVersion: string;
  /** List of detected GPUs */
  gpus: IGpuInfo[];
  /** Docker version (if installed) */
  dockerVersion?: string;
  /** NVIDIA Container Toolkit version (if installed) */
  nvidiaContainerVersion?: string;
}

/**
 * Driver status for a vendor.
 *
 * Reports whether the vendor's driver, compute toolkit and container
 * runtime integration are present, along with any detected issues.
 */
export interface IDriverStatus {
  /** GPU vendor */
  vendor: TGpuVendor;
  /** Whether the driver is installed */
  installed: boolean;
  /** Driver version (if installed) */
  version?: string;
  /** CUDA/ROCm/oneAPI toolkit version (if installed) */
  toolkitVersion?: string;
  /** Container runtime support (e.g., nvidia-docker) */
  containerSupport: boolean;
  /** Container runtime version */
  containerRuntimeVersion?: string;
  /** List of detected issues */
  issues: string[];
}