feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -96,9 +96,12 @@ export class GpuDetector {
|
||||
// Get CUDA version separately
|
||||
if (gpus.length > 0) {
|
||||
try {
|
||||
const { stdout: cudaOut } = await execAsync('nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1 && nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"', {
|
||||
timeout: 5000,
|
||||
});
|
||||
const { stdout: cudaOut } = await execAsync(
|
||||
'nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1 && nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"',
|
||||
{
|
||||
timeout: 5000,
|
||||
},
|
||||
);
|
||||
const cudaMatch = cudaOut.match(/(\d+\.\d+)/);
|
||||
if (cudaMatch) {
|
||||
for (const gpu of gpus) {
|
||||
@@ -142,7 +145,9 @@ export class GpuDetector {
|
||||
id: `amd-${index}`,
|
||||
vendor: 'amd',
|
||||
model: String(cardData['Card series'] || cardData['card_series'] || 'AMD GPU'),
|
||||
vram: this.parseMemory(String(cardData['VRAM Total Memory (B)'] || cardData['vram_total'] || '0')),
|
||||
vram: this.parseMemory(
|
||||
String(cardData['VRAM Total Memory (B)'] || cardData['vram_total'] || '0'),
|
||||
),
|
||||
driverVersion: String(cardData['Driver version'] || cardData['driver_version'] || ''),
|
||||
rocmVersion: await this.getRocmVersion(),
|
||||
pciSlot: String(cardData['PCI Bus'] || cardData['pci_bus'] || ''),
|
||||
@@ -371,14 +376,17 @@ export class GpuDetector {
|
||||
);
|
||||
|
||||
const parts = stdout.trim().split(',').map((p: string) => p.trim());
|
||||
const [utilization, memUsed, memTotal, temp, power, powerLimit, fan, gpuClock, memClock] = parts;
|
||||
const [utilization, memUsed, memTotal, temp, power, powerLimit, fan, gpuClock, memClock] =
|
||||
parts;
|
||||
|
||||
return {
|
||||
id: gpu.id,
|
||||
utilization: parseInt(utilization, 10) || 0,
|
||||
memoryUsed: parseInt(memUsed, 10) || 0,
|
||||
memoryTotal: parseInt(memTotal, 10) || gpu.vram,
|
||||
memoryPercent: memTotal ? Math.round((parseInt(memUsed, 10) / parseInt(memTotal, 10)) * 100) : 0,
|
||||
memoryPercent: memTotal
|
||||
? Math.round((parseInt(memUsed, 10) / parseInt(memTotal, 10)) * 100)
|
||||
: 0,
|
||||
temperature: parseInt(temp, 10) || 0,
|
||||
powerUsage: parseFloat(power) || 0,
|
||||
powerLimit: parseFloat(powerLimit) || 0,
|
||||
@@ -513,7 +521,7 @@ export class GpuDetector {
|
||||
bytes /= 1024;
|
||||
break;
|
||||
case 'B':
|
||||
bytes /= (1024 * 1024);
|
||||
bytes /= 1024 * 1024;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -542,7 +550,9 @@ export class GpuDetector {
|
||||
*/
|
||||
private async getRocmVersion(): Promise<string | undefined> {
|
||||
try {
|
||||
const { stdout } = await execAsync('cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1');
|
||||
const { stdout } = await execAsync(
|
||||
'cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1',
|
||||
);
|
||||
const match = stdout.match(/(\d+\.\d+(?:\.\d+)?)/);
|
||||
return match ? match[1] : undefined;
|
||||
} catch {
|
||||
@@ -555,7 +565,9 @@ export class GpuDetector {
|
||||
*/
|
||||
private async getOneApiVersion(): Promise<string | undefined> {
|
||||
try {
|
||||
const { stdout } = await execAsync('source /opt/intel/oneapi/setvars.sh 2>/dev/null && echo $ONEAPI_ROOT 2>/dev/null || cat /opt/intel/oneapi/compiler/latest/env/vars.sh 2>/dev/null | grep VERSION');
|
||||
const { stdout } = await execAsync(
|
||||
'source /opt/intel/oneapi/setvars.sh 2>/dev/null && echo $ONEAPI_ROOT 2>/dev/null || cat /opt/intel/oneapi/compiler/latest/env/vars.sh 2>/dev/null | grep VERSION',
|
||||
);
|
||||
const match = stdout.match(/(\d+\.\d+(?:\.\d+)?)/);
|
||||
return match ? match[1] : undefined;
|
||||
} catch {
|
||||
|
||||
@@ -105,7 +105,9 @@ export class SystemInfo {
|
||||
*/
|
||||
private async getNvidiaContainerVersion(): Promise<string | undefined> {
|
||||
try {
|
||||
const { stdout } = await execAsync('nvidia-container-cli --version 2>&1 | head -1', { timeout: 5000 });
|
||||
const { stdout } = await execAsync('nvidia-container-cli --version 2>&1 | head -1', {
|
||||
timeout: 5000,
|
||||
});
|
||||
const match = stdout.match(/version (\d+\.\d+\.\d+)/);
|
||||
return match ? match[1] : undefined;
|
||||
} catch {
|
||||
@@ -156,7 +158,9 @@ export class SystemInfo {
|
||||
*/
|
||||
public async getAvailableDiskSpace(path: string = '/var/lib'): Promise<number> {
|
||||
try {
|
||||
const { stdout } = await execAsync(`df -m "${path}" | tail -1 | awk '{print $4}'`, { timeout: 5000 });
|
||||
const { stdout } = await execAsync(`df -m "${path}" | tail -1 | awk '{print $4}'`, {
|
||||
timeout: 5000,
|
||||
});
|
||||
return parseInt(stdout.trim(), 10) || 0;
|
||||
} catch {
|
||||
return 0;
|
||||
@@ -198,7 +202,11 @@ export class SystemInfo {
|
||||
logger.logBoxLine(`OS: ${info.os}`);
|
||||
logger.logBoxLine(`Kernel: ${info.kernelVersion}`);
|
||||
logger.logBoxLine(`CPU: ${info.cpuModel} (${info.cpuCores} cores)`);
|
||||
logger.logBoxLine(`RAM: ${Math.round(info.ramTotal / 1024)} GB total, ${Math.round(info.ramAvailable / 1024)} GB available`);
|
||||
logger.logBoxLine(
|
||||
`RAM: ${Math.round(info.ramTotal / 1024)} GB total, ${
|
||||
Math.round(info.ramAvailable / 1024)
|
||||
} GB available`,
|
||||
);
|
||||
logger.logBoxLine('');
|
||||
|
||||
if (info.dockerVersion) {
|
||||
|
||||
Reference in New Issue
Block a user