feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+11 -3
View File
@@ -105,7 +105,9 @@ export class SystemInfo {
*/
private async getNvidiaContainerVersion(): Promise<string | undefined> {
try {
const { stdout } = await execAsync('nvidia-container-cli --version 2>&1 | head -1', { timeout: 5000 });
const { stdout } = await execAsync('nvidia-container-cli --version 2>&1 | head -1', {
timeout: 5000,
});
const match = stdout.match(/version (\d+\.\d+\.\d+)/);
return match ? match[1] : undefined;
} catch {
@@ -156,7 +158,9 @@ export class SystemInfo {
*/
public async getAvailableDiskSpace(path: string = '/var/lib'): Promise<number> {
try {
const { stdout } = await execAsync(`df -m "${path}" | tail -1 | awk '{print $4}'`, { timeout: 5000 });
const { stdout } = await execAsync(`df -m "${path}" | tail -1 | awk '{print $4}'`, {
timeout: 5000,
});
return parseInt(stdout.trim(), 10) || 0;
} catch {
return 0;
@@ -198,7 +202,11 @@ export class SystemInfo {
logger.logBoxLine(`OS: ${info.os}`);
logger.logBoxLine(`Kernel: ${info.kernelVersion}`);
logger.logBoxLine(`CPU: ${info.cpuModel} (${info.cpuCores} cores)`);
logger.logBoxLine(`RAM: ${Math.round(info.ramTotal / 1024)} GB total, ${Math.round(info.ramAvailable / 1024)} GB available`);
logger.logBoxLine(
`RAM: ${Math.round(info.ramTotal / 1024)} GB total, ${
Math.round(info.ramAvailable / 1024)
} GB available`,
);
logger.logBoxLine('');
if (info.dockerVersion) {