feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+38 -16
View File
@@ -86,22 +86,30 @@ export class GpuHandler {
logger.info('GPU Status');
logger.log('');
const gpuStatus = await this.gpuDetector.getGpuStatus();
const gpuInfo = await this.gpuDetector.detectGpus();
const gpuStatus = await this.gpuDetector.getAllGpuStatus();
if (gpuStatus.length === 0) {
if (gpuStatus.size === 0) {
logger.warn('No GPUs detected');
return;
}
for (const gpu of gpuStatus) {
const utilizationBar = this.createProgressBar(gpu.utilization, 30);
const memoryBar = this.createProgressBar(gpu.memoryUsed / gpu.memoryTotal * 100, 30);
for (const [gpuId, status] of gpuStatus) {
const info = gpuInfo.find((gpu) => gpu.id === gpuId);
const utilizationBar = this.createProgressBar(status.utilization, 30);
const memoryBar = this.createProgressBar(status.memoryUsed / status.memoryTotal * 100, 30);
logger.logBoxTitle(`GPU ${gpu.id}: ${gpu.name}`, 70, 'info');
logger.logBoxLine(`Utilization: ${utilizationBar} ${gpu.utilization.toFixed(1)}%`);
logger.logBoxLine(`Memory: ${memoryBar} ${Math.round(gpu.memoryUsed)}/${Math.round(gpu.memoryTotal)} MB`);
logger.logBoxLine(`Temperature: ${this.formatTemperature(gpu.temperature)}`);
logger.logBoxLine(`Power: ${gpu.powerDraw.toFixed(0)}W / ${gpu.powerLimit.toFixed(0)}W`);
logger.logBoxTitle(`GPU ${status.id}: ${info?.model || 'Unknown GPU'}`, 70, 'info');
logger.logBoxLine(`Utilization: ${utilizationBar} ${status.utilization.toFixed(1)}%`);
logger.logBoxLine(
`Memory: ${memoryBar} ${Math.round(status.memoryUsed)}/${
Math.round(status.memoryTotal)
} MB`,
);
logger.logBoxLine(`Temperature: ${this.formatTemperature(status.temperature)}`);
logger.logBoxLine(
`Power: ${status.powerUsage.toFixed(0)}W / ${status.powerLimit.toFixed(0)}W`,
);
logger.logBoxEnd();
logger.log('');
}
@@ -138,13 +146,23 @@ export class GpuHandler {
const status = await driver.getStatus();
logger.logBoxTitle(`${this.formatVendor(vendor)} Driver`, 60, status.installed ? 'success' : 'warning');
logger.logBoxLine(`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`);
logger.logBoxTitle(
`${this.formatVendor(vendor)} Driver`,
60,
status.installed ? 'success' : 'warning',
);
logger.logBoxLine(
`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`,
);
if (status.installed) {
logger.logBoxLine(`Version: ${status.version || 'Unknown'}`);
logger.logBoxLine(`Runtime: ${status.runtimeVersion || 'Unknown'}`);
logger.logBoxLine(`Container Support: ${status.containerSupport ? theme.success('Yes') : theme.warning('No')}`);
logger.logBoxLine(`Runtime: ${status.containerRuntimeVersion || 'Unknown'}`);
logger.logBoxLine(
`Container Support: ${
status.containerSupport ? theme.success('Yes') : theme.warning('No')
}`,
);
} else {
logger.logBoxLine('');
logger.logBoxLine(theme.dim('Run `modelgrid gpu install` to install drivers'));
@@ -183,14 +201,18 @@ export class GpuHandler {
logger.info(`Installing ${this.formatVendor(vendor)} drivers...`);
const success = await driver.install();
const success = await driver.install({
installToolkit: true,
installContainerSupport: true,
nonInteractive: false,
});
if (success) {
logger.success(`${this.formatVendor(vendor)} drivers installed successfully`);
// Setup container support
logger.info('Setting up container support...');
const containerSuccess = await driver.setupContainer();
const containerSuccess = await driver.installContainerSupport();
if (containerSuccess) {
logger.success('Container support configured');