feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
@@ -0,0 +1,192 @@
+import * as fs from 'node:fs/promises';
+import { CLUSTER, PATHS } from '../constants.ts';
+import type { IModelGridConfig } from '../interfaces/config.ts';
+import { logger } from '../logger.ts';
+
+/**
+ * CLI handler for the cluster commands.
+ *
+ * Each public method reads the config file at `PATHS.CONFIG_FILE`, sends one
+ * HTTP request to a `/_cluster/...` route on the resolved endpoint and prints
+ * the JSON response through the logger. Config read errors, network errors and
+ * non-2xx responses are reported with `logger.error` instead of being thrown.
+ */
+export class ClusterHandler {
+  /** Prints the response of `GET /_cluster/status`. */
+  public async status(): Promise<void> {
+    this.printResponse(await this.request('/_cluster/status'));
+  }
+
+  /** Prints the response of `GET /_cluster/nodes`. */
+  public async nodes(): Promise<void> {
+    this.printResponse(await this.request('/_cluster/nodes'));
+  }
+
+  /** Prints only the `models` property of the `GET /_cluster/status` response. */
+  public async models(): Promise<void> {
+    const response = await this.request('/_cluster/status');
+
+    if (typeof response === 'object' && response !== null && 'models' in response) {
+      logger.log(JSON.stringify((response as { models: unknown }).models, null, 2));
+      return;
+    }
+
+    // `null` means the request failed and was already reported by `request`.
+    // Any other payload without `models` would otherwise produce no output.
+    if (response !== null) {
+      logger.warn('Cluster status response does not contain a models section');
+    }
+  }
+
+  /** Prints the response of `GET /_cluster/desired`. */
+  public async desired(): Promise<void> {
+    this.printResponse(await this.request('/_cluster/desired'));
+  }
+
+  /**
+   * POSTs `{ model }` to `/_cluster/models/ensure` and prints the response.
+   *
+   * @param model Model ID; an empty value is rejected locally.
+   */
+  public async ensure(model: string): Promise<void> {
+    if (!model) {
+      logger.error('Model ID is required');
+      return;
+    }
+
+    this.printResponse(
+      await this.request('/_cluster/models/ensure', {
+        method: 'POST',
+        body: { model },
+      }),
+    );
+  }
+
+  /**
+   * POSTs `{ model, desiredReplicas }` to `/_cluster/models/desired` and prints
+   * the response.
+   *
+   * @param model Model ID; an empty value is rejected locally.
+   * @param desiredReplicas Replica count; must be a non-negative integer.
+   */
+  public async scale(model: string, desiredReplicas: number): Promise<void> {
+    // Number.isInteger also rejects NaN and +/-Infinity. Infinity in particular
+    // would be serialized as `null` by JSON.stringify.
+    if (!model || !Number.isInteger(desiredReplicas) || desiredReplicas < 0) {
+      logger.error('Model ID and a non-negative integer replica count are required');
+      return;
+    }
+
+    this.printResponse(
+      await this.request('/_cluster/models/desired', {
+        method: 'POST',
+        body: { model, desiredReplicas },
+      }),
+    );
+  }
+
+  /**
+   * POSTs `{ model }` to `/_cluster/models/desired/remove` and prints the
+   * response.
+   *
+   * @param model Model ID; an empty value is rejected locally.
+   */
+  public async clear(model: string): Promise<void> {
+    if (!model) {
+      logger.error('Model ID is required');
+      return;
+    }
+
+    this.printResponse(
+      await this.request('/_cluster/models/desired/remove', {
+        method: 'POST',
+        body: { model },
+      }),
+    );
+  }
+
+  /** POSTs `{ nodeName }` to `/_cluster/nodes/cordon`. */
+  public async cordon(nodeName: string): Promise<void> {
+    await this.updateNodeState('/_cluster/nodes/cordon', nodeName);
+  }
+
+  /** POSTs `{ nodeName }` to `/_cluster/nodes/uncordon`. */
+  public async uncordon(nodeName: string): Promise<void> {
+    await this.updateNodeState('/_cluster/nodes/uncordon', nodeName);
+  }
+
+  /** POSTs `{ nodeName }` to `/_cluster/nodes/drain`. */
+  public async drain(nodeName: string): Promise<void> {
+    await this.updateNodeState('/_cluster/nodes/drain', nodeName);
+  }
+
+  /** POSTs `{ nodeName }` to `/_cluster/nodes/activate`. */
+  public async activate(nodeName: string): Promise<void> {
+    await this.updateNodeState('/_cluster/nodes/activate', nodeName);
+  }
+
+  /**
+   * Sends a JSON request to the cluster endpoint.
+   *
+   * @param path Route beginning with `/`, appended to the resolved endpoint.
+   * @param options Optional HTTP method (default `GET`) and JSON-serializable body.
+   * @returns The parsed JSON response, or `null` when the config could not be
+   *   read, the request failed or the server answered with a non-2xx status
+   *   (each case is logged here).
+   */
+  private async request(
+    path: string,
+    options: {
+      method?: 'GET' | 'POST';
+      body?: unknown;
+    } = {},
+  ): Promise<unknown | null> {
+    const config = await this.readConfig();
+    if (!config) {
+      return null;
+    }
+
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+    };
+
+    // The parsed file is not validated, so the `cluster` section may be absent;
+    // guard the access instead of throwing a TypeError.
+    const sharedSecret = config.cluster?.sharedSecret;
+    if (sharedSecret) {
+      headers[CLUSTER.AUTH_HEADER_NAME] = sharedSecret;
+    }
+
+    try {
+      // Resolved inside the try block so that a malformed config is reported
+      // like any other request failure.
+      const endpoint = this.resolveEndpoint(config);
+      const response = await fetch(`${endpoint}${path}`, {
+        method: options.method ?? 'GET',
+        headers,
+        body: options.body !== undefined ? JSON.stringify(options.body) : undefined,
+      });
+
+      if (!response.ok) {
+        logger.error(`Cluster request failed: ${response.status} ${await response.text()}`);
+        return null;
+      }
+
+      return await response.json();
+    } catch (error) {
+      logger.error(
+        `Cluster request failed: ${error instanceof Error ? error.message : String(error)}`,
+      );
+      return null;
+    }
+  }
+
+  /**
+   * Reads and JSON-parses the config file at `PATHS.CONFIG_FILE`.
+   *
+   * @returns The parsed config (cast, not validated), or `null` after logging
+   *   when the file cannot be read or parsed.
+   */
+  private async readConfig(): Promise<IModelGridConfig | null> {
+    try {
+      return JSON.parse(await fs.readFile(PATHS.CONFIG_FILE, 'utf-8')) as IModelGridConfig;
+    } catch (error) {
+      logger.error(
+        `Failed to read config: ${error instanceof Error ? error.message : String(error)}`,
+      );
+      return null;
+    }
+  }
+
+  /**
+   * Picks the base URL for cluster requests: `cluster.controlPlaneUrl`, then
+   * `cluster.advertiseUrl`, then `http://<api.host>:<api.port>` with the
+   * wildcard host `0.0.0.0` replaced by `127.0.0.1`.
+   *
+   * @returns Base URL without a trailing slash.
+   */
+  private resolveEndpoint(config: IModelGridConfig): string {
+    const configured = config.cluster?.controlPlaneUrl || config.cluster?.advertiseUrl;
+    if (configured) {
+      // Request paths start with "/", so drop trailing slashes to avoid "//_cluster/...".
+      return configured.replace(/\/+$/, '');
+    }
+
+    const host = config.api.host === '0.0.0.0' ? '127.0.0.1' : config.api.host;
+    return `http://${host}:${config.api.port}`;
+  }
+
+  /**
+   * POSTs `{ nodeName }` to the given node route and prints the response.
+   *
+   * @param path One of the `/_cluster/nodes/...` routes.
+   * @param nodeName Node name; an empty value is rejected locally.
+   */
+  private async updateNodeState(path: string, nodeName: string): Promise<void> {
+    if (!nodeName) {
+      logger.error('Node name is required');
+      return;
+    }
+
+    this.printResponse(
+      await this.request(path, {
+        method: 'POST',
+        body: { nodeName },
+      }),
+    );
+  }
+
+  /**
+   * Pretty-prints a value returned by `request`. `null` means the request
+   * failed and was already reported, so nothing is printed; any other JSON
+   * value (including `0`, `false` or `""`) is printed.
+   */
+  private printResponse(response: unknown): void {
+    if (response === null || response === undefined) {
+      return;
+    }
+
+    logger.log(JSON.stringify(response, null, 2));
+  }
+}
@@ -25,6 +25,26 @@ export class ConfigHandler {
      const configPath = PATHS.CONFIG_FILE;
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent) as IModelGridConfig;
+      const modelConfig = {
+        registryUrl: config.models.registryUrl ||
+          (config.models as { greenlistUrl?: string }).greenlistUrl ||
+          'https://list.modelgrid.com/catalog/models.json',
+        autoDeploy: config.models.autoDeploy ??
+          (config.models as { autoPull?: boolean }).autoPull ?? true,
+        defaultEngine: config.models.defaultEngine || 'vllm',
+        autoLoad: config.models.autoLoad || [],
+      };
+      const clusterConfig = config.cluster || {
+        enabled: false,
+        nodeName: 'modelgrid-local',
+        role: 'standalone',
+        bindHost: '0.0.0.0',
+        gossipPort: 7946,
+        sharedSecret: undefined,
+        advertiseUrl: undefined,
+        controlPlaneUrl: undefined,
+        heartbeatIntervalMs: 5000,
+      };

      // Overview
      logger.logBox(
@@ -48,9 +68,7 @@ export class ConfigHandler {
          `Host: ${theme.info(config.api.host)}`,
          `Port: ${theme.highlight(String(config.api.port))}`,
          `API Keys: ${config.api.apiKeys.length} configured`,
-          ...(config.api.rateLimit
-            ? [`Rate Limit: ${config.api.rateLimit} req/min`]
-            : []),
+          ...(config.api.rateLimit ? [`Rate Limit: ${config.api.rateLimit} req/min`] : []),
          '',
          theme.dim('Endpoint:'),
          `  http://${config.api.host}:${config.api.port}/v1/chat/completions`,
@@ -88,12 +106,33 @@ export class ConfigHandler {
      logger.logBox(
        'Models',
        [
-          `Auto Pull: ${config.models.autoPull ? theme.success('Enabled') : theme.dim('Disabled')}`,
-          `Default Container: ${config.models.defaultContainer}`,
-          `Auto Load: ${config.models.autoLoad.length} model(s)`,
+          `Auto Deploy: ${
+            modelConfig.autoDeploy ? theme.success('Enabled') : theme.dim('Disabled')
+          }`,
+          `Default Engine: ${modelConfig.defaultEngine}`,
+          `Auto Load: ${modelConfig.autoLoad.length} model(s)`,
          '',
-          theme.dim('Greenlist URL:'),
-          `  ${config.models.greenlistUrl}`,
+          theme.dim('Registry URL:'),
+          `  ${modelConfig.registryUrl}`,
+        ],
+        70,
+        'default',
+      );
+
+      logger.log('');
+      logger.logBox(
+        'Cluster',
+        [
+          `Enabled: ${clusterConfig.enabled ? theme.success('Yes') : theme.dim('No')}`,
+          `Node: ${clusterConfig.nodeName}`,
+          `Role: ${clusterConfig.role}`,
+          `Bind Host: ${clusterConfig.bindHost}:${clusterConfig.gossipPort}`,
+          `Shared Secret: ${
+            clusterConfig.sharedSecret ? theme.success('Configured') : theme.dim('Not set')
+          }`,
+          `Advertise URL: ${clusterConfig.advertiseUrl || theme.dim('Default loopback')}`,
+          `Control Plane: ${clusterConfig.controlPlaneUrl || theme.dim('Not configured')}`,
+          `Heartbeat: ${clusterConfig.heartbeatIntervalMs}ms`,
        ],
        70,
        'default',
@@ -110,7 +149,7 @@ export class ConfigHandler {
          name: c.name,
          type: c.type,
          image: c.image.length > 40 ? c.image.substring(0, 37) + '...' : c.image,
-          port: c.port,
+          port: String(c.port),
          gpus: c.gpuIds.length > 0 ? c.gpuIds.join(',') : theme.dim('None'),
        }));

@@ -189,11 +228,22 @@ export class ConfigHandler {
      },
      containers: [],
      models: {
-        greenlistUrl: 'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',
-        autoPull: true,
-        defaultContainer: 'ollama',
+        registryUrl: 'https://list.modelgrid.com/catalog/models.json',
+        autoDeploy: true,
+        defaultEngine: 'vllm',
        autoLoad: [],
      },
+      cluster: {
+        enabled: false,
+        nodeName: 'modelgrid-local',
+        role: 'standalone',
+        bindHost: '0.0.0.0',
+        gossipPort: 7946,
+        sharedSecret: '',
+        advertiseUrl: 'http://127.0.0.1:8080',
+        heartbeatIntervalMs: 5000,
+        seedNodes: [],
+      },
      checkInterval: 30000,
    };

@@ -1,47 +1,36 @@
 /**
- * Container Handler
- *
- * CLI commands for container management.
+ * Deployment handler for container-backed runtimes.
 */

 import { logger } from '../logger.ts';
 import { theme } from '../colors.ts';
 import { ContainerManager } from '../containers/container-manager.ts';
-import { DockerManager } from '../docker/docker-manager.ts';
-import type { IContainerConfig } from '../interfaces/container.ts';
+import { VllmContainer } from '../containers/vllm.ts';
 import type { ITableColumn } from '../logger.ts';
 import * as helpers from '../helpers/index.ts';

-/**
- * Handler for container-related CLI commands
- */
 export class ContainerHandler {
  private containerManager: ContainerManager;
-  private dockerManager: DockerManager;

  constructor(containerManager: ContainerManager) {
    this.containerManager = containerManager;
-    this.dockerManager = new DockerManager();
  }

-  /**
-   * List all configured containers
-   */
  public async list(): Promise<void> {
    logger.log('');
-    logger.info('Containers');
+    logger.info('Deployments');
    logger.log('');

    const containers = this.containerManager.getAllContainers();

    if (containers.length === 0) {
      logger.logBox(
-        'No Containers',
+        'No Deployments',
        [
-          'No containers are configured.',
+          'No vLLM deployments are configured.',
          '',
-          theme.dim('Add a container with:'),
-          `  ${theme.command('modelgrid container add')}`,
+          theme.dim('Create one with:'),
+          `  ${theme.command('modelgrid run <model-id>')}`,
        ],
        60,
        'warning',
@@ -49,7 +38,7 @@ export class ContainerHandler {
      return;
    }

-    const rows = [];
+    const rows: Record<string, string | number>[] = [];

    for (const container of containers) {
      const status = await container.getStatus();
@@ -57,28 +46,22 @@ export class ContainerHandler {

      rows.push({
        id: config.id,
-        name: config.name,
-        type: this.formatContainerType(container.type),
-        status: status.running
-          ? theme.success('Running')
-          : theme.dim('Stopped'),
-        health: status.running
-          ? this.formatHealth(status.health)
-          : theme.dim('N/A'),
-        port: config.externalPort || config.port,
-        models: status.loadedModels.length,
+        model: config.models[0] || theme.dim('N/A'),
+        engine: this.formatContainerType(container.type),
+        status: status.running ? theme.success('Running') : theme.dim('Stopped'),
+        health: status.running ? this.formatHealth(status.health) : theme.dim('N/A'),
+        port: String(config.externalPort || config.port),
        gpus: config.gpuIds.length > 0 ? config.gpuIds.join(',') : theme.dim('None'),
      });
    }

    const columns: ITableColumn[] = [
      { header: 'ID', key: 'id', align: 'left' },
-      { header: 'Name', key: 'name', align: 'left', color: theme.highlight },
-      { header: 'Type', key: 'type', align: 'left' },
+      { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
+      { header: 'Engine', key: 'engine', align: 'left' },
      { header: 'Status', key: 'status', align: 'left' },
      { header: 'Health', key: 'health', align: 'left' },
      { header: 'Port', key: 'port', align: 'right', color: theme.info },
-      { header: 'Models', key: 'models', align: 'right' },
      { header: 'GPUs', key: 'gpus', align: 'left' },
    ];

@@ -86,94 +69,70 @@ export class ContainerHandler {
    logger.log('');
  }

-  /**
-   * Add a new container interactively
-   */
  public async add(): Promise<void> {
-    const { prompt, close, select } = await helpers.createPrompt();
+    const { prompt, close } = await helpers.createPrompt();

    try {
      logger.log('');
-      logger.highlight('Add Container');
-      logger.dim('Configure a new AI model container');
+      logger.highlight('Create vLLM Deployment');
+      logger.dim('Provision a single-model vLLM runtime');
      logger.log('');

-      // Select container type
-      const typeIndex = await select('Select container type:', [
-        'Ollama - Easy to use, good for local models',
-        'vLLM - High performance, OpenAI compatible',
-        'TGI - HuggingFace Text Generation Inference',
-      ]);
-
-      const types = ['ollama', 'vllm', 'tgi'] as const;
-      const containerType = types[typeIndex];
-
-      // Container name
-      const name = await prompt('Container name: ');
-      if (!name.trim()) {
-        logger.error('Container name is required');
+      const modelName = await prompt('Model ID or Hugging Face repo: ');
+      if (!modelName.trim()) {
+        logger.error('Model ID is required');
        return;
      }

-      // Generate ID from name
-      const id = name.toLowerCase().replace(/[^a-z0-9-]/g, '-');
+      const name = await prompt(
+        `Deployment name [${modelName.split('/').pop() || 'deployment'}]: `,
+      );
+      const deploymentName = name.trim() || modelName.split('/').pop() || 'deployment';
+      const deploymentId = deploymentName.toLowerCase().replace(/[^a-z0-9-]/g, '-');

-      // Port
-      const defaultPorts = { ollama: 11434, vllm: 8000, tgi: 8080 };
-      const portStr = await prompt(`Port [${defaultPorts[containerType]}]: `);
-      const port = portStr ? parseInt(portStr, 10) : defaultPorts[containerType];
+      const portStr = await prompt('Port [8000]: ');
+      const port = portStr ? parseInt(portStr, 10) : 8000;

-      // GPU assignment
-      const gpuStr = await prompt('GPU IDs (comma-separated, or "all", or empty for none): ');
+      const gpuStr = await prompt('GPU IDs (comma-separated, or "all"): ');
      let gpuIds: string[] = [];

      if (gpuStr.trim().toLowerCase() === 'all') {
        const { GpuDetector } = await import('../hardware/gpu-detector.ts');
        const detector = new GpuDetector();
        const gpus = await detector.detectGpus();
-        gpuIds = gpus.map((g) => g.id);
+        gpuIds = gpus.map((gpu) => gpu.id);
      } else if (gpuStr.trim()) {
-        gpuIds = gpuStr.split(',').map((s) => s.trim());
+        gpuIds = gpuStr.split(',').map((value) => value.trim());
      }

-      // Build config
-      const config: IContainerConfig = {
-        id,
-        type: containerType,
-        name,
-        image: this.getDefaultImage(containerType),
+      const config = VllmContainer.createConfig(deploymentId, deploymentName, modelName, gpuIds, {
        port,
-        gpuIds,
-        models: [],
-      };
+      });
+      config.models = [modelName];

-      // Add container
-      await this.containerManager.addContainer(config);
+      this.containerManager.addContainer(config);

      logger.log('');
-      logger.success(`Container "${name}" added successfully`);
+      logger.success(`Deployment "${deploymentName}" added successfully`);
      logger.log('');
-      logger.dim('Start the container with:');
-      logger.log(`  ${theme.command(`modelgrid container start ${id}`)}`);
+      logger.dim('Start it with:');
+      logger.log(`  ${theme.command(`modelgrid container start ${deploymentId}`)}`);
      logger.log('');
    } finally {
      close();
    }
  }

-  /**
-   * Remove a container
-   */
  public async remove(containerId: string): Promise<void> {
    if (!containerId) {
-      logger.error('Container ID is required');
+      logger.error('Deployment ID is required');
      return;
    }

    const { prompt, close } = await helpers.createPrompt();

    try {
-      const confirm = await prompt(`Remove container "${containerId}"? (y/N): `);
+      const confirm = await prompt(`Remove deployment "${containerId}"? (y/N): `);

      if (confirm.toLowerCase() !== 'y') {
        logger.log('Aborted');
@@ -183,83 +142,72 @@ export class ContainerHandler {
      const success = await this.containerManager.removeContainer(containerId);

      if (success) {
-        logger.success(`Container "${containerId}" removed`);
+        logger.success(`Deployment "${containerId}" removed`);
      } else {
-        logger.error(`Failed to remove container "${containerId}"`);
+        logger.error(`Failed to remove deployment "${containerId}"`);
      }
    } finally {
      close();
    }
  }

-  /**
-   * Start a container
-   */
  public async start(containerId?: string): Promise<void> {
    if (containerId) {
-      // Start specific container
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
-        logger.error(`Container "${containerId}" not found`);
+        logger.error(`Deployment "${containerId}" not found`);
        return;
      }

-      logger.info(`Starting container "${containerId}"...`);
+      logger.info(`Starting deployment "${containerId}"...`);
      const success = await container.start();

      if (success) {
-        logger.success(`Container "${containerId}" started`);
+        logger.success(`Deployment "${containerId}" started`);
      } else {
-        logger.error(`Failed to start container "${containerId}"`);
+        logger.error(`Failed to start deployment "${containerId}"`);
      }
-    } else {
-      // Start all containers
-      logger.info('Starting all containers...');
-      await this.containerManager.startAll();
-      logger.success('All containers started');
+      return;
    }
+
+    logger.info('Starting all deployments...');
+    await this.containerManager.startAll();
+    logger.success('All deployments started');
  }

-  /**
-   * Stop a container
-   */
  public async stop(containerId?: string): Promise<void> {
    if (containerId) {
-      // Stop specific container
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
-        logger.error(`Container "${containerId}" not found`);
+        logger.error(`Deployment "${containerId}" not found`);
        return;
      }

-      logger.info(`Stopping container "${containerId}"...`);
+      logger.info(`Stopping deployment "${containerId}"...`);
      const success = await container.stop();

      if (success) {
-        logger.success(`Container "${containerId}" stopped`);
+        logger.success(`Deployment "${containerId}" stopped`);
      } else {
-        logger.error(`Failed to stop container "${containerId}"`);
+        logger.error(`Failed to stop deployment "${containerId}"`);
      }
-    } else {
-      // Stop all containers
-      logger.info('Stopping all containers...');
-      await this.containerManager.stopAll();
-      logger.success('All containers stopped');
+      return;
    }
+
+    logger.info('Stopping all deployments...');
+    await this.containerManager.stopAll();
+    logger.success('All deployments stopped');
  }

-  /**
-   * Show container logs
-   */
  public async logs(containerId: string, lines: number = 100): Promise<void> {
    if (!containerId) {
-      logger.error('Container ID is required');
+      logger.error('Deployment ID is required');
      return;
    }

    const container = this.containerManager.getContainer(containerId);
    if (!container) {
-      logger.error(`Container "${containerId}" not found`);
+      logger.error(`Deployment "${containerId}" not found`);
      return;
    }

@@ -267,13 +215,8 @@ export class ContainerHandler {
    console.log(logs);
  }

-  /**
-   * Format container type for display
-   */
  private formatContainerType(type: string): string {
    switch (type) {
-      case 'ollama':
-        return theme.containerOllama('Ollama');
      case 'vllm':
        return theme.containerVllm('vLLM');
      case 'tgi':
@@ -283,9 +226,6 @@ export class ContainerHandler {
    }
  }

-  /**
-   * Format health status
-   */
  private formatHealth(health: string): string {
    switch (health) {
      case 'healthy':
@@ -298,20 +238,4 @@ export class ContainerHandler {
        return theme.dim(health);
    }
  }
-
-  /**
-   * Get default image for container type
-   */
-  private getDefaultImage(type: string): string {
-    switch (type) {
-      case 'ollama':
-        return 'ollama/ollama:latest';
-      case 'vllm':
-        return 'vllm/vllm-openai:latest';
-      case 'tgi':
-        return 'ghcr.io/huggingface/text-generation-inference:latest';
-      default:
-        return '';
-    }
-  }
 }
@@ -86,22 +86,30 @@ export class GpuHandler {
    logger.info('GPU Status');
    logger.log('');

-    const gpuStatus = await this.gpuDetector.getGpuStatus();
+    const gpuInfo = await this.gpuDetector.detectGpus();
+    const gpuStatus = await this.gpuDetector.getAllGpuStatus();

-    if (gpuStatus.length === 0) {
+    if (gpuStatus.size === 0) {
      logger.warn('No GPUs detected');
      return;
    }

-    for (const gpu of gpuStatus) {
-      const utilizationBar = this.createProgressBar(gpu.utilization, 30);
-      const memoryBar = this.createProgressBar(gpu.memoryUsed / gpu.memoryTotal * 100, 30);
+    for (const [gpuId, status] of gpuStatus) {
+      const info = gpuInfo.find((gpu) => gpu.id === gpuId);
+      const utilizationBar = this.createProgressBar(status.utilization, 30);
+      const memoryBar = this.createProgressBar(status.memoryUsed / status.memoryTotal * 100, 30);

-      logger.logBoxTitle(`GPU ${gpu.id}: ${gpu.name}`, 70, 'info');
-      logger.logBoxLine(`Utilization: ${utilizationBar} ${gpu.utilization.toFixed(1)}%`);
-      logger.logBoxLine(`Memory:      ${memoryBar} ${Math.round(gpu.memoryUsed)}/${Math.round(gpu.memoryTotal)} MB`);
-      logger.logBoxLine(`Temperature: ${this.formatTemperature(gpu.temperature)}`);
-      logger.logBoxLine(`Power:       ${gpu.powerDraw.toFixed(0)}W / ${gpu.powerLimit.toFixed(0)}W`);
+      logger.logBoxTitle(`GPU ${status.id}: ${info?.model || 'Unknown GPU'}`, 70, 'info');
+      logger.logBoxLine(`Utilization: ${utilizationBar} ${status.utilization.toFixed(1)}%`);
+      logger.logBoxLine(
+        `Memory:      ${memoryBar} ${Math.round(status.memoryUsed)}/${
+          Math.round(status.memoryTotal)
+        } MB`,
+      );
+      logger.logBoxLine(`Temperature: ${this.formatTemperature(status.temperature)}`);
+      logger.logBoxLine(
+        `Power:       ${status.powerUsage.toFixed(0)}W / ${status.powerLimit.toFixed(0)}W`,
+      );
      logger.logBoxEnd();
      logger.log('');
    }
@@ -138,13 +146,23 @@ export class GpuHandler {

      const status = await driver.getStatus();

-      logger.logBoxTitle(`${this.formatVendor(vendor)} Driver`, 60, status.installed ? 'success' : 'warning');
-      logger.logBoxLine(`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`);
+      logger.logBoxTitle(
+        `${this.formatVendor(vendor)} Driver`,
+        60,
+        status.installed ? 'success' : 'warning',
+      );
+      logger.logBoxLine(
+        `Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`,
+      );

      if (status.installed) {
        logger.logBoxLine(`Version: ${status.version || 'Unknown'}`);
-        logger.logBoxLine(`Runtime: ${status.runtimeVersion || 'Unknown'}`);
-        logger.logBoxLine(`Container Support: ${status.containerSupport ? theme.success('Yes') : theme.warning('No')}`);
+        logger.logBoxLine(`Runtime: ${status.containerRuntimeVersion || 'Unknown'}`);
+        logger.logBoxLine(
+          `Container Support: ${
+            status.containerSupport ? theme.success('Yes') : theme.warning('No')
+          }`,
+        );
      } else {
        logger.logBoxLine('');
        logger.logBoxLine(theme.dim('Run `modelgrid gpu install` to install drivers'));
@@ -183,14 +201,18 @@ export class GpuHandler {

      logger.info(`Installing ${this.formatVendor(vendor)} drivers...`);

-      const success = await driver.install();
+      const success = await driver.install({
+        installToolkit: true,
+        installContainerSupport: true,
+        nonInteractive: false,
+      });

      if (success) {
        logger.success(`${this.formatVendor(vendor)} drivers installed successfully`);

        // Setup container support
        logger.info('Setting up container support...');
-        const containerSuccess = await driver.setupContainer();
+        const containerSuccess = await driver.installContainerSupport();

        if (containerSuccess) {
          logger.success('Container support configured');
@@ -1,55 +1,48 @@
 /**
- * Model Handler
- *
- * CLI commands for model management.
+ * Model handler for catalog-backed vLLM deployments.
 */

 import { logger } from '../logger.ts';
 import { theme } from '../colors.ts';
+import { ClusterCoordinator } from '../cluster/coordinator.ts';
 import { ContainerManager } from '../containers/container-manager.ts';
 import { ModelRegistry } from '../models/registry.ts';
 import { ModelLoader } from '../models/loader.ts';
 import type { ITableColumn } from '../logger.ts';

-/**
- * Handler for model-related CLI commands
- */
 export class ModelHandler {
  private containerManager: ContainerManager;
+  private clusterCoordinator: ClusterCoordinator;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;

  constructor(
    containerManager: ContainerManager,
+    clusterCoordinator: ClusterCoordinator,
    modelRegistry: ModelRegistry,
  ) {
    this.containerManager = containerManager;
+    this.clusterCoordinator = clusterCoordinator;
    this.modelRegistry = modelRegistry;
    this.modelLoader = new ModelLoader(modelRegistry, containerManager);
  }

-  /**
-   * List all available models
-   */
  public async list(): Promise<void> {
    logger.log('');
-    logger.info('Models');
+    logger.info('Model Catalog');
    logger.log('');

-    // Get loaded models from containers
    const loadedModels = await this.containerManager.getAllAvailableModels();
+    const catalogModels = await this.modelRegistry.getAllModels();

-    // Get greenlit models
-    const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
-
-    if (loadedModels.size === 0 && greenlitModels.length === 0) {
+    if (loadedModels.size === 0 && catalogModels.length === 0) {
      logger.logBox(
        'No Models',
        [
-          'No models are loaded or greenlit.',
+          'The local registry cache is empty.',
          '',
-          theme.dim('Pull a model with:'),
-          `  ${theme.command('modelgrid model pull <name>')}`,
+          theme.dim('Refresh with:'),
+          `  ${theme.command('modelgrid model refresh')}`,
        ],
        60,
        'warning',
@@ -57,56 +50,51 @@ export class ModelHandler {
      return;
    }

-    // Show loaded models
    if (loadedModels.size > 0) {
-      logger.info(`Loaded Models (${loadedModels.size}):`);
+      logger.info(`Running Deployments (${loadedModels.size}):`);
      logger.log('');

-      const rows = [];
-      for (const [name, info] of loadedModels) {
+      const rows: Record<string, string | number>[] = [];
+      for (const [name, endpoints] of loadedModels) {
+        const primaryEndpoint = endpoints[0];
        rows.push({
-          name,
-          container: info.container,
-          size: info.size ? this.formatSize(info.size) : theme.dim('N/A'),
-          format: info.format || theme.dim('N/A'),
-          modified: info.modifiedAt
-            ? new Date(info.modifiedAt).toLocaleDateString()
-            : theme.dim('N/A'),
+          model: name,
+          engine: primaryEndpoint?.type || 'vllm',
+          replicas: String(endpoints.length),
+          endpoint: primaryEndpoint?.url || theme.dim('N/A'),
        });
      }

      const columns: ITableColumn[] = [
-        { header: 'Name', key: 'name', align: 'left', color: theme.highlight },
-        { header: 'Container', key: 'container', align: 'left' },
-        { header: 'Size', key: 'size', align: 'right', color: theme.info },
-        { header: 'Format', key: 'format', align: 'left' },
-        { header: 'Modified', key: 'modified', align: 'left', color: theme.dim },
+        { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
+        { header: 'Engine', key: 'engine', align: 'left' },
+        { header: 'Replicas', key: 'replicas', align: 'right', color: theme.info },
+        { header: 'Endpoint', key: 'endpoint', align: 'left', color: theme.dim },
      ];

      logger.logTable(columns, rows);
      logger.log('');
    }

-    // Show greenlit models (not yet loaded)
    const loadedNames = new Set(loadedModels.keys());
-    const unloadedGreenlit = greenlitModels.filter((m) => !loadedNames.has(m.name));
+    const available = catalogModels.filter((model) => !loadedNames.has(model.id));

-    if (unloadedGreenlit.length > 0) {
-      logger.info(`Available to Pull (${unloadedGreenlit.length}):`);
+    if (available.length > 0) {
+      logger.info(`Available To Deploy (${available.length}):`);
      logger.log('');

-      const rows = unloadedGreenlit.map((m) => ({
-        name: m.name,
-        container: m.container,
-        vram: `${m.minVram} GB`,
-        tags: m.tags?.join(', ') || theme.dim('None'),
+      const rows: Record<string, string | number>[] = available.map((model) => ({
+        model: model.id,
+        family: model.metadata?.family || theme.dim('N/A'),
+        vram: `${model.requirements.minVramGb} GB`,
+        capabilities: this.formatCapabilities(model.capabilities),
      }));

      const columns: ITableColumn[] = [
-        { header: 'Name', key: 'name', align: 'left' },
-        { header: 'Container', key: 'container', align: 'left' },
+        { header: 'Model', key: 'model', align: 'left' },
+        { header: 'Family', key: 'family', align: 'left' },
        { header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
-        { header: 'Tags', key: 'tags', align: 'left', color: theme.dim },
+        { header: 'Capabilities', key: 'capabilities', align: 'left', color: theme.dim },
      ];

      logger.logTable(columns, rows);
@@ -114,47 +102,42 @@ export class ModelHandler {
    }
  }

-  /**
-   * Pull a model
-   */
  /**
   * Deploys a model by ID through the cluster coordinator and logs the outcome.
   *
   * An empty `modelName` is rejected with an error log and nothing else happens.
   * Otherwise the result of `clusterCoordinator.ensureModelViaControlPlane()` decides
   * the output: a truthy result logs whether the deployment was newly created or was
   * already available, followed by the node name and endpoint from `result.location`;
   * a falsy result logs a "could not schedule" error.
   *
   * @param modelName - ID of the model to deploy.
   */
  public async pull(modelName: string): Promise<void> {
    if (!modelName) {
-      logger.error('Model name is required');
+      logger.error('Model ID is required');
      return;
    }

    logger.log('');
-    logger.info(`Pulling model: ${modelName}`);
+    logger.info(`Deploying model: ${modelName}`);
    logger.log('');

    // NOTE(review): this await is not wrapped in try/catch here, so a rejection from the
    // coordinator would propagate to the caller instead of reaching the error branch below.
-    const result = await this.modelLoader.loadModel(modelName);
+    const result = await this.clusterCoordinator.ensureModelViaControlPlane(modelName);

    // The new result carries no error detail: a falsy value is the only failure signal,
    // which is why the failure message below is a fixed "could not schedule" text.
-    if (result.success) {
-      if (result.alreadyLoaded) {
-        logger.success(`Model "${modelName}" is already loaded`);
+    if (result) {
+      if (result.created) {
+        logger.success(`Model "${result.model}" deployed successfully`);
      } else {
-        logger.success(`Model "${modelName}" pulled successfully`);
-      }
-      if (result.container) {
-        logger.dim(`Container: ${result.container}`);
+        logger.success(`Model "${result.model}" is already available`);
      }
+      logger.dim(`Node: ${result.location.nodeName}`);
+      logger.dim(`Endpoint: ${result.location.endpoint}`);
    } else {
-      logger.error(`Failed to pull model: ${result.error}`);
+      logger.error(`Failed to deploy model: could not schedule ${modelName}`);
    }

    logger.log('');
  }

-  /**
-   * Remove a model
-   */
  public async remove(modelName: string): Promise<void> {
    if (!modelName) {
-      logger.error('Model name is required');
+      logger.error('Model ID is required');
      return;
    }

-    logger.info(`Removing model: ${modelName}`);
+    logger.info(`Removing deployment for model: ${modelName}`);
+
+    await this.clusterCoordinator.clearDesiredDeployment(modelName);

    const success = await this.modelLoader.unloadModel(modelName);

@@ -165,38 +148,27 @@ export class ModelHandler {
    }
  }

-  /**
-   * Show model loading status and recommendations
-   */
  /**
   * Prints the model status report.
   *
   * Logs one empty line for spacing, then awaits `modelLoader.printStatus()`;
   * everything shown after the blank line comes from that helper.
   */
  public async status(): Promise<void> {
    logger.log('');
    await this.modelLoader.printStatus();
  }

-  /**
-   * Refresh greenlist cache
-   */
  /**
   * Refreshes the model catalog, logging a progress line before and a success line after.
   *
   * Awaits `modelRegistry.fetchCatalog(true)`. If that call rejects, the rejection
   * propagates to the caller and the success line is never logged.
   *
   * NOTE(review): the `true` argument presumably forces a re-fetch rather than reusing a
   * cached catalog — confirm against `fetchCatalog`.
   */
  public async refresh(): Promise<void> {
-    logger.info('Refreshing greenlist...');
-
-    await this.modelRegistry.refreshGreenlist();
-
-    logger.success('Greenlist refreshed');
+    logger.info('Refreshing model catalog...');
+    await this.modelRegistry.fetchCatalog(true);
+    logger.success('Model catalog refreshed');
  }

-  /**
-   * Format file size
-   */
-  private formatSize(bytes: number): string {
-    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
-    let size = bytes;
-    let unitIndex = 0;
  /**
   * Renders a set of capability flags as a comma-separated list of the enabled names.
   *
   * Every entry of `capabilities` whose value is truthy contributes its key, in the
   * order `Object.entries` yields them. Because the keys are read at runtime, any extra
   * truthy property present on the object is listed too, not only the four typed ones.
   *
   * @param capabilities - Flags for chat, completions, embeddings and tools support.
   * @returns The enabled capability names joined with `', '`, or `theme.dim('none')`
   *   when no flag is enabled.
   */
+  private formatCapabilities(capabilities: {
+    chat?: boolean;
+    completions?: boolean;
+    embeddings?: boolean;
+    tools?: boolean;
+  }): string {
+    const enabled = Object.entries(capabilities)
+      .filter(([, value]) => value)
+      .map(([key]) => key);

-    while (size >= 1024 && unitIndex < units.length - 1) {
-      size /= 1024;
-      unitIndex++;
-    }
-
-    return `${size.toFixed(1)} ${units[unitIndex]}`;
+    return enabled.length > 0 ? enabled.join(', ') : theme.dim('none');
  }
 }
@@ -27,7 +27,9 @@ export class ServiceHandler {
  public async enable(): Promise<void> {
    // Installs the ModelGrid service: the root check runs first, then the systemd
    // helper's install() is awaited, and finally a hint on how to start the service
    // is logged.
    // NOTE(review): checkRootAccess presumably aborts when not running as root — its
    // implementation is not visible here, confirm before relying on it.
    this.checkRootAccess('This command must be run as root.');
    await this.modelgrid.getSystemd().install();
-    logger.log('ModelGrid service has been installed. Use "modelgrid service start" to start the service.');
+    logger.log(
+      'ModelGrid service has been installed. Use "modelgrid service start" to start the service.',
+    );
  }

  /**
@@ -41,7 +43,9 @@ export class ServiceHandler {
      }
      await this.modelgrid.getDaemon().start();
    } catch (error) {
-      logger.error(`Daemon start failed: ${error instanceof Error ? error.message : String(error)}`);
+      logger.error(
+        `Daemon start failed: ${error instanceof Error ? error.message : String(error)}`,
+      );
      process.exit(1);
    }
  }
@@ -127,13 +131,18 @@ export class ServiceHandler {

      try {
        const currentVersion = this.modelgrid.getVersion();
-        const apiUrl = 'https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/latest';
+        const apiUrl =
+          'https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/latest';
        const response = execSync(`curl -sSL ${apiUrl}`).toString();
        const release = JSON.parse(response);
        const latestVersion = release.tag_name;

-        const normalizedCurrent = currentVersion.startsWith('v') ? currentVersion : `v${currentVersion}`;
-        const normalizedLatest = latestVersion.startsWith('v') ? latestVersion : `v${latestVersion}`;
+        const normalizedCurrent = currentVersion.startsWith('v')
+          ? currentVersion
+          : `v${currentVersion}`;
+        const normalizedLatest = latestVersion.startsWith('v')
+          ? latestVersion
+          : `v${latestVersion}`;

        logger.dim(`Current version: ${normalizedCurrent}`);
        logger.dim(`Latest version:  ${normalizedLatest}`);
@@ -149,7 +158,8 @@ export class ServiceHandler {
        logger.dim('Downloading and installing...');
        console.log('');

-        const installUrl = 'https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh';
+        const installUrl =
+          'https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh';

        execSync(`curl -sSL ${installUrl} | bash`, {
          stdio: 'inherit',