/**
 * Model handler for catalog-backed vLLM deployments.
 */

import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { ClusterCoordinator } from '../cluster/coordinator.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import type { ITableColumn } from '../logger.ts';

/**
 * Command handler for the model-related operations: listing, deploying
 * ("pull"), removing, reporting status and refreshing the catalog.
 *
 * All results are reported through the shared `logger`; none of the public
 * methods return a value.
 */
export class ModelHandler {
  private readonly containerManager: ContainerManager;
  private readonly clusterCoordinator: ClusterCoordinator;
  private readonly modelRegistry: ModelRegistry;
  private readonly modelLoader: ModelLoader;

  /**
   * @param containerManager Source of the currently loaded models and their endpoints.
   * @param clusterCoordinator Used to request and clear model deployments.
   * @param modelRegistry Source of the model catalog.
   */
  constructor(
    containerManager: ContainerManager,
    clusterCoordinator: ClusterCoordinator,
    modelRegistry: ModelRegistry,
  ) {
    this.containerManager = containerManager;
    this.clusterCoordinator = clusterCoordinator;
    this.modelRegistry = modelRegistry;
    // The loader is built here from the registry and container manager rather than injected.
    this.modelLoader = new ModelLoader(modelRegistry, containerManager);
  }

  /**
   * Prints the model catalog as up to two tables: models that currently have
   * endpoints ("Running Deployments") and catalog models that are not loaded
   * ("Available To Deploy"). When both sources are empty, a warning box with
   * the refresh command is printed instead.
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Model Catalog');
    logger.log('');

    const loadedModels = await this.containerManager.getAllAvailableModels();
    const catalogModels = await this.modelRegistry.getAllModels();

    if (loadedModels.size === 0 && catalogModels.length === 0) {
      logger.logBox(
        'No Models',
        [
          'The local registry cache is empty.',
          '',
          theme.dim('Refresh with:'),
          ` ${theme.command('modelgrid model refresh')}`,
        ],
        60,
        'warning',
      );
      return;
    }

    if (loadedModels.size > 0) {
      logger.info(`Running Deployments (${loadedModels.size}):`);
      logger.log('');

      const rows: Record<string, string>[] = [];

      for (const [name, endpoints] of loadedModels) {
        // Only the first endpoint is shown; the replica column carries the total count.
        const primaryEndpoint = endpoints[0];

        rows.push({
          model: name,
          engine: primaryEndpoint?.type || 'vllm',
          replicas: String(endpoints.length),
          endpoint: primaryEndpoint?.url || theme.dim('N/A'),
        });
      }

      const columns: ITableColumn[] = [
        { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
        { header: 'Engine', key: 'engine', align: 'left' },
        { header: 'Replicas', key: 'replicas', align: 'right', color: theme.info },
        { header: 'Endpoint', key: 'endpoint', align: 'left', color: theme.dim },
      ];

      logger.logTable(columns, rows);
      logger.log('');
    }

    // Catalog entries whose id matches a loaded model name are already listed above.
    const loadedNames = new Set(loadedModels.keys());
    const available = catalogModels.filter((model) => !loadedNames.has(model.id));

    if (available.length > 0) {
      logger.info(`Available To Deploy (${available.length}):`);
      logger.log('');

      const rows: Record<string, string>[] = available.map((model) => ({
        model: model.id,
        family: model.metadata?.family || theme.dim('N/A'),
        vram: `${model.requirements.minVramGb} GB`,
        capabilities: this.formatCapabilities(model.capabilities),
      }));

      const columns: ITableColumn[] = [
        { header: 'Model', key: 'model', align: 'left' },
        { header: 'Family', key: 'family', align: 'left' },
        { header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
        { header: 'Capabilities', key: 'capabilities', align: 'left', color: theme.dim },
      ];

      logger.logTable(columns, rows);
      logger.log('');
    }
  }

  /**
   * Requests a deployment of the given model through the cluster coordinator
   * and logs the outcome, including the node name and endpoint on success.
   *
   * @param modelName Model ID to deploy; a falsy value logs an error and returns.
   */
  public async pull(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model ID is required');
      return;
    }

    logger.log('');
    logger.info(`Deploying model: ${modelName}`);
    logger.log('');

    const result = await this.clusterCoordinator.ensureModelViaControlPlane(modelName);

    if (result) {
      // `created` distinguishes a new deployment from one that already existed.
      if (result.created) {
        logger.success(`Model "${result.model}" deployed successfully`);
      } else {
        logger.success(`Model "${result.model}" is already available`);
      }
      logger.dim(`Node: ${result.location.nodeName}`);
      logger.dim(`Endpoint: ${result.location.endpoint}`);
    } else {
      logger.error(`Failed to deploy model: could not schedule ${modelName}`);
    }

    logger.log('');
  }

  /**
   * Clears the desired deployment for the given model, then unloads it via
   * the model loader and logs whether the unload succeeded.
   *
   * @param modelName Model ID to remove; a falsy value logs an error and returns.
   */
  public async remove(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model ID is required');
      return;
    }

    logger.info(`Removing deployment for model: ${modelName}`);

    // The desired deployment is cleared before the model itself is unloaded.
    await this.clusterCoordinator.clearDesiredDeployment(modelName);

    const success = await this.modelLoader.unloadModel(modelName);

    if (success) {
      logger.success(`Model "${modelName}" removed`);
    } else {
      logger.error(`Failed to remove model "${modelName}"`);
    }
  }

  /**
   * Prints a blank line followed by the model loader's status report.
   */
  public async status(): Promise<void> {
    logger.log('');
    await this.modelLoader.printStatus();
  }

  /**
   * Re-fetches the model catalog from the registry (passing `true` to
   * `fetchCatalog`) and logs a success message once the call resolves.
   */
  public async refresh(): Promise<void> {
    logger.info('Refreshing model catalog...');
    await this.modelRegistry.fetchCatalog(true);
    logger.success('Model catalog refreshed');
  }

  /**
   * Renders the truthy capability flags as a comma-separated list of their
   * key names, in the object's own key order.
   *
   * @param capabilities Capability flags of a catalog model.
   * @returns The joined key names, or a dimmed `none` when no flag is truthy.
   */
  private formatCapabilities(capabilities: {
    chat?: boolean;
    completions?: boolean;
    embeddings?: boolean;
    tools?: boolean;
  }): string {
    const enabled = Object.entries(capabilities)
      .filter(([, value]) => value)
      .map(([key]) => key);

    return enabled.length > 0 ? enabled.join(', ') : theme.dim('none');
  }
}