/**
 * Model Loader
 *
 * Handles automatic model loading with greenlist validation.
 */

import type { TContainerType } from '../interfaces/container.ts';
import { logger } from '../logger.ts';
import { ModelRegistry } from './registry.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
 * Model load result
 */
export interface IModelLoadResult {
  success: boolean;
  model: string;
  container?: string;
  error?: string;
  alreadyLoaded?: boolean;
}

/**
 * Model loader with greenlist validation
 */
export class ModelLoader {
  private registry: ModelRegistry;
  private containerManager: ContainerManager;
  private gpuDetector: GpuDetector;
  private autoPull: boolean;

  constructor(
    registry: ModelRegistry,
    containerManager: ContainerManager,
    autoPull: boolean = true,
  ) {
    this.registry = registry;
    this.containerManager = containerManager;
    this.gpuDetector = new GpuDetector();
    this.autoPull = autoPull;
  }

  /**
   * Load a model with greenlist validation
   */
  public async loadModel(modelName: string): Promise<IModelLoadResult> {
    logger.info(`Loading model: ${modelName}`);

    // Step 1: Check if model is already loaded in any container
    const container = await this.containerManager.findContainerForModel(modelName);
    if (container) {
      logger.dim(`Model ${modelName} is already available in container ${container.getConfig().id}`);
      return {
        success: true,
        model: modelName,
        container: container.getConfig().id,
        alreadyLoaded: true,
      };
    }

    // Step 2: Check if model is greenlit
    const isGreenlit = await this.registry.isModelGreenlit(modelName);
    if (!isGreenlit) {
      logger.error(`Model ${modelName} is not in the greenlit list`);
      logger.info('Only greenlit models can be auto-pulled for security reasons.');
      logger.info('Contact your administrator to add this model to the greenlist.');
      return {
        success: false,
        model: modelName,
        error: `Model "${modelName}" is not greenlit. Request via admin or add to greenlist.`,
      };
    }

    // Step 3: Get model info from greenlist
    const modelInfo = await this.registry.getGreenlitModel(modelName);
    if (!modelInfo) {
      return {
        success: false,
        model: modelName,
        error: 'Failed to get model info from greenlist',
      };
    }

    // Step 4: Check VRAM requirements
    const gpus = await this.gpuDetector.detectGpus();
    const totalVram = gpus.reduce((sum, gpu) => sum + gpu.vram, 0);
    const totalVramGb = Math.round(totalVram / 1024);

    if (modelInfo.minVram > totalVramGb) {
      logger.error(`Insufficient VRAM for model ${modelName}`);
      logger.info(`Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`);
      return {
        success: false,
        model: modelName,
        error: `Insufficient VRAM. Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`,
      };
    }

    // Step 5: Find or create appropriate container
    const containerType = modelInfo.container;
    const targetContainer = await this.findAvailableContainer(containerType);

    if (!targetContainer) {
      logger.warn(`No ${containerType} container available`);
      // Could auto-create container here if desired
      return {
        success: false,
        model: modelName,
        error: `No ${containerType} container available to load model`,
      };
    }

    // Step 6: Pull the model if auto-pull is enabled
    if (this.autoPull) {
      logger.info(`Pulling model ${modelName} to ${containerType} container...`);
      const pullSuccess = await targetContainer.pullModel(modelName, (progress) => {
        const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
        logger.dim(`  ${progress.status}${percent}`);
      });

      if (!pullSuccess) {
        return {
          success: false,
          model: modelName,
          error: 'Failed to pull model',
        };
      }
    }

    logger.success(`Model ${modelName} loaded successfully`);
    return {
      success: true,
      model: modelName,
      container: targetContainer.getConfig().id,
    };
  }

  /**
   * Find an available container of the specified type.
   * The return type is derived from ContainerManager because the concrete
   * container class is not imported in this module.
   */
  private async findAvailableContainer(
    containerType: TContainerType,
  ): Promise<ReturnType<ContainerManager['getAllContainers']>[number] | null> {
    const containers = this.containerManager.getAllContainers();

    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }
      const status = await container.getStatus();
      if (status.running) {
        return container;
      }
    }

    // No running container found, try to start one
    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }
      logger.info(`Starting ${containerType} container: ${container.getConfig().name}`);
      const started = await container.start();
      if (started) {
        return container;
      }
    }

    return null;
  }

  /**
   * Preload a list of models
   */
  public async preloadModels(modelNames: string[]): Promise<Map<string, IModelLoadResult>> {
    const results = new Map<string, IModelLoadResult>();

    for (const modelName of modelNames) {
      const result = await this.loadModel(modelName);
      results.set(modelName, result);
      if (!result.success) {
        logger.warn(`Failed to preload model: ${modelName}`);
      }
    }

    return results;
  }

  /**
   * Unload a model from a container
   */
  public async unloadModel(modelName: string): Promise<boolean> {
    const container = await this.containerManager.findContainerForModel(modelName);
    if (!container) {
      logger.warn(`Model ${modelName} not found in any container`);
      return false;
    }
    return container.removeModel(modelName);
  }

  /**
   * Check if auto-pull is enabled
   */
  public isAutoPullEnabled(): boolean {
    return this.autoPull;
  }

  /**
   * Enable or disable auto-pull
   */
  public setAutoPull(enabled: boolean): void {
    this.autoPull = enabled;
  }

  /**
   * Get loading recommendations for available VRAM
   */
  public async getRecommendations(): Promise<{
    canLoad: string[];
    cannotLoad: string[];
    loaded: string[];
  }> {
    const gpus = await this.gpuDetector.detectGpus();
    const totalVramGb = Math.round(gpus.reduce((sum, gpu) => sum + gpu.vram, 0) / 1024);

    const allModels = await this.registry.getAllGreenlitModels();
    const availableModels = await this.containerManager.getAllAvailableModels();
    const loadedNames = new Set(availableModels.keys());

    const canLoad: string[] = [];
    const cannotLoad: string[] = [];
    const loaded: string[] = [];

    for (const model of allModels) {
      if (loadedNames.has(model.name)) {
        loaded.push(model.name);
      } else if (model.minVram <= totalVramGb) {
        canLoad.push(model.name);
      } else {
        cannotLoad.push(model.name);
      }
    }

    return { canLoad, cannotLoad, loaded };
  }

  /**
   * Print loading status
   */
  public async printStatus(): Promise<void> {
    const recommendations = await this.getRecommendations();

    logger.logBoxTitle('Model Loading Status', 60, 'info');

    logger.logBoxLine(`Loaded Models (${recommendations.loaded.length}):`);
    if (recommendations.loaded.length > 0) {
      for (const model of recommendations.loaded) {
        logger.logBoxLine(`  - ${model}`);
      }
    } else {
      logger.logBoxLine('  None');
    }
    logger.logBoxLine('');

    logger.logBoxLine(`Available to Load (${recommendations.canLoad.length}):`);
    for (const model of recommendations.canLoad.slice(0, 5)) {
      logger.logBoxLine(`  - ${model}`);
    }
    if (recommendations.canLoad.length > 5) {
      logger.logBoxLine(`  ... and ${recommendations.canLoad.length - 5} more`);
    }
    logger.logBoxLine('');

    logger.logBoxLine(`Insufficient VRAM (${recommendations.cannotLoad.length}):`);
    for (const model of recommendations.cannotLoad.slice(0, 3)) {
      const info = await this.registry.getGreenlitModel(model);
      logger.logBoxLine(`  - ${model} (needs ${info?.minVram || '?'}GB)`);
    }
    if (recommendations.cannotLoad.length > 3) {
      logger.logBoxLine(`  ... and ${recommendations.cannotLoad.length - 3} more`);
    }

    logger.logBoxEnd();
  }
}
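
// Usage sketch (illustrative only, kept as a comment so this module has no
// side effects). It shows how a caller might wire up ModelLoader; the
// ModelRegistry and ContainerManager constructor calls and the model name
// below are assumptions, so adjust them to the actual APIs in registry.ts
// and container-manager.ts and to a model that is actually greenlit.
//
//   const registry = new ModelRegistry();
//   const containerManager = new ContainerManager();
//   const loader = new ModelLoader(registry, containerManager, true);
//
//   const result = await loader.loadModel('example-model');
//   if (!result.success) {
//     logger.error(result.error ?? 'Unknown load failure');
//   }
//   await loader.printStatus();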