modelgrid/ts/models/loader.ts

/**
 * Model Loader
 *
 * Handles automatic model loading with greenlist validation.
 */

import type { TContainerType } from '../interfaces/container.ts';
import { logger } from '../logger.ts';
import { ModelRegistry } from './registry.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
 * Model load result
 *
 * Outcome object returned by `ModelLoader.loadModel()` (and collected per
 * model by `ModelLoader.preloadModels()`).
 */
export interface IModelLoadResult {
  /** `true` when the model is available in a container after the call. */
  success: boolean;
  /** The model name that was requested. */
  model: string;
  /** Config id of the container holding the model; set on successful results. */
  container?: string;
  /** Human-readable failure reason; set on unsuccessful results. */
  error?: string;
  /** `true` when the model was already present in a container and nothing had to be pulled. */
  alreadyLoaded?: boolean;
}

/**
 * Model loader with greenlist validation
 *
 * Coordinates the model registry (greenlist), the container manager and GPU
 * detection to decide whether a model may be loaded, and pulls it into a
 * suitable container when auto-pull is enabled.
 */
export class ModelLoader {
  private readonly registry: ModelRegistry;
  private readonly containerManager: ContainerManager;
  private readonly gpuDetector: GpuDetector;
  private autoPull: boolean;

  /**
   * @param registry Source of greenlist information.
   * @param containerManager Provides access to the managed containers.
   * @param autoPull Whether missing models may be pulled automatically (default `true`).
   */
  constructor(
    registry: ModelRegistry,
    containerManager: ContainerManager,
    autoPull: boolean = true,
  ) {
    this.registry = registry;
    this.containerManager = containerManager;
    this.gpuDetector = new GpuDetector();
    this.autoPull = autoPull;
  }

  /**
   * Load a model with greenlist validation
   *
   * Checks, in order: whether the model is already present in a container,
   * whether it is greenlit, whether the detected GPUs provide enough VRAM,
   * whether auto-pull is enabled, and whether a container of the required
   * type is running (or can be started). Only then is the model pulled.
   *
   * @param modelName Name of the model to load.
   * @returns A result object; expected failures are reported via
   *   `success: false` and `error` rather than by throwing.
   */
  public async loadModel(modelName: string): Promise<IModelLoadResult> {
    logger.info(`Loading model: ${modelName}`);

    // Step 1: Check if model is already loaded in any container
    const existing = await this.containerManager.findContainerForModel(modelName);
    if (existing) {
      const containerId = existing.getConfig().id;
      logger.dim(`Model ${modelName} is already available in container ${containerId}`);
      return {
        success: true,
        model: modelName,
        container: containerId,
        alreadyLoaded: true,
      };
    }

    // Step 2: Check if model is greenlit
    const isGreenlit = await this.registry.isModelGreenlit(modelName);
    if (!isGreenlit) {
      logger.error(`Model ${modelName} is not in the greenlit list`);
      logger.info('Only greenlit models can be auto-pulled for security reasons.');
      logger.info('Contact your administrator to add this model to the greenlist.');
      return {
        success: false,
        model: modelName,
        error: `Model "${modelName}" is not greenlit. Request via admin or add to greenlist.`,
      };
    }

    // Step 3: Get model info from greenlist
    const modelInfo = await this.registry.getGreenlitModel(modelName);
    if (!modelInfo) {
      return {
        success: false,
        model: modelName,
        error: 'Failed to get model info from greenlist',
      };
    }

    // Step 4: Check VRAM requirements
    const totalVramGb = await this.getTotalVramGb();
    if (modelInfo.minVram > totalVramGb) {
      logger.error(`Insufficient VRAM for model ${modelName}`);
      logger.info(`Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`);
      return {
        success: false,
        model: modelName,
        error: `Insufficient VRAM. Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`,
      };
    }

    // Step 5: Without auto-pull there is no way to make the model available.
    // Step 1 already established that no container has it, so report a failure
    // instead of claiming success, and do so before a container gets started
    // for nothing.
    if (!this.autoPull) {
      logger.warn(`Model ${modelName} is not available and auto-pull is disabled`);
      return {
        success: false,
        model: modelName,
        error: `Model "${modelName}" is not available in any container and auto-pull is disabled`,
      };
    }

    // Step 6: Find or start an appropriate container
    const containerType = modelInfo.container;
    const targetContainer = await this.findAvailableContainer(containerType);
    if (!targetContainer) {
      logger.warn(`No ${containerType} container available`);

      // Could auto-create container here if desired
      return {
        success: false,
        model: modelName,
        error: `No ${containerType} container available to load model`,
      };
    }

    // Step 7: Pull the model into the target container
    logger.info(`Pulling model ${modelName} to ${containerType} container...`);
    const pullSuccess = await targetContainer.pullModel(modelName, (progress) => {
      const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
      logger.dim(`  ${progress.status}${percent}`);
    });
    if (!pullSuccess) {
      return {
        success: false,
        model: modelName,
        error: 'Failed to pull model',
      };
    }

    logger.success(`Model ${modelName} loaded successfully`);
    return {
      success: true,
      model: modelName,
      container: targetContainer.getConfig().id,
    };
  }

  /**
   * Sum the VRAM of all detected GPUs and convert it to whole gigabytes.
   *
   * Each `gpu.vram` value is divided by 1024 and the result is treated as GB,
   * so the detector presumably reports MB. The total is rounded to the nearest
   * integer.
   */
  private async getTotalVramGb(): Promise<number> {
    const gpus = await this.gpuDetector.detectGpus();
    const totalVram = gpus.reduce((sum, gpu) => sum + gpu.vram, 0);
    return Math.round(totalVram / 1024);
  }

  /**
   * Find an available container of the specified type
   *
   * Prefers a container that is already running. If none is running, tries to
   * start each matching container in turn and returns the first that starts.
   *
   * @param containerType Container type required by the model.
   * @returns The usable container, or `null` if none is running or could be started.
   */
  private async findAvailableContainer(
    containerType: TContainerType,
  ): Promise<import('../containers/base-container.ts').BaseContainer | null> {
    const containers = this.containerManager.getAllContainers();

    // First pass: reuse a container that is already running
    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }

      const status = await container.getStatus();
      if (status.running) {
        return container;
      }
    }

    // Second pass: no running container found, try to start one
    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }

      logger.info(`Starting ${containerType} container: ${container.getConfig().name}`);
      const started = await container.start();
      if (started) {
        return container;
      }
    }

    return null;
  }

  /**
   * Preload a list of models
   *
   * Models are loaded one after another. A failure is logged as a warning and
   * does not stop the remaining models from being attempted.
   *
   * @param modelNames Names of the models to load.
   * @returns Map from model name to its load result.
   */
  public async preloadModels(modelNames: string[]): Promise<Map<string, IModelLoadResult>> {
    const results = new Map<string, IModelLoadResult>();

    for (const modelName of modelNames) {
      const result = await this.loadModel(modelName);
      results.set(modelName, result);

      if (!result.success) {
        logger.warn(`Failed to preload model: ${modelName}`);
      }
    }

    return results;
  }

  /**
   * Unload a model from a container
   *
   * @param modelName Name of the model to remove.
   * @returns `false` if no container holds the model; otherwise the result of
   *   the container's `removeModel()` call.
   */
  public async unloadModel(modelName: string): Promise<boolean> {
    const container = await this.containerManager.findContainerForModel(modelName);
    if (!container) {
      logger.warn(`Model ${modelName} not found in any container`);
      return false;
    }

    return container.removeModel(modelName);
  }

  /**
   * Check if auto-pull is enabled
   */
  public isAutoPullEnabled(): boolean {
    return this.autoPull;
  }

  /**
   * Enable or disable auto-pull
   */
  public setAutoPull(enabled: boolean): void {
    this.autoPull = enabled;
  }

  /**
   * Get loading recommendations for available VRAM
   *
   * Classifies every greenlit model into exactly one bucket:
   * - `loaded`: its name is among the models reported by the container manager
   * - `canLoad`: not loaded, and its `minVram` fits into the detected total VRAM
   * - `cannotLoad`: not loaded, and its `minVram` exceeds the detected total VRAM
   *
   * Models that are available in a container but not greenlit are not listed.
   */
  public async getRecommendations(): Promise<{
    canLoad: string[];
    cannotLoad: string[];
    loaded: string[];
  }> {
    const totalVramGb = await this.getTotalVramGb();

    const allModels = await this.registry.getAllGreenlitModels();
    const availableModels = await this.containerManager.getAllAvailableModels();
    const loadedNames = new Set(availableModels.keys());

    const canLoad: string[] = [];
    const cannotLoad: string[] = [];
    const loaded: string[] = [];

    for (const model of allModels) {
      if (loadedNames.has(model.name)) {
        loaded.push(model.name);
      } else if (model.minVram <= totalVramGb) {
        canLoad.push(model.name);
      } else {
        cannotLoad.push(model.name);
      }
    }

    return { canLoad, cannotLoad, loaded };
  }

  /**
   * Print loading status
   *
   * Renders the result of `getRecommendations()` as a boxed report via the
   * logger. All loaded models are listed; the "available" list is capped at
   * five entries and the "insufficient VRAM" list at three, each followed by
   * an "... and N more" line when truncated.
   */
  public async printStatus(): Promise<void> {
    const recommendations = await this.getRecommendations();

    logger.logBoxTitle('Model Loading Status', 60, 'info');

    logger.logBoxLine(`Loaded Models (${recommendations.loaded.length}):`);
    if (recommendations.loaded.length > 0) {
      for (const model of recommendations.loaded) {
        logger.logBoxLine(`  - ${model}`);
      }
    } else {
      logger.logBoxLine('  None');
    }

    logger.logBoxLine('');
    logger.logBoxLine(`Available to Load (${recommendations.canLoad.length}):`);
    for (const model of recommendations.canLoad.slice(0, 5)) {
      logger.logBoxLine(`  - ${model}`);
    }
    if (recommendations.canLoad.length > 5) {
      logger.logBoxLine(`  ... and ${recommendations.canLoad.length - 5} more`);
    }

    logger.logBoxLine('');
    logger.logBoxLine(`Insufficient VRAM (${recommendations.cannotLoad.length}):`);
    for (const model of recommendations.cannotLoad.slice(0, 3)) {
      const info = await this.registry.getGreenlitModel(model);
      // `||` (not `??`) so that a missing, zero or NaN requirement renders as '?'
      logger.logBoxLine(`  - ${model} (needs ${info?.minVram || '?'}GB)`);
    }
    if (recommendations.cannotLoad.length > 3) {
      logger.logBoxLine(`  ... and ${recommendations.cannotLoad.length - 3} more`);
    }

    logger.logBoxEnd();
  }
}