feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
@@ -1,18 +1,16 @@
 /**
- * Model Loader
- *
- * Handles automatic model loading with greenlist validation.
+ * Model loader for vLLM deployments.
 */

-import type { TContainerType } from '../interfaces/container.ts';
+import type { IModelCatalogEntry } from '../interfaces/catalog.ts';
+import type { IGpuInfo } from '../interfaces/gpu.ts';
+import { filterOutUsedGpus, selectPlacementForModel } from '../cluster/placement.ts';
+import { VllmContainer } from '../containers/vllm.ts';
 import { logger } from '../logger.ts';
 import { ModelRegistry } from './registry.ts';
 import { ContainerManager } from '../containers/container-manager.ts';
 import { GpuDetector } from '../hardware/gpu-detector.ts';

-/**
- * Model load result
- */
 export interface IModelLoadResult {
  success: boolean;
  model: string;
@@ -21,161 +19,112 @@ export interface IModelLoadResult {
  alreadyLoaded?: boolean;
 }

-/**
- * Model loader with greenlist validation
- */
+/**
+ * Optional controls accepted by ModelLoader.loadModel.
+ */
+export interface IModelLoadOptions {
+  /** When set, loadModel does not return early just because a container already serves the model. */
+  forceNewReplica?: boolean;
+  /**
+   * Ordinal used to derive the deployment id and name. When omitted, loadModel
+   * uses the number of containers that already list the model.
+   */
+  replicaOrdinal?: number;
+}
+
 export class ModelLoader {
  private registry: ModelRegistry;
  private containerManager: ContainerManager;
  private gpuDetector: GpuDetector;
-  private autoPull: boolean;
+  private autoDeploy: boolean;

  constructor(
    registry: ModelRegistry,
    containerManager: ContainerManager,
-    autoPull: boolean = true,
+    autoDeploy: boolean = true,
  ) {
+    // Registry and container manager are injected; the GPU detector is always created here.
    this.registry = registry;
    this.containerManager = containerManager;
    this.gpuDetector = new GpuDetector();
-    this.autoPull = autoPull;
+    // When false, loadModel returns an error result instead of creating a deployment.
+    this.autoDeploy = autoDeploy;
  }

-  /**
-   * Load a model with greenlist validation
-   */
-  public async loadModel(modelName: string): Promise<IModelLoadResult> {
+  /**
+   * Ensures a vLLM deployment exists for the requested model.
+   *
+   * The name is resolved through the registry and the catalog entry's id is
+   * used as the canonical model name. Outcomes:
+   * - a container already serves the model and `forceNewReplica` is not set:
+   *   success with `alreadyLoaded: true` (this check runs before the registry
+   *   listing check, so it also applies to names the registry does not know);
+   * - the model is not in the registry, no GPU placement is found, or
+   *   automatic deployments are disabled: `success: false` with an `error`;
+   * - otherwise a new container is registered and started; if it fails to
+   *   start it is removed again and a failure result is returned.
+   *
+   * @param modelName name to look up in the registry
+   * @param options `forceNewReplica` bypasses the already-loaded shortcut;
+   *   `replicaOrdinal` overrides the ordinal used for the deployment id and name
+   * @returns the load result; the expected failures above are reported via `success: false`
+   */
+  public async loadModel(
+    modelName: string,
+    options: IModelLoadOptions = {},
+  ): Promise<IModelLoadResult> {
    logger.info(`Loading model: ${modelName}`);

-    // Step 1: Check if model is already loaded in any container
-    const container = await this.containerManager.findContainerForModel(modelName);
-    if (container) {
-      logger.dim(`Model ${modelName} is already available in container ${container.getConfig().id}`);
+    // Fall back to the caller's name when the registry has no entry for it.
+    const modelInfo = await this.registry.getModel(modelName);
+    const resolvedModelName = modelInfo?.id || modelName;
+
+    const existing = await this.containerManager.findContainerForModel(resolvedModelName);
+    if (existing && !options.forceNewReplica) {
      return {
        success: true,
-        model: modelName,
-        container: container.getConfig().id,
+        model: resolvedModelName,
+        container: existing.getConfig().id,
        alreadyLoaded: true,
      };
    }

-    // Step 2: Check if model is greenlit
-    const isGreenlit = await this.registry.isModelGreenlit(modelName);
-    if (!isGreenlit) {
-      logger.error(`Model ${modelName} is not in the greenlit list`);
-      logger.info('Only greenlit models can be auto-pulled for security reasons.');
-      logger.info('Contact your administrator to add this model to the greenlist.');
-      return {
-        success: false,
-        model: modelName,
-        error: `Model "${modelName}" is not greenlit. Request via admin or add to greenlist.`,
-      };
-    }
-
-    // Step 3: Get model info from greenlist
-    const modelInfo = await this.registry.getGreenlitModel(modelName);
    if (!modelInfo) {
      return {
        success: false,
-        model: modelName,
-        error: 'Failed to get model info from greenlist',
+        model: resolvedModelName,
+        error: `Model "${modelName}" is not listed in the registry`,
      };
    }

-    // Step 4: Check VRAM requirements
-    const gpus = await this.gpuDetector.detectGpus();
-    const totalVram = gpus.reduce((sum, gpu) => sum + gpu.vram, 0);
-    const totalVramGb = Math.round(totalVram / 1024);
-
-    if (modelInfo.minVram > totalVramGb) {
-      logger.error(`Insufficient VRAM for model ${modelName}`);
-      logger.info(`Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`);
+    // GPU detection and placement run before the autoDeploy check below.
+    const placement = this.planPlacement(modelInfo, await this.gpuDetector.detectGpus());
+    if (!placement) {
      return {
        success: false,
-        model: modelName,
-        error: `Insufficient VRAM. Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`,
+        model: resolvedModelName,
+        error: 'Insufficient GPU capacity for deployment',
      };
    }

-    // Step 5: Find or create appropriate container
-    const containerType = modelInfo.container;
-    let targetContainer = await this.findAvailableContainer(containerType);
-
-    if (!targetContainer) {
-      logger.warn(`No ${containerType} container available`);
-
-      // Could auto-create container here if desired
+    if (!this.autoDeploy) {
      return {
        success: false,
-        model: modelName,
-        error: `No ${containerType} container available to load model`,
+        model: resolvedModelName,
+        error: 'Automatic deployments are disabled',
      };
    }

-    // Step 6: Pull the model if auto-pull is enabled
-    if (this.autoPull) {
-      logger.info(`Pulling model ${modelName} to ${containerType} container...`);
+    // Id and name use the same ordinal: the explicit option, or the current replica count.
+    const deploymentId = this.createDeploymentId(
+      modelInfo.id,
+      options.replicaOrdinal ?? this.getExistingReplicaCount(modelInfo.id),
+    );
+    const deploymentName = this.createDeploymentName(
+      modelInfo.id,
+      options.replicaOrdinal ?? this.getExistingReplicaCount(modelInfo.id),
+    );
+    const config = VllmContainer.createConfig(
+      deploymentId,
+      deploymentName,
+      modelInfo.source.repo,
+      placement.gpuIds,
+      {
+        env: {
+          ...(modelInfo.launchDefaults?.env || {}),
+        },
+        command: this.buildVllmCommand(modelInfo, placement.tensorParallelSize),
+      },
+    );
+    // Record the canonical catalog id on the container config.
+    config.models = [modelInfo.id];

-      const pullSuccess = await targetContainer.pullModel(modelName, (progress) => {
-        const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
-        logger.dim(`  ${progress.status}${percent}`);
-      });
-
-      if (!pullSuccess) {
-        return {
-          success: false,
-          model: modelName,
-          error: 'Failed to pull model',
-        };
-      }
+    const container = this.containerManager.addContainer(config);
+    const started = await container.start();
+    if (!started) {
+      // Undo the registration so a failed start does not leave the container behind.
+      await this.containerManager.removeContainer(config.id);
+      return {
+        success: false,
+        model: resolvedModelName,
+        error: 'Failed to start vLLM deployment',
+      };
    }

-    logger.success(`Model ${modelName} loaded successfully`);
    return {
      success: true,
-      model: modelName,
-      container: targetContainer.getConfig().id,
+      model: modelInfo.id,
+      container: config.id,
    };
  }

-  /**
-   * Find an available container of the specified type
-   */
-  private async findAvailableContainer(
-    containerType: TContainerType,
-  ): Promise<import('../containers/base-container.ts').BaseContainer | null> {
-    const containers = this.containerManager.getAllContainers();
-
-    for (const container of containers) {
-      if (container.type !== containerType) {
-        continue;
-      }
-
-      const status = await container.getStatus();
-      if (status.running) {
-        return container;
-      }
-    }
-
-    // No running container found, try to start one
-    for (const container of containers) {
-      if (container.type !== containerType) {
-        continue;
-      }
-
-      logger.info(`Starting ${containerType} container: ${container.getConfig().name}`);
-      const started = await container.start();
-      if (started) {
-        return container;
-      }
-    }
-
-    return null;
-  }
-
-  /**
-   * Preload a list of models
-   */
  public async preloadModels(modelNames: string[]): Promise<Map<string, IModelLoadResult>> {
    const results = new Map<string, IModelLoadResult>();

@@ -191,36 +140,45 @@ export class ModelLoader {
    return results;
  }

-  /**
-   * Unload a model from a container
-   */
  public async unloadModel(modelName: string): Promise<boolean> {
-    const container = await this.containerManager.findContainerForModel(modelName);
-    if (!container) {
+    // Resolve to the catalog id when the registry knows the name; otherwise use the name as given.
+    const modelInfo = await this.registry.getModel(modelName);
+    const canonicalModel = modelInfo?.id || modelName;
+    // Every container whose config lists the model is selected, not just the first.
+    const containers = this.containerManager.getAllContainers().filter((container) =>
+      container.getConfig().models.includes(canonicalModel)
+    );
+
+    if (containers.length === 0) {
      logger.warn(`Model ${modelName} not found in any container`);
      return false;
    }

-    return container.removeModel(modelName);
+    // Containers are removed one at a time; the result is true only if every removal reported success.
+    let allRemoved = true;
+    for (const container of containers) {
+      const removed = await this.containerManager.removeContainer(container.getConfig().id);
+      allRemoved = allRemoved && removed;
+    }
+
+    return allRemoved;
+  }
+
+  /**
+   * Starts another deployment of a model even when one is already running.
+   *
+   * Thin wrapper around loadModel that always sets `forceNewReplica` and
+   * forwards the optional replica ordinal unchanged.
+   */
+  public async deployReplica(
+    modelName: string,
+    replicaOrdinal?: number,
+  ): Promise<IModelLoadResult> {
+    const replicaOptions: IModelLoadOptions = { forceNewReplica: true, replicaOrdinal };
+    return this.loadModel(modelName, replicaOptions);
  }

-  /**
-   * Check if auto-pull is enabled
-   */
  public isAutoPullEnabled(): boolean {
-    return this.autoPull;
+    // The method keeps its "auto pull" name; the value reported is the autoDeploy flag.
+    return this.autoDeploy;
  }

-  /**
-   * Enable or disable auto-pull
-   */
  public setAutoPull(enabled: boolean): void {
-    this.autoPull = enabled;
+    // The method keeps its "auto pull" name; it toggles the autoDeploy flag checked by loadModel.
+    this.autoDeploy = enabled;
  }

-  /**
-   * Get loading recommendations for available VRAM
-   */
  public async getRecommendations(): Promise<{
    canLoad: string[];
    cannotLoad: string[];
@@ -229,7 +187,7 @@ export class ModelLoader {
    const gpus = await this.gpuDetector.detectGpus();
    const totalVramGb = Math.round(gpus.reduce((sum, gpu) => sum + gpu.vram, 0) / 1024);

-    const allModels = await this.registry.getAllGreenlitModels();
+    const allModels = await this.registry.getAllModels();
    const availableModels = await this.containerManager.getAllAvailableModels();
    const loadedNames = new Set(availableModels.keys());

@@ -238,27 +196,24 @@ export class ModelLoader {
    const loaded: string[] = [];

    for (const model of allModels) {
-      if (loadedNames.has(model.name)) {
-        loaded.push(model.name);
-      } else if (model.minVram <= totalVramGb) {
-        canLoad.push(model.name);
+      if (loadedNames.has(model.id)) {
+        loaded.push(model.id);
+      } else if (model.requirements.minVramGb <= totalVramGb) {
+        canLoad.push(model.id);
      } else {
-        cannotLoad.push(model.name);
+        cannotLoad.push(model.id);
      }
    }

    return { canLoad, cannotLoad, loaded };
  }

-  /**
-   * Print loading status
-   */
  public async printStatus(): Promise<void> {
    const recommendations = await this.getRecommendations();

-    logger.logBoxTitle('Model Loading Status', 60, 'info');
+    logger.logBoxTitle('Model Deployment Status', 70, 'info');

-    logger.logBoxLine(`Loaded Models (${recommendations.loaded.length}):`);
+    logger.logBoxLine(`Running Deployments (${recommendations.loaded.length}):`);
    if (recommendations.loaded.length > 0) {
      for (const model of recommendations.loaded) {
        logger.logBoxLine(`  - ${model}`);
@@ -268,7 +223,7 @@ export class ModelLoader {
    }

    logger.logBoxLine('');
-    logger.logBoxLine(`Available to Load (${recommendations.canLoad.length}):`);
+    logger.logBoxLine(`Ready To Deploy (${recommendations.canLoad.length}):`);
    for (const model of recommendations.canLoad.slice(0, 5)) {
      logger.logBoxLine(`  - ${model}`);
    }
@@ -277,10 +232,10 @@ export class ModelLoader {
    }

    logger.logBoxLine('');
-    logger.logBoxLine(`Insufficient VRAM (${recommendations.cannotLoad.length}):`);
+    logger.logBoxLine(`Needs Larger GPUs (${recommendations.cannotLoad.length}):`);
    for (const model of recommendations.cannotLoad.slice(0, 3)) {
-      const info = await this.registry.getGreenlitModel(model);
-      logger.logBoxLine(`  - ${model} (needs ${info?.minVram || '?'}GB)`);
+      const info = await this.registry.getModel(model);
+      logger.logBoxLine(`  - ${model} (needs ${info?.requirements.minVramGb || '?'}GB)`);
    }
    if (recommendations.cannotLoad.length > 3) {
      logger.logBoxLine(`  ... and ${recommendations.cannotLoad.length - 3} more`);
@@ -288,4 +243,96 @@ export class ModelLoader {

    logger.logBoxEnd();
  }
+
+  /**
+   * Picks the GPUs a new deployment of `modelInfo` should run on.
+   *
+   * GPUs that no registered container references are tried first. If no
+   * placement is found among them, the full GPU list is tried as a fallback.
+   *
+   * @returns the chosen GPU ids and tensor-parallel size, or null when
+   *   neither attempt yields a placement
+   */
+  private planPlacement(
+    modelInfo: IModelCatalogEntry,
+    gpus: IGpuInfo[],
+  ): { gpuIds: string[]; tensorParallelSize: number } | null {
+    const claimedGpuIds = this.containerManager
+      .getAllContainers()
+      .map((container) => container.getConfig().gpuIds)
+      .flat();
+    const unclaimedGpus = filterOutUsedGpus(gpus, claimedGpuIds);
+
+    // The second lookup only runs when the first one finds nothing.
+    const placement = selectPlacementForModel(modelInfo, unclaimedGpus) ||
+      selectPlacementForModel(modelInfo, gpus);
+    if (!placement) {
+      return null;
+    }
+
+    const { gpuIds, tensorParallelSize } = placement;
+    return { gpuIds, tensorParallelSize };
+  }
+
+  /**
+   * Assembles the vLLM argument list for a deployment.
+   *
+   * `--model` always comes first. `--tensor-parallel-size` is added only
+   * when it exceeds 1. Each launch default is added only when its value is
+   * truthy, and `extraArgs` are appended last.
+   */
+  private buildVllmCommand(
+    modelInfo: IModelCatalogEntry,
+    tensorParallelSize: number,
+  ): string[] {
+    const defaults = modelInfo.launchDefaults;
+    const args = ['--model', modelInfo.source.repo];
+
+    if (tensorParallelSize > 1) {
+      args.push('--tensor-parallel-size', String(tensorParallelSize));
+    }
+    if (defaults?.maxModelLen) {
+      args.push('--max-model-len', String(defaults.maxModelLen));
+    }
+    if (defaults?.gpuMemoryUtilization) {
+      args.push('--gpu-memory-utilization', String(defaults.gpuMemoryUtilization));
+    }
+    if (defaults?.quantization) {
+      args.push('--quantization', defaults.quantization);
+    }
+    if (defaults?.dtype) {
+      args.push('--dtype', defaults.dtype);
+    }
+    if (defaults?.generationConfig) {
+      args.push('--generation-config', defaults.generationConfig);
+    }
+
+    return defaults?.extraArgs ? [...args, ...defaults.extraArgs] : args;
+  }
+
+  /** Counts the registered containers whose config lists the given model id. */
+  private getExistingReplicaCount(modelId: string): number {
+    let replicas = 0;
+    for (const container of this.containerManager.getAllContainers()) {
+      if (container.getConfig().models.includes(modelId)) {
+        replicas += 1;
+      }
+    }
+    return replicas;
+  }
+
+  /**
+   * Derives a container id of the form `vllm-<slug>[-rN]` from a model id.
+   *
+   * The slug is the lower-cased model id with every run of characters
+   * outside [a-z0-9] collapsed to a single dash, capped at 32 characters.
+   * Trailing dashes are stripped again after truncation, so a cut that
+   * lands on a separator cannot leave a dangling dash (`vllm-foo-`) or a
+   * doubled one before the replica suffix (`vllm-foo--r2`).
+   *
+   * @param modelId model id to derive the slug from
+   * @param replicaOrdinal zero-based replica index; ordinals above 0 add a
+   *   `-r<ordinal + 1>` suffix
+   * @returns the deployment id
+   */
+  private createDeploymentId(modelId: string, replicaOrdinal: number): string {
+    const baseId = modelId
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, '-')
+      .replace(/^-+|-+$/g, '')
+      .slice(0, 32)
+      // Truncation can expose a separator at the end; trim it once more.
+      .replace(/-+$/, '');
+    const suffix = replicaOrdinal > 0 ? `-r${replicaOrdinal + 1}` : '';
+    return `vllm-${baseId}${suffix}`;
+  }
+
+  /**
+   * Builds the display name for a deployment: the last `/`-separated segment
+   * of the model id (or the whole id when that segment is empty), followed by
+   * ` replica <ordinal + 1>` for any ordinal other than 0.
+   */
+  private createDeploymentName(modelId: string, replicaOrdinal: number): string {
+    const segments = modelId.split('/');
+    const label = segments[segments.length - 1] || modelId;
+    return replicaOrdinal === 0 ? label : `${label} replica ${replicaOrdinal + 1}`;
+  }
 }
@@ -1,252 +1,205 @@
 /**
- * Model Registry
- *
- * Manages the greenlit model list and model availability.
+ * Model registry backed by list.modelgrid.com.
 */

-import type { IGreenlitModel, IGreenlitModelsList } from '../interfaces/config.ts';
-import type { TContainerType } from '../interfaces/container.ts';
+import * as fs from 'node:fs/promises';
+import type { IModelCatalog, IModelCatalogEntry } from '../interfaces/catalog.ts';
 import { MODEL_REGISTRY, TIMING } from '../constants.ts';
 import { logger } from '../logger.ts';

-/**
- * Model registry for managing greenlit models
- */
 export class ModelRegistry {
-  private greenlistUrl: string;
-  private cachedGreenlist: IGreenlitModelsList | null = null;
+  private catalogUrl: string;
+  private cachedCatalog: IModelCatalog | null = null;
  private cacheTime: number = 0;

-  constructor(greenlistUrl: string = MODEL_REGISTRY.DEFAULT_GREENLIST_URL) {
-    this.greenlistUrl = greenlistUrl;
+  /** Creates a registry that reads its catalog from `catalogUrl` (defaults to MODEL_REGISTRY.DEFAULT_CATALOG_URL). */
+  constructor(catalogUrl: string = MODEL_REGISTRY.DEFAULT_CATALOG_URL) {
+    this.catalogUrl = catalogUrl;
  }

-  /**
-   * Set the greenlist URL
-   */
-  public setGreenlistUrl(url: string): void {
-    this.greenlistUrl = url;
-    this.cachedGreenlist = null;
+  /** Points the registry at a different catalog source and discards the cached catalog and its timestamp. */
+  public setCatalogUrl(url: string): void {
+    this.catalogUrl = url;
+    this.cachedCatalog = null;
    this.cacheTime = 0;
  }

-  /**
-   * Fetch the greenlit model list from remote URL
-   */
-  public async fetchGreenlist(forceRefresh: boolean = false): Promise<IGreenlitModelsList> {
-    // Return cached data if still valid
+  /**
+   * Returns the model catalog, using the in-memory cache when possible.
+   *
+   * The cached catalog is returned when `forceRefresh` is false and the cache
+   * is younger than TIMING.GREENLIST_CACHE_DURATION_MS. Otherwise the catalog
+   * is read from the configured source, checked for a `models` array, and
+   * cached. Errors raised while reading or validating are caught and logged
+   * as a warning: the previously cached catalog is returned if there is one,
+   * otherwise the built-in fallback catalog.
+   *
+   * @param forceRefresh skip the cache-age check and re-read the source
+   */
+  public async fetchCatalog(forceRefresh: boolean = false): Promise<IModelCatalog> {
    if (
      !forceRefresh &&
-      this.cachedGreenlist &&
+      this.cachedCatalog &&
      Date.now() - this.cacheTime < TIMING.GREENLIST_CACHE_DURATION_MS
    ) {
-      return this.cachedGreenlist;
+      return this.cachedCatalog;
    }

    try {
-      logger.dim(`Fetching greenlit models from: ${this.greenlistUrl}`);
+      logger.dim(`Fetching model catalog from: ${this.catalogUrl}`);
+      const catalog = await this.readCatalogSource(this.catalogUrl);

-      const controller = new AbortController();
-      const timeout = setTimeout(() => controller.abort(), 30000);
+      // Only the presence of a models array is verified; individual entries are not checked.
+      if (!Array.isArray(catalog.models)) {
+        throw new Error('Invalid catalog format: missing models array');
+      }

-      const response = await fetch(this.greenlistUrl, {
+      this.cachedCatalog = catalog;
+      this.cacheTime = Date.now();
+
+      logger.dim(`Loaded ${catalog.models.length} catalog models`);
+      return catalog;
+    } catch (error) {
+      logger.warn(
+        `Failed to fetch model catalog: ${error instanceof Error ? error.message : String(error)}`,
+      );
+
+      // The fallback is returned without being cached, so the next call tries the source again.
+      if (!this.cachedCatalog) {
+        logger.dim('Using fallback catalog');
+        return this.getFallbackCatalog();
+      }
+
+      // A stale cache is preferred over the fallback.
+      return this.cachedCatalog;
+    }
+  }
+
+  /** Resolves to true when getModel finds a catalog entry for the name. */
+  public async isModelListed(modelName: string): Promise<boolean> {
+    const entry = await this.getModel(modelName);
+    return entry !== null;
+  }
+
+  /**
+   * Looks up a catalog entry by id or alias.
+   *
+   * Both the requested name and each candidate are passed through
+   * normalizeModelName before comparison. The first matching entry in
+   * catalog order is returned.
+   *
+   * @returns the matching entry, or null when nothing matches
+   */
+  public async getModel(modelName: string): Promise<IModelCatalogEntry | null> {
+    const { models } = await this.fetchCatalog();
+    const wanted = this.normalizeModelName(modelName);
+
+    for (const entry of models) {
+      const names = [entry.id, ...(entry.aliases || [])];
+      if (names.some((name) => this.normalizeModelName(name) === wanted)) {
+        return entry;
+      }
+    }
+
+    return null;
+  }
+
+  /** Returns every entry of the current catalog. */
+  public async getAllModels(): Promise<IModelCatalogEntry[]> {
+    return (await this.fetchCatalog()).models;
+  }
+
+  /** Returns the catalog entries whose `engine` equals the given engine. */
+  public async getModelsByEngine(engine: 'vllm'): Promise<IModelCatalogEntry[]> {
+    const { models } = await this.fetchCatalog();
+    return models.filter(({ engine: entryEngine }) => entryEngine === engine);
+  }
+
+  /** Returns the catalog entries whose `requirements.minVramGb` does not exceed `maxVramGb`. */
+  public async getModelsWithinVram(maxVramGb: number): Promise<IModelCatalogEntry[]> {
+    const { models } = await this.fetchCatalog();
+    return models.filter(({ requirements }) => requirements.minVramGb <= maxVramGb);
+  }
+
+  /** Returns the `engine` of the model's catalog entry, or null when the model is not listed. */
+  public async getRecommendedEngine(modelName: string): Promise<'vllm' | null> {
+    const entry = await this.getModel(modelName);
+    if (!entry) {
+      return null;
+    }
+
+    return entry.engine;
+  }
+
+  /** Returns `requirements.minVramGb` of the model's catalog entry, or null when the model is not listed. */
+  public async getMinVram(modelName: string): Promise<number | null> {
+    const entry = await this.getModel(modelName);
+    if (!entry) {
+      return null;
+    }
+
+    return entry.requirements.minVramGb;
+  }
+
+  /**
+   * Reports whether the model's minimum VRAM requirement is covered by
+   * `availableVramGb`. Models that are not in the catalog never fit.
+   */
+  public async modelFitsInVram(modelName: string, availableVramGb: number): Promise<boolean> {
+    const requiredGb = await this.getMinVram(modelName);
+    return requiredGb !== null && availableVramGb >= requiredGb;
+  }
+
+  /**
+   * Case-insensitive substring search over the catalog.
+   *
+   * An entry matches when the pattern occurs in its id, in any alias, in
+   * `metadata.summary`, or in any of `metadata.tags`.
+   */
+  public async searchModels(pattern: string): Promise<IModelCatalogEntry[]> {
+    const { models } = await this.fetchCatalog();
+    const needle = pattern.toLowerCase();
+    const contains = (text: string): boolean => text.toLowerCase().includes(needle);
+
+    return models.filter((entry) => {
+      if (contains(entry.id)) {
+        return true;
+      }
+      if (entry.aliases?.some((alias) => contains(alias))) {
+        return true;
+      }
+      if (entry.metadata?.summary?.toLowerCase().includes(needle)) {
+        return true;
+      }
+      return entry.metadata?.tags?.some((tag) => contains(tag));
+    });
+  }
+
+  /** Returns the entries that carry at least one of the given tags, compared case-insensitively. */
+  public async getModelsByTags(tags: string[]): Promise<IModelCatalogEntry[]> {
+    const { models } = await this.fetchCatalog();
+    const wantedTags = new Set(tags.map((tag) => tag.toLowerCase()));
+
+    return models.filter((entry) =>
+      entry.metadata?.tags?.some((tag) => wantedTags.has(tag.toLowerCase()))
+    );
+  }
+
+  /** Forgets the cached catalog so the next fetchCatalog call reads the source again. */
+  public clearCache(): void {
+    this.cacheTime = 0;
+    this.cachedCatalog = null;
+  }
+
+  /**
+   * Logs a boxed overview of the catalog: version, generation timestamp,
+   * model count, and the first ten models with their VRAM requirement and engine.
+   */
+  public async printSummary(): Promise<void> {
+    const { version, generatedAt, models } = await this.fetchCatalog();
+    const previewCount = 10;
+
+    logger.logBoxTitle('Model Catalog', 70, 'info');
+    logger.logBoxLine(`Version: ${version}`);
+    logger.logBoxLine(`Generated: ${generatedAt}`);
+    logger.logBoxLine(`Total Models: ${models.length}`);
+    logger.logBoxLine('');
+
+    models.slice(0, previewCount).forEach((model) => {
+      logger.logBoxLine(
+        `- ${model.id} (${model.requirements.minVramGb}GB, ${model.engine})`,
+      );
+    });
+
+    const remaining = models.length - previewCount;
+    if (remaining > 0) {
+      logger.logBoxLine(`... and ${remaining} more`);
+    }
+
+    logger.logBoxEnd();
+  }
+
+  /**
+   * Loads and parses the catalog JSON from a source string.
+   *
+   * - `file://` URLs and strings starting with `/` are read from disk.
+   * - Anything else is requested with `fetch`, aborted after 30 seconds.
+   *
+   * The parsed value is cast to IModelCatalog without any validation here.
+   *
+   * @throws when reading or JSON parsing fails, or (for fetched sources) when
+   *   the response status is not OK
+   */
+  private async readCatalogSource(source: string): Promise<IModelCatalog> {
+    if (source.startsWith('file://')) {
+      const filePath = new URL(source);
+      const content = await fs.readFile(filePath, 'utf-8');
+      return JSON.parse(content) as IModelCatalog;
+    }
+
+    if (source.startsWith('/')) {
+      const content = await fs.readFile(source, 'utf-8');
+      return JSON.parse(content) as IModelCatalog;
+    }
+
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), 30000);
+
+    try {
+      const response = await fetch(source, {
        signal: controller.signal,
        headers: {
-          'Accept': 'application/json',
+          Accept: 'application/json',
          'User-Agent': 'ModelGrid/1.0',
        },
      });

-      clearTimeout(timeout);
-
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

-      const greenlist = await response.json() as IGreenlitModelsList;
-
-      // Validate structure
-      if (!greenlist.models || !Array.isArray(greenlist.models)) {
-        throw new Error('Invalid greenlist format: missing models array');
-      }
-
-      // Cache the result
-      this.cachedGreenlist = greenlist;
-      this.cacheTime = Date.now();
-
-      logger.dim(`Loaded ${greenlist.models.length} greenlit models`);
-      return greenlist;
-    } catch (error) {
-      logger.warn(`Failed to fetch greenlist: ${error instanceof Error ? error.message : String(error)}`);
-
-      // Return fallback if we have no cache
-      if (!this.cachedGreenlist) {
-        logger.dim('Using fallback greenlist');
-        return this.getFallbackGreenlist();
-      }
-
-      // Return stale cache
-      return this.cachedGreenlist;
+      return await response.json() as IModelCatalog;
+    } finally {
+      // Clearing in finally keeps the abort timer armed while the body is read and cancels it on every exit path.
+      clearTimeout(timeout);
    }
  }

-  /**
-   * Get fallback greenlist
-   */
-  private getFallbackGreenlist(): IGreenlitModelsList {
+  /**
+   * Builds the catalog used when the source cannot be read and nothing is cached.
+   * `generatedAt` is the time of the call. The models come from
+   * MODEL_REGISTRY.FALLBACK_CATALOG through a double cast, so their shape is
+   * not type-checked here.
+   */
+  private getFallbackCatalog(): IModelCatalog {
    return {
      version: '1.0',
-      lastUpdated: new Date().toISOString(),
-      models: MODEL_REGISTRY.FALLBACK_GREENLIST as unknown as IGreenlitModel[],
+      generatedAt: new Date().toISOString(),
+      models: MODEL_REGISTRY.FALLBACK_CATALOG as unknown as IModelCatalogEntry[],
    };
  }

-  /**
-   * Check if a model is greenlit
-   */
-  public async isModelGreenlit(modelName: string): Promise<boolean> {
-    const greenlist = await this.fetchGreenlist();
-    return greenlist.models.some((m) => this.normalizeModelName(m.name) === this.normalizeModelName(modelName));
-  }
-
-  /**
-   * Get greenlit model info
-   */
-  public async getGreenlitModel(modelName: string): Promise<IGreenlitModel | null> {
-    const greenlist = await this.fetchGreenlist();
-    const normalized = this.normalizeModelName(modelName);
-    return greenlist.models.find((m) => this.normalizeModelName(m.name) === normalized) || null;
-  }
-
-  /**
-   * Get all greenlit models
-   */
-  public async getAllGreenlitModels(): Promise<IGreenlitModel[]> {
-    const greenlist = await this.fetchGreenlist();
-    return greenlist.models;
-  }
-
-  /**
-   * Get greenlit models by container type
-   */
-  public async getModelsByContainer(containerType: TContainerType): Promise<IGreenlitModel[]> {
-    const greenlist = await this.fetchGreenlist();
-    return greenlist.models.filter((m) => m.container === containerType);
-  }
-
-  /**
-   * Get greenlit models that fit within VRAM limit
-   */
-  public async getModelsWithinVram(maxVramGb: number): Promise<IGreenlitModel[]> {
-    const greenlist = await this.fetchGreenlist();
-    return greenlist.models.filter((m) => m.minVram <= maxVramGb);
-  }
-
-  /**
-   * Get recommended container type for a model
-   */
-  public async getRecommendedContainer(modelName: string): Promise<TContainerType | null> {
-    const model = await this.getGreenlitModel(modelName);
-    return model ? model.container : null;
-  }
-
-  /**
-   * Get minimum VRAM required for a model
-   */
-  public async getMinVram(modelName: string): Promise<number | null> {
-    const model = await this.getGreenlitModel(modelName);
-    return model ? model.minVram : null;
-  }
-
-  /**
-   * Check if model fits in available VRAM
-   */
-  public async modelFitsInVram(modelName: string, availableVramGb: number): Promise<boolean> {
-    const minVram = await this.getMinVram(modelName);
-    if (minVram === null) {
-      // Model not in greenlist, assume it might fit
-      return true;
-    }
-    return availableVramGb >= minVram;
-  }
-
-  /**
-   * Normalize model name for comparison
-   * Handles variations like "llama3:8b" vs "llama3:8B" vs "meta-llama/llama-3-8b"
-   */
  private normalizeModelName(name: string): string {
+    // Lower-case, then drop every character outside a-z, 0-9, ':', '/', '.', '_' and '-',
+    // so case and stray characters (including whitespace) do not affect comparisons.
    return name
      .toLowerCase()
-      .replace(/[^a-z0-9:.-]/g, '')
+      .replace(/[^a-z0-9:/._-]/g, '')
      .trim();
  }
-
-  /**
-   * Search models by name pattern
-   */
-  public async searchModels(pattern: string): Promise<IGreenlitModel[]> {
-    const greenlist = await this.fetchGreenlist();
-    const normalizedPattern = pattern.toLowerCase();
-
-    return greenlist.models.filter((m) =>
-      m.name.toLowerCase().includes(normalizedPattern) ||
-      m.description?.toLowerCase().includes(normalizedPattern) ||
-      m.tags?.some((t) => t.toLowerCase().includes(normalizedPattern))
-    );
-  }
-
-  /**
-   * Get models by tags
-   */
-  public async getModelsByTags(tags: string[]): Promise<IGreenlitModel[]> {
-    const greenlist = await this.fetchGreenlist();
-    const normalizedTags = tags.map((t) => t.toLowerCase());
-
-    return greenlist.models.filter((m) =>
-      m.tags?.some((t) => normalizedTags.includes(t.toLowerCase()))
-    );
-  }
-
-  /**
-   * Clear the cached greenlist
-   */
-  public clearCache(): void {
-    this.cachedGreenlist = null;
-    this.cacheTime = 0;
-  }
-
-  /**
-   * Print greenlist summary
-   */
-  public async printSummary(): Promise<void> {
-    const greenlist = await this.fetchGreenlist();
-
-    // Group by container type
-    const byContainer = new Map<string, IGreenlitModel[]>();
-    for (const model of greenlist.models) {
-      if (!byContainer.has(model.container)) {
-        byContainer.set(model.container, []);
-      }
-      byContainer.get(model.container)!.push(model);
-    }
-
-    logger.logBoxTitle('Greenlit Models', 60, 'info');
-    logger.logBoxLine(`Version: ${greenlist.version}`);
-    logger.logBoxLine(`Last Updated: ${greenlist.lastUpdated}`);
-    logger.logBoxLine(`Total Models: ${greenlist.models.length}`);
-    logger.logBoxLine('');
-
-    for (const [container, models] of byContainer) {
-      logger.logBoxLine(`${container.toUpperCase()} (${models.length}):`);
-      for (const model of models.slice(0, 5)) {
-        logger.logBoxLine(`  - ${model.name} (${model.minVram}GB VRAM)`);
-      }
-      if (models.length > 5) {
-        logger.logBoxLine(`  ... and ${models.length - 5} more`);
-      }
-      logger.logBoxLine('');
-    }
-
-    logger.logBoxEnd();
-  }
 }