// Listing metadata: 292 lines, 8.3 KiB, TypeScript
/**
 * Model Loader
 *
 * Handles automatic model loading with greenlist validation.
 */
|
|
|
|
import type { TContainerType } from '../interfaces/container.ts';
|
|
import { logger } from '../logger.ts';
|
|
import { ModelRegistry } from './registry.ts';
|
|
import { ContainerManager } from '../containers/container-manager.ts';
|
|
import { GpuDetector } from '../hardware/gpu-detector.ts';
|
|
|
|
/**
 * Model load result
 *
 * Outcome object returned by the model loader for a single load attempt.
 */
export interface IModelLoadResult {
  /** Whether the load attempt ended successfully. */
  success: boolean;
  /** Name of the model the result refers to. */
  model: string;
  /** Id of the container involved, when one was selected. */
  container?: string;
  /** Human-readable failure reason; set on unsuccessful results. */
  error?: string;
  /** Set to `true` when the model was already available and nothing had to be done. */
  alreadyLoaded?: boolean;
}
|
|
|
|
/**
 * Model loader with greenlist validation
 *
 * Combines the model registry (greenlist lookups), the container manager
 * (finding/starting containers) and GPU detection (VRAM check) to decide
 * whether a model may be loaded and into which container.
 */
export class ModelLoader {
  private readonly registry: ModelRegistry;
  private readonly containerManager: ContainerManager;
  private readonly gpuDetector: GpuDetector;
  private autoPull: boolean;

  /**
   * @param registry Registry used for greenlist checks and model metadata.
   * @param containerManager Manager used to look up and enumerate containers.
   * @param autoPull Whether `loadModel` should pull models into the target container (default `true`).
   */
  constructor(
    registry: ModelRegistry,
    containerManager: ContainerManager,
    autoPull: boolean = true,
  ) {
    this.registry = registry;
    this.containerManager = containerManager;
    this.gpuDetector = new GpuDetector();
    this.autoPull = autoPull;
  }

  /**
   * Load a model with greenlist validation
   *
   * Steps, in order: short-circuit if a container already has the model,
   * verify the model is greenlit, fetch its greenlist entry, compare its
   * `minVram` against the detected total VRAM, pick (or start) a container of
   * the required type, and finally pull the model when auto-pull is enabled.
   *
   * @param modelName Name of the model to load.
   * @returns A result object; validation failures are reported via
   *   `success: false` and `error` rather than by throwing.
   */
  public async loadModel(modelName: string): Promise<IModelLoadResult> {
    logger.info(`Loading model: ${modelName}`);

    // Step 1: Check if model is already loaded in any container
    const container = await this.containerManager.findContainerForModel(modelName);
    if (container) {
      const containerId = container.getConfig().id;
      logger.dim(`Model ${modelName} is already available in container ${containerId}`);
      return {
        success: true,
        model: modelName,
        container: containerId,
        alreadyLoaded: true,
      };
    }

    // Step 2: Check if model is greenlit
    const isGreenlit = await this.registry.isModelGreenlit(modelName);
    if (!isGreenlit) {
      logger.error(`Model ${modelName} is not in the greenlit list`);
      logger.info('Only greenlit models can be auto-pulled for security reasons.');
      logger.info('Contact your administrator to add this model to the greenlist.');
      return {
        success: false,
        model: modelName,
        error: `Model "${modelName}" is not greenlit. Request via admin or add to greenlist.`,
      };
    }

    // Step 3: Get model info from greenlist
    const modelInfo = await this.registry.getGreenlitModel(modelName);
    if (!modelInfo) {
      return {
        success: false,
        model: modelName,
        error: 'Failed to get model info from greenlist',
      };
    }

    // Step 4: Check VRAM requirements
    const totalVramGb = await this.getTotalVramGb();
    if (modelInfo.minVram > totalVramGb) {
      logger.error(`Insufficient VRAM for model ${modelName}`);
      logger.info(`Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`);
      return {
        success: false,
        model: modelName,
        error: `Insufficient VRAM. Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`,
      };
    }

    // Step 5: Find or create appropriate container
    const containerType = modelInfo.container;
    const targetContainer = await this.findAvailableContainer(containerType);
    if (!targetContainer) {
      logger.warn(`No ${containerType} container available`);

      // Could auto-create container here if desired
      return {
        success: false,
        model: modelName,
        error: `No ${containerType} container available to load model`,
      };
    }

    // Step 6: Pull the model if auto-pull is enabled
    if (this.autoPull) {
      logger.info(`Pulling model ${modelName} to ${containerType} container...`);

      const pullSuccess = await targetContainer.pullModel(modelName, (progress) => {
        const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
        logger.dim(`  ${progress.status}${percent}`);
      });

      if (!pullSuccess) {
        return {
          success: false,
          model: modelName,
          error: 'Failed to pull model',
        };
      }
    }

    // NOTE(review): when auto-pull is disabled nothing is pulled, yet success
    // is still logged and returned here — confirm this is the intended contract.
    logger.success(`Model ${modelName} loaded successfully`);
    return {
      success: true,
      model: modelName,
      container: targetContainer.getConfig().id,
    };
  }

  /**
   * Total VRAM across all detected GPUs, in whole gigabytes.
   *
   * Sums `gpu.vram` over every detected GPU, divides by 1024 and rounds to the
   * nearest integer (presumably `vram` is reported in MiB — confirm against
   * the GPU detector).
   */
  private async getTotalVramGb(): Promise<number> {
    const gpus = await this.gpuDetector.detectGpus();
    const totalVram = gpus.reduce((sum, gpu) => sum + gpu.vram, 0);
    return Math.round(totalVram / 1024);
  }

  /**
   * Find an available container of the specified type
   *
   * Prefers a container of the requested type that reports itself as running;
   * if none is running, tries to start containers of that type one by one and
   * returns the first that starts.
   *
   * @param containerType Container type required by the model.
   * @returns The selected container, or `null` when none is running and none could be started.
   */
  private async findAvailableContainer(
    containerType: TContainerType,
  ): Promise<import('../containers/base-container.ts').BaseContainer | null> {
    const containers = this.containerManager.getAllContainers();

    // First pass: reuse a container that is already running.
    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }

      const status = await container.getStatus();
      if (status.running) {
        return container;
      }
    }

    // No running container found, try to start one
    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }

      logger.info(`Starting ${containerType} container: ${container.getConfig().name}`);
      const started = await container.start();
      if (started) {
        return container;
      }
    }

    return null;
  }

  /**
   * Preload a list of models
   *
   * Models are loaded one after another, in the given order; a failed load is
   * logged as a warning and does not stop the remaining loads.
   *
   * @param modelNames Names of the models to load.
   * @returns Map from model name to the result of its load attempt.
   */
  public async preloadModels(modelNames: string[]): Promise<Map<string, IModelLoadResult>> {
    const results = new Map<string, IModelLoadResult>();

    for (const modelName of modelNames) {
      const result = await this.loadModel(modelName);
      results.set(modelName, result);

      if (!result.success) {
        logger.warn(`Failed to preload model: ${modelName}`);
      }
    }

    return results;
  }

  /**
   * Unload a model from a container
   *
   * @param modelName Name of the model to remove.
   * @returns `false` when no container holds the model; otherwise whatever the
   *   container's `removeModel` call yields.
   */
  public async unloadModel(modelName: string): Promise<boolean> {
    const container = await this.containerManager.findContainerForModel(modelName);
    if (!container) {
      logger.warn(`Model ${modelName} not found in any container`);
      return false;
    }

    return container.removeModel(modelName);
  }

  /**
   * Check if auto-pull is enabled
   */
  public isAutoPullEnabled(): boolean {
    return this.autoPull;
  }

  /**
   * Enable or disable auto-pull
   *
   * @param enabled New auto-pull setting.
   */
  public setAutoPull(enabled: boolean): void {
    this.autoPull = enabled;
  }

  /**
   * Get loading recommendations for available VRAM
   *
   * Splits every greenlit model into exactly one of three buckets: already
   * available in a container (`loaded`), not loaded but with `minVram` within
   * the detected total VRAM (`canLoad`), or requiring more VRAM than detected
   * (`cannotLoad`).
   */
  public async getRecommendations(): Promise<{
    canLoad: string[];
    cannotLoad: string[];
    loaded: string[];
  }> {
    const totalVramGb = await this.getTotalVramGb();

    const allModels = await this.registry.getAllGreenlitModels();
    const availableModels = await this.containerManager.getAllAvailableModels();
    const loadedNames = new Set(availableModels.keys());

    const canLoad: string[] = [];
    const cannotLoad: string[] = [];
    const loaded: string[] = [];

    for (const model of allModels) {
      if (loadedNames.has(model.name)) {
        loaded.push(model.name);
      } else if (model.minVram <= totalVramGb) {
        canLoad.push(model.name);
      } else {
        cannotLoad.push(model.name);
      }
    }

    return { canLoad, cannotLoad, loaded };
  }

  /**
   * Print loading status
   *
   * Logs a boxed summary of the recommendations: all loaded models, up to five
   * loadable models and up to three models that need more VRAM, each truncated
   * list followed by an "... and N more" line.
   */
  public async printStatus(): Promise<void> {
    const recommendations = await this.getRecommendations();

    logger.logBoxTitle('Model Loading Status', 60, 'info');

    logger.logBoxLine(`Loaded Models (${recommendations.loaded.length}):`);
    if (recommendations.loaded.length > 0) {
      for (const model of recommendations.loaded) {
        logger.logBoxLine(`  - ${model}`);
      }
    } else {
      logger.logBoxLine('  None');
    }

    logger.logBoxLine('');
    logger.logBoxLine(`Available to Load (${recommendations.canLoad.length}):`);
    for (const model of recommendations.canLoad.slice(0, 5)) {
      logger.logBoxLine(`  - ${model}`);
    }
    if (recommendations.canLoad.length > 5) {
      logger.logBoxLine(`  ... and ${recommendations.canLoad.length - 5} more`);
    }

    logger.logBoxLine('');
    logger.logBoxLine(`Insufficient VRAM (${recommendations.cannotLoad.length}):`);
    for (const model of recommendations.cannotLoad.slice(0, 3)) {
      const info = await this.registry.getGreenlitModel(model);
      // `??` rather than `||`: only a missing value falls back to '?', never a legitimate 0.
      logger.logBoxLine(`  - ${model} (needs ${info?.minVram ?? '?'}GB)`);
    }
    if (recommendations.cannotLoad.length > 3) {
      logger.logBoxLine(`  ... and ${recommendations.cannotLoad.length - 3} more`);
    }

    logger.logBoxEnd();
  }
}
|