Files
modelgrid/ts/cli/model-handler.ts
T

175 lines
5.3 KiB
TypeScript

/**
* Model handler for catalog-backed vLLM deployments.
*/
import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { ClusterCoordinator } from '../cluster/coordinator.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import type { ITableColumn } from '../logger.ts';
/**
 * CLI handler for the model commands: listing, deploying, removing,
 * showing status, and refreshing the catalog.
 *
 * All user-facing output goes through the shared `logger`; the actual work is
 * delegated to the injected container manager, cluster coordinator, model
 * registry, and a `ModelLoader` built from them.
 */
export class ModelHandler {
  private readonly containerManager: ContainerManager;
  private readonly clusterCoordinator: ClusterCoordinator;
  private readonly modelRegistry: ModelRegistry;
  private readonly modelLoader: ModelLoader;

  /**
   * Stores the injected collaborators and builds the `ModelLoader` from the
   * registry and the container manager.
   *
   * @param containerManager Source of the currently available models.
   * @param clusterCoordinator Used to ensure and clear deployments.
   * @param modelRegistry Source of the model catalog.
   */
  constructor(
    containerManager: ContainerManager,
    clusterCoordinator: ClusterCoordinator,
    modelRegistry: ModelRegistry,
  ) {
    this.containerManager = containerManager;
    this.clusterCoordinator = clusterCoordinator;
    this.modelRegistry = modelRegistry;
    this.modelLoader = new ModelLoader(modelRegistry, containerManager);
  }

  /**
   * Prints the model overview: a table of running deployments followed by a
   * table of catalog models that are not currently running.
   *
   * When there are neither running deployments nor catalog entries, a warning
   * box pointing at `modelgrid model refresh` is shown instead.
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Model Catalog');
    logger.log('');

    const deployed = await this.containerManager.getAllAvailableModels();
    const catalog = await this.modelRegistry.getAllModels();

    const nothingToShow = deployed.size === 0 && catalog.length === 0;
    if (nothingToShow) {
      const hintLines = [
        'The local registry cache is empty.',
        '',
        theme.dim('Refresh with:'),
        ` ${theme.command('modelgrid model refresh')}`,
      ];
      logger.logBox('No Models', hintLines, 60, 'warning');
      return;
    }

    if (deployed.size > 0) {
      this.printRunningDeployments(deployed);
    }

    // Catalog entries that already run are listed in the first table only.
    const deployedIds = new Set(deployed.keys());
    const undeployed = catalog.filter((entry) => !deployedIds.has(entry.id));
    if (undeployed.length > 0) {
      this.printAvailableModels(undeployed);
    }
  }

  /**
   * Asks the cluster coordinator to ensure the model is available and reports
   * the outcome.
   *
   * A falsy coordinator result is reported as a scheduling failure. Otherwise
   * the message distinguishes a fresh deployment from an already available
   * model, and the node name and endpoint are printed.
   *
   * @param modelName Model ID to deploy; an empty value is rejected with an error log.
   */
  public async pull(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model ID is required');
      return;
    }

    logger.log('');
    logger.info(`Deploying model: ${modelName}`);
    logger.log('');

    const outcome = await this.clusterCoordinator.ensureModelViaControlPlane(modelName);

    if (!outcome) {
      logger.error(`Failed to deploy model: could not schedule ${modelName}`);
      logger.log('');
      return;
    }

    const headline = outcome.created
      ? `Model "${outcome.model}" deployed successfully`
      : `Model "${outcome.model}" is already available`;
    logger.success(headline);
    logger.dim(`Node: ${outcome.location.nodeName}`);
    logger.dim(`Endpoint: ${outcome.location.endpoint}`);
    logger.log('');
  }

  /**
   * Clears the desired deployment on the coordinator, then asks the model
   * loader to unload the model, and reports whether unloading succeeded.
   *
   * @param modelName Model ID to remove; an empty value is rejected with an error log.
   */
  public async remove(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model ID is required');
      return;
    }

    logger.info(`Removing deployment for model: ${modelName}`);

    // The desired state is cleared first, regardless of the unload result.
    await this.clusterCoordinator.clearDesiredDeployment(modelName);
    const unloaded = await this.modelLoader.unloadModel(modelName);

    if (!unloaded) {
      logger.error(`Failed to remove model "${modelName}"`);
      return;
    }
    logger.success(`Model "${modelName}" removed`);
  }

  /**
   * Prints a blank line and then delegates to the model loader's status output.
   */
  public async status(): Promise<void> {
    logger.log('');
    await this.modelLoader.printStatus();
  }

  /**
   * Fetches the catalog through the registry and logs progress.
   *
   * NOTE(review): the `true` argument presumably forces a refetch — confirm
   * against `ModelRegistry.fetchCatalog`.
   */
  public async refresh(): Promise<void> {
    logger.info('Refreshing model catalog...');
    await this.modelRegistry.fetchCatalog(true);
    logger.success('Model catalog refreshed');
  }

  /**
   * Prints the "Running Deployments" header and table.
   *
   * Each row shows the first endpoint of a model; the engine falls back to
   * `'vllm'` and the endpoint to a dimmed `N/A` when that endpoint is missing
   * or the field is empty.
   */
  private printRunningDeployments(
    deployments: Awaited<ReturnType<ContainerManager['getAllAvailableModels']>>,
  ): void {
    logger.info(`Running Deployments (${deployments.size}):`);
    logger.log('');

    const rows: Record<string, string | number>[] = Array.from(
      deployments,
      ([modelId, endpoints]) => {
        const primary = endpoints[0];
        return {
          model: modelId,
          engine: primary?.type || 'vllm',
          replicas: String(endpoints.length),
          endpoint: primary?.url || theme.dim('N/A'),
        };
      },
    );

    const columns: ITableColumn[] = [
      { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
      { header: 'Engine', key: 'engine', align: 'left' },
      { header: 'Replicas', key: 'replicas', align: 'right', color: theme.info },
      { header: 'Endpoint', key: 'endpoint', align: 'left', color: theme.dim },
    ];

    logger.logTable(columns, rows);
    logger.log('');
  }

  /**
   * Prints the "Available To Deploy" header and table for the given catalog
   * entries.
   */
  private printAvailableModels(
    models: Awaited<ReturnType<ModelRegistry['getAllModels']>>,
  ): void {
    logger.info(`Available To Deploy (${models.length}):`);
    logger.log('');

    const rows: Record<string, string | number>[] = models.map((entry) => {
      return {
        model: entry.id,
        family: entry.metadata?.family || theme.dim('N/A'),
        vram: `${entry.requirements.minVramGb} GB`,
        capabilities: this.formatCapabilities(entry.capabilities),
      };
    });

    const columns: ITableColumn[] = [
      { header: 'Model', key: 'model', align: 'left' },
      { header: 'Family', key: 'family', align: 'left' },
      { header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
      { header: 'Capabilities', key: 'capabilities', align: 'left', color: theme.dim },
    ];

    logger.logTable(columns, rows);
    logger.log('');
  }

  /**
   * Joins the names of all truthy capability flags with `', '`, in the
   * object's own key order.
   *
   * @returns The joined names, or a dimmed `none` when no flag is set.
   */
  private formatCapabilities(capabilities: {
    chat?: boolean;
    completions?: boolean;
    embeddings?: boolean;
    tools?: boolean;
  }): string {
    const labels: string[] = [];
    for (const [name, isEnabled] of Object.entries(capabilities)) {
      if (isEnabled) {
        labels.push(name);
      }
    }

    if (labels.length === 0) {
      return theme.dim('none');
    }
    return labels.join(', ');
  }
}