175 lines
5.3 KiB
TypeScript
175 lines
5.3 KiB
TypeScript
/**
 * Model handler for catalog-backed vLLM deployments.
 */
|
|
|
|
import { logger } from '../logger.ts';
|
|
import { theme } from '../colors.ts';
|
|
import { ClusterCoordinator } from '../cluster/coordinator.ts';
|
|
import { ContainerManager } from '../containers/container-manager.ts';
|
|
import { ModelRegistry } from '../models/registry.ts';
|
|
import { ModelLoader } from '../models/loader.ts';
|
|
import type { ITableColumn } from '../logger.ts';
|
|
|
|
/**
 * Command handler for model operations: listing the catalog, deploying,
 * removing, reporting status, and refreshing the catalog.
 *
 * All user-facing output goes through the shared `logger`; none of the public
 * methods return data to the caller.
 */
export class ModelHandler {
  private containerManager: ContainerManager;
  private clusterCoordinator: ClusterCoordinator;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;

  /**
   * Stores the collaborators and builds a `ModelLoader` from the registry and
   * the container manager.
   *
   * @param containerManager - Queried for the models that are currently available.
   * @param clusterCoordinator - Used to ensure and clear deployments.
   * @param modelRegistry - Source of the model catalog.
   */
  constructor(
    containerManager: ContainerManager,
    clusterCoordinator: ClusterCoordinator,
    modelRegistry: ModelRegistry,
  ) {
    this.containerManager = containerManager;
    this.clusterCoordinator = clusterCoordinator;
    this.modelRegistry = modelRegistry;
    this.modelLoader = new ModelLoader(modelRegistry, containerManager);
  }

  /**
   * Prints the model catalog in up to two tables: models reported by the
   * container manager ("Running Deployments") and catalog entries whose id is
   * not among them ("Available To Deploy"). When both sources are empty, a
   * warning box with a refresh hint is printed instead.
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Model Catalog');
    logger.log('');

    const running = await this.containerManager.getAllAvailableModels();
    const catalog = await this.modelRegistry.getAllModels();

    // Nothing running and nothing in the catalog: show the hint box and stop.
    if (running.size === 0 && catalog.length === 0) {
      const hintLines = [
        'The local registry cache is empty.',
        '',
        theme.dim('Refresh with:'),
        ` ${theme.command('modelgrid model refresh')}`,
      ];
      logger.logBox('No Models', hintLines, 60, 'warning');
      return;
    }

    if (running.size > 0) {
      logger.info(`Running Deployments (${running.size}):`);
      logger.log('');

      // One row per model; the first endpoint stands in for the whole group.
      const deploymentRows: Record<string, string | number>[] = Array.from(
        running,
        ([name, endpoints]) => {
          const first = endpoints[0];
          return {
            model: name,
            engine: first?.type || 'vllm',
            replicas: String(endpoints.length),
            endpoint: first?.url || theme.dim('N/A'),
          };
        },
      );

      const deploymentColumns: ITableColumn[] = [
        { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
        { header: 'Engine', key: 'engine', align: 'left' },
        { header: 'Replicas', key: 'replicas', align: 'right', color: theme.info },
        { header: 'Endpoint', key: 'endpoint', align: 'left', color: theme.dim },
      ];

      logger.logTable(deploymentColumns, deploymentRows);
      logger.log('');
    }

    // Catalog entries already listed as running are not repeated below.
    const runningNames = new Set(running.keys());
    const deployable = catalog.filter((entry) => !runningNames.has(entry.id));

    if (deployable.length === 0) {
      return;
    }

    logger.info(`Available To Deploy (${deployable.length}):`);
    logger.log('');

    const catalogRows: Record<string, string | number>[] = [];
    for (const entry of deployable) {
      catalogRows.push({
        model: entry.id,
        family: entry.metadata?.family || theme.dim('N/A'),
        vram: `${entry.requirements.minVramGb} GB`,
        capabilities: this.formatCapabilities(entry.capabilities),
      });
    }

    const catalogColumns: ITableColumn[] = [
      { header: 'Model', key: 'model', align: 'left' },
      { header: 'Family', key: 'family', align: 'left' },
      { header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
      { header: 'Capabilities', key: 'capabilities', align: 'left', color: theme.dim },
    ];

    logger.logTable(catalogColumns, catalogRows);
    logger.log('');
  }

  /**
   * Asks the cluster coordinator to ensure the given model and reports the
   * outcome, including the node name and endpoint on success.
   *
   * @param modelName - Model id to deploy; a falsy value logs an error and returns.
   */
  public async pull(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model ID is required');
      return;
    }

    logger.log('');
    logger.info(`Deploying model: ${modelName}`);
    logger.log('');

    const outcome = await this.clusterCoordinator.ensureModelViaControlPlane(modelName);

    // A falsy outcome is reported as a scheduling failure.
    if (!outcome) {
      logger.error(`Failed to deploy model: could not schedule ${modelName}`);
      logger.log('');
      return;
    }

    const summary = outcome.created
      ? `Model "${outcome.model}" deployed successfully`
      : `Model "${outcome.model}" is already available`;
    logger.success(summary);
    logger.dim(`Node: ${outcome.location.nodeName}`);
    logger.dim(`Endpoint: ${outcome.location.endpoint}`);

    logger.log('');
  }

  /**
   * Clears the desired deployment for the model on the cluster coordinator,
   * then unloads it through the model loader and logs whether that succeeded.
   *
   * @param modelName - Model id to remove; a falsy value logs an error and returns.
   */
  public async remove(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model ID is required');
      return;
    }

    logger.info(`Removing deployment for model: ${modelName}`);

    await this.clusterCoordinator.clearDesiredDeployment(modelName);
    const unloaded = await this.modelLoader.unloadModel(modelName);

    if (!unloaded) {
      logger.error(`Failed to remove model "${modelName}"`);
      return;
    }

    logger.success(`Model "${modelName}" removed`);
  }

  /**
   * Prints a blank line and then delegates to the model loader's status output.
   */
  public async status(): Promise<void> {
    logger.log('');
    await this.modelLoader.printStatus();
  }

  /**
   * Calls `fetchCatalog(true)` on the registry, logging before and after.
   */
  public async refresh(): Promise<void> {
    logger.info('Refreshing model catalog...');
    await this.modelRegistry.fetchCatalog(true);
    logger.success('Model catalog refreshed');
  }

  /**
   * Renders the truthy capability flags as a comma-separated list of their
   * keys, in the object's own key order.
   *
   * @param capabilities - Capability flags of a catalog entry.
   * @returns The joined keys, or a dimmed `none` when no flag is truthy.
   */
  private formatCapabilities(capabilities: {
    chat?: boolean;
    completions?: boolean;
    embeddings?: boolean;
    tools?: boolean;
  }): string {
    const active: string[] = [];
    for (const [capability, flag] of Object.entries(capabilities)) {
      if (flag) {
        active.push(capability);
      }
    }

    if (active.length === 0) {
      return theme.dim('none');
    }
    return active.join(', ');
  }
}
|