feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+61
-89
@@ -1,55 +1,48 @@
|
||||
/**
|
||||
* Model Handler
|
||||
*
|
||||
* CLI commands for model management.
|
||||
* Model handler for catalog-backed vLLM deployments.
|
||||
*/
|
||||
|
||||
import { logger } from '../logger.ts';
|
||||
import { theme } from '../colors.ts';
|
||||
import { ClusterCoordinator } from '../cluster/coordinator.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import type { ITableColumn } from '../logger.ts';
|
||||
|
||||
/**
|
||||
* Handler for model-related CLI commands
|
||||
*/
|
||||
export class ModelHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private clusterCoordinator: ClusterCoordinator;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
|
||||
constructor(
|
||||
containerManager: ContainerManager,
|
||||
clusterCoordinator: ClusterCoordinator,
|
||||
modelRegistry: ModelRegistry,
|
||||
) {
|
||||
this.containerManager = containerManager;
|
||||
this.clusterCoordinator = clusterCoordinator;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.modelLoader = new ModelLoader(modelRegistry, containerManager);
|
||||
}
|
||||
|
||||
/**
|
||||
* List all available models
|
||||
*/
|
||||
public async list(): Promise<void> {
|
||||
logger.log('');
|
||||
logger.info('Models');
|
||||
logger.info('Model Catalog');
|
||||
logger.log('');
|
||||
|
||||
// Get loaded models from containers
|
||||
const loadedModels = await this.containerManager.getAllAvailableModels();
|
||||
const catalogModels = await this.modelRegistry.getAllModels();
|
||||
|
||||
// Get greenlit models
|
||||
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
|
||||
|
||||
if (loadedModels.size === 0 && greenlitModels.length === 0) {
|
||||
if (loadedModels.size === 0 && catalogModels.length === 0) {
|
||||
logger.logBox(
|
||||
'No Models',
|
||||
[
|
||||
'No models are loaded or greenlit.',
|
||||
'The local registry cache is empty.',
|
||||
'',
|
||||
theme.dim('Pull a model with:'),
|
||||
` ${theme.command('modelgrid model pull <name>')}`,
|
||||
theme.dim('Refresh with:'),
|
||||
` ${theme.command('modelgrid model refresh')}`,
|
||||
],
|
||||
60,
|
||||
'warning',
|
||||
@@ -57,56 +50,51 @@ export class ModelHandler {
|
||||
return;
|
||||
}
|
||||
|
||||
// Show loaded models
|
||||
if (loadedModels.size > 0) {
|
||||
logger.info(`Loaded Models (${loadedModels.size}):`);
|
||||
logger.info(`Running Deployments (${loadedModels.size}):`);
|
||||
logger.log('');
|
||||
|
||||
const rows = [];
|
||||
for (const [name, info] of loadedModels) {
|
||||
const rows: Record<string, string | number>[] = [];
|
||||
for (const [name, endpoints] of loadedModels) {
|
||||
const primaryEndpoint = endpoints[0];
|
||||
rows.push({
|
||||
name,
|
||||
container: info.container,
|
||||
size: info.size ? this.formatSize(info.size) : theme.dim('N/A'),
|
||||
format: info.format || theme.dim('N/A'),
|
||||
modified: info.modifiedAt
|
||||
? new Date(info.modifiedAt).toLocaleDateString()
|
||||
: theme.dim('N/A'),
|
||||
model: name,
|
||||
engine: primaryEndpoint?.type || 'vllm',
|
||||
replicas: String(endpoints.length),
|
||||
endpoint: primaryEndpoint?.url || theme.dim('N/A'),
|
||||
});
|
||||
}
|
||||
|
||||
const columns: ITableColumn[] = [
|
||||
{ header: 'Name', key: 'name', align: 'left', color: theme.highlight },
|
||||
{ header: 'Container', key: 'container', align: 'left' },
|
||||
{ header: 'Size', key: 'size', align: 'right', color: theme.info },
|
||||
{ header: 'Format', key: 'format', align: 'left' },
|
||||
{ header: 'Modified', key: 'modified', align: 'left', color: theme.dim },
|
||||
{ header: 'Model', key: 'model', align: 'left', color: theme.highlight },
|
||||
{ header: 'Engine', key: 'engine', align: 'left' },
|
||||
{ header: 'Replicas', key: 'replicas', align: 'right', color: theme.info },
|
||||
{ header: 'Endpoint', key: 'endpoint', align: 'left', color: theme.dim },
|
||||
];
|
||||
|
||||
logger.logTable(columns, rows);
|
||||
logger.log('');
|
||||
}
|
||||
|
||||
// Show greenlit models (not yet loaded)
|
||||
const loadedNames = new Set(loadedModels.keys());
|
||||
const unloadedGreenlit = greenlitModels.filter((m) => !loadedNames.has(m.name));
|
||||
const available = catalogModels.filter((model) => !loadedNames.has(model.id));
|
||||
|
||||
if (unloadedGreenlit.length > 0) {
|
||||
logger.info(`Available to Pull (${unloadedGreenlit.length}):`);
|
||||
if (available.length > 0) {
|
||||
logger.info(`Available To Deploy (${available.length}):`);
|
||||
logger.log('');
|
||||
|
||||
const rows = unloadedGreenlit.map((m) => ({
|
||||
name: m.name,
|
||||
container: m.container,
|
||||
vram: `${m.minVram} GB`,
|
||||
tags: m.tags?.join(', ') || theme.dim('None'),
|
||||
const rows: Record<string, string | number>[] = available.map((model) => ({
|
||||
model: model.id,
|
||||
family: model.metadata?.family || theme.dim('N/A'),
|
||||
vram: `${model.requirements.minVramGb} GB`,
|
||||
capabilities: this.formatCapabilities(model.capabilities),
|
||||
}));
|
||||
|
||||
const columns: ITableColumn[] = [
|
||||
{ header: 'Name', key: 'name', align: 'left' },
|
||||
{ header: 'Container', key: 'container', align: 'left' },
|
||||
{ header: 'Model', key: 'model', align: 'left' },
|
||||
{ header: 'Family', key: 'family', align: 'left' },
|
||||
{ header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
|
||||
{ header: 'Tags', key: 'tags', align: 'left', color: theme.dim },
|
||||
{ header: 'Capabilities', key: 'capabilities', align: 'left', color: theme.dim },
|
||||
];
|
||||
|
||||
logger.logTable(columns, rows);
|
||||
@@ -114,47 +102,42 @@ export class ModelHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull a model
|
||||
*/
|
||||
public async pull(modelName: string): Promise<void> {
|
||||
if (!modelName) {
|
||||
logger.error('Model name is required');
|
||||
logger.error('Model ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.log('');
|
||||
logger.info(`Pulling model: ${modelName}`);
|
||||
logger.info(`Deploying model: ${modelName}`);
|
||||
logger.log('');
|
||||
|
||||
const result = await this.modelLoader.loadModel(modelName);
|
||||
const result = await this.clusterCoordinator.ensureModelViaControlPlane(modelName);
|
||||
|
||||
if (result.success) {
|
||||
if (result.alreadyLoaded) {
|
||||
logger.success(`Model "${modelName}" is already loaded`);
|
||||
if (result) {
|
||||
if (result.created) {
|
||||
logger.success(`Model "${result.model}" deployed successfully`);
|
||||
} else {
|
||||
logger.success(`Model "${modelName}" pulled successfully`);
|
||||
}
|
||||
if (result.container) {
|
||||
logger.dim(`Container: ${result.container}`);
|
||||
logger.success(`Model "${result.model}" is already available`);
|
||||
}
|
||||
logger.dim(`Node: ${result.location.nodeName}`);
|
||||
logger.dim(`Endpoint: ${result.location.endpoint}`);
|
||||
} else {
|
||||
logger.error(`Failed to pull model: ${result.error}`);
|
||||
logger.error(`Failed to deploy model: could not schedule ${modelName}`);
|
||||
}
|
||||
|
||||
logger.log('');
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a model
|
||||
*/
|
||||
public async remove(modelName: string): Promise<void> {
|
||||
if (!modelName) {
|
||||
logger.error('Model name is required');
|
||||
logger.error('Model ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`Removing model: ${modelName}`);
|
||||
logger.info(`Removing deployment for model: ${modelName}`);
|
||||
|
||||
await this.clusterCoordinator.clearDesiredDeployment(modelName);
|
||||
|
||||
const success = await this.modelLoader.unloadModel(modelName);
|
||||
|
||||
@@ -165,38 +148,27 @@ export class ModelHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Show model loading status and recommendations
|
||||
*/
|
||||
public async status(): Promise<void> {
|
||||
logger.log('');
|
||||
await this.modelLoader.printStatus();
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh greenlist cache
|
||||
*/
|
||||
public async refresh(): Promise<void> {
|
||||
logger.info('Refreshing greenlist...');
|
||||
|
||||
await this.modelRegistry.refreshGreenlist();
|
||||
|
||||
logger.success('Greenlist refreshed');
|
||||
logger.info('Refreshing model catalog...');
|
||||
await this.modelRegistry.fetchCatalog(true);
|
||||
logger.success('Model catalog refreshed');
|
||||
}
|
||||
|
||||
/**
|
||||
* Format file size
|
||||
*/
|
||||
private formatSize(bytes: number): string {
|
||||
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||||
let size = bytes;
|
||||
let unitIndex = 0;
|
||||
private formatCapabilities(capabilities: {
|
||||
chat?: boolean;
|
||||
completions?: boolean;
|
||||
embeddings?: boolean;
|
||||
tools?: boolean;
|
||||
}): string {
|
||||
const enabled = Object.entries(capabilities)
|
||||
.filter(([, value]) => value)
|
||||
.map(([key]) => key);
|
||||
|
||||
while (size >= 1024 && unitIndex < units.length - 1) {
|
||||
size /= 1024;
|
||||
unitIndex++;
|
||||
}
|
||||
|
||||
return `${size.toFixed(1)} ${units[unitIndex]}`;
|
||||
return enabled.length > 0 ? enabled.join(', ') : theme.dim('none');
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user