508 lines
14 KiB
TypeScript
508 lines
14 KiB
TypeScript
/**
 * ModelGrid CLI
 *
 * Command line interface for ModelGrid.
 */
|
|
|
|
import { ModelGrid } from './modelgrid.ts';
|
|
import { logger } from './logger.ts';
|
|
import { theme } from './colors.ts';
|
|
import { VERSION } from './constants.ts';
|
|
|
|
/**
 * CLI handler for ModelGrid.
 *
 * Takes an argument array, strips the global debug flags, and routes the
 * remaining command / subcommand to the handlers exposed by `ModelGrid`.
 */
export class ModelGridCli {
  private readonly modelgrid: ModelGrid;

  constructor() {
    this.modelgrid = new ModelGrid();
  }

  /**
   * Parse command line arguments and execute the appropriate command.
   *
   * @param args Full argument array. The command is read from index 2 and its
   *   arguments from index 3 onwards (presumably a `process.argv`-style array
   *   whose first two entries are the runtime and the script - confirm against
   *   the caller).
   */
  public async parseAndExecute(args: string[]): Promise<void> {
    const { debugMode, cleanedArgs } = this.extractDebugOptions(args);

    // The version flag wins over any command and may appear anywhere in the args.
    if (cleanedArgs.includes('--version') || cleanedArgs.includes('-v')) {
      this.showVersion();
      return;
    }

    // Fall back to the general help when no command is given.
    const command = cleanedArgs[2] || 'help';
    const commandArgs = cleanedArgs.slice(3);

    await this.executeCommand(command, commandArgs, debugMode);
  }

  /**
   * Split the global debug flags (`--debug` / `-d`) off the argument list.
   *
   * @param args Raw argument array.
   * @returns `debugMode` is true when either flag is present; `cleanedArgs` is
   *   `args` with every occurrence of both flags removed, order preserved.
   */
  private extractDebugOptions(args: string[]): { debugMode: boolean; cleanedArgs: string[] } {
    const isDebugFlag = (arg: string): boolean => arg === '--debug' || arg === '-d';

    return {
      debugMode: args.some(isDebugFlag),
      cleanedArgs: args.filter((arg) => !isDebugFlag(arg)),
    };
  }

  /**
   * Route a command to its command-group dispatcher or top-level action.
   *
   * @param command Command name, e.g. `service`, `gpu`, `run`, `help`.
   * @param commandArgs Arguments that followed the command.
   * @param debugMode Whether a debug flag was present; only forwarded to
   *   `service start-daemon`.
   */
  private async executeCommand(
    command: string,
    commandArgs: string[],
    debugMode: boolean,
  ): Promise<void> {
    // All handlers are resolved up front, in this order, regardless of which
    // command ends up running.
    const serviceHandler = this.modelgrid.getServiceHandler();
    const gpuHandler = this.modelgrid.getGpuHandler();
    const containerHandler = this.modelgrid.getContainerHandler();
    const clusterHandler = this.modelgrid.getClusterHandler();
    const modelHandler = this.modelgrid.getModelHandler();
    const configHandler = this.modelgrid.getConfigHandler();

    switch (command) {
      // Command groups
      case 'service':
        await this.executeServiceCommand(serviceHandler, commandArgs, debugMode);
        break;
      case 'cluster':
        await this.executeClusterCommand(clusterHandler, commandArgs);
        break;
      case 'gpu':
        await this.executeGpuCommand(gpuHandler, commandArgs);
        break;
      case 'container':
        await this.executeContainerCommand(containerHandler, commandArgs);
        break;
      case 'model':
        await this.executeModelCommand(modelHandler, commandArgs);
        break;
      case 'config':
        await this.executeConfigCommand(configHandler, commandArgs);
        break;

      // Top-level commands
      case 'run':
        await modelHandler.pull(commandArgs[0]);
        break;
      case 'ps':
        await containerHandler.list();
        break;
      case 'update':
        await serviceHandler.update();
        break;
      case 'uninstall':
        await serviceHandler.uninstall();
        break;
      case 'help':
      case '--help':
      case '-h':
        this.showHelp();
        break;
      default:
        logger.error(`Unknown command: ${command}`);
        logger.log('');
        this.showHelp();
        break;
    }
  }

  /**
   * Dispatch a `service <subcommand>` invocation.
   *
   * The subcommand defaults to `status`; an unrecognised one prints the
   * service help.
   */
  private async executeServiceCommand(
    serviceHandler: ReturnType<ModelGrid['getServiceHandler']>,
    commandArgs: string[],
    debugMode: boolean,
  ): Promise<void> {
    const subcommand = commandArgs[0] || 'status';

    switch (subcommand) {
      case 'enable':
        await serviceHandler.enable();
        break;
      case 'disable':
        await serviceHandler.disable();
        break;
      case 'start':
        await serviceHandler.start();
        break;
      case 'stop':
        await serviceHandler.stop();
        break;
      case 'restart':
        // Stop, pause for two seconds, then start again.
        await serviceHandler.stop();
        await new Promise((resolve) => setTimeout(resolve, 2000));
        await serviceHandler.start();
        break;
      case 'status':
        await serviceHandler.status();
        break;
      case 'logs':
        await serviceHandler.logs();
        break;
      case 'start-daemon':
        await serviceHandler.daemonStart(debugMode);
        break;
      default:
        this.showServiceHelp();
        break;
    }
  }

  /**
   * Dispatch a `cluster <subcommand> [arguments]` invocation.
   *
   * The subcommand defaults to `status`; an unrecognised one prints the
   * cluster help.
   */
  private async executeClusterCommand(
    clusterHandler: ReturnType<ModelGrid['getClusterHandler']>,
    commandArgs: string[],
  ): Promise<void> {
    const subcommand = commandArgs[0] || 'status';
    const subcommandArgs = commandArgs.slice(1);

    switch (subcommand) {
      case 'status':
        await clusterHandler.status();
        break;
      case 'nodes':
        await clusterHandler.nodes();
        break;
      case 'models':
        await clusterHandler.models();
        break;
      case 'desired':
        await clusterHandler.desired();
        break;
      case 'ensure':
        await clusterHandler.ensure(subcommandArgs[0]);
        break;
      case 'scale':
        // A missing or non-numeric replica count is forwarded as NaN.
        await clusterHandler.scale(subcommandArgs[0], parseInt(subcommandArgs[1] || '', 10));
        break;
      case 'clear':
        await clusterHandler.clear(subcommandArgs[0]);
        break;
      case 'cordon':
        await clusterHandler.cordon(subcommandArgs[0]);
        break;
      case 'uncordon':
        await clusterHandler.uncordon(subcommandArgs[0]);
        break;
      case 'drain':
        await clusterHandler.drain(subcommandArgs[0]);
        break;
      case 'activate':
        await clusterHandler.activate(subcommandArgs[0]);
        break;
      default:
        this.showClusterHelp();
        break;
    }
  }

  /**
   * Dispatch a `gpu <subcommand>` invocation.
   *
   * The subcommand defaults to `list`; an unrecognised one prints the GPU help.
   */
  private async executeGpuCommand(
    gpuHandler: ReturnType<ModelGrid['getGpuHandler']>,
    commandArgs: string[],
  ): Promise<void> {
    const subcommand = commandArgs[0] || 'list';

    switch (subcommand) {
      case 'list':
      case 'ls':
        await gpuHandler.list();
        break;
      case 'status':
        await gpuHandler.status();
        break;
      case 'drivers':
        await gpuHandler.drivers();
        break;
      case 'install':
        await gpuHandler.install();
        break;
      default:
        this.showGpuHelp();
        break;
    }
  }

  /**
   * Dispatch a `container <subcommand> [arguments]` invocation.
   *
   * The subcommand defaults to `list`; an unrecognised one prints the
   * container help.
   */
  private async executeContainerCommand(
    containerHandler: ReturnType<ModelGrid['getContainerHandler']>,
    commandArgs: string[],
  ): Promise<void> {
    const subcommand = commandArgs[0] || 'list';
    const subcommandArgs = commandArgs.slice(1);

    switch (subcommand) {
      case 'list':
      case 'ls':
        await containerHandler.list();
        break;
      case 'add':
        await containerHandler.add();
        break;
      case 'remove':
      case 'rm':
        await containerHandler.remove(subcommandArgs[0]);
        break;
      case 'start':
        await containerHandler.start(subcommandArgs[0]);
        break;
      case 'stop':
        await containerHandler.stop(subcommandArgs[0]);
        break;
      case 'logs':
        // The second argument is parsed as a base-10 integer, defaulting to 100.
        await containerHandler.logs(subcommandArgs[0], parseInt(subcommandArgs[1] || '100', 10));
        break;
      default:
        this.showContainerHelp();
        break;
    }
  }

  /**
   * Dispatch a `model <subcommand> [arguments]` invocation.
   *
   * The subcommand defaults to `list`; an unrecognised one prints the model
   * help.
   */
  private async executeModelCommand(
    modelHandler: ReturnType<ModelGrid['getModelHandler']>,
    commandArgs: string[],
  ): Promise<void> {
    const subcommand = commandArgs[0] || 'list';
    const subcommandArgs = commandArgs.slice(1);

    switch (subcommand) {
      case 'list':
      case 'ls':
        await modelHandler.list();
        break;
      case 'pull':
        await modelHandler.pull(subcommandArgs[0]);
        break;
      case 'remove':
      case 'rm':
        await modelHandler.remove(subcommandArgs[0]);
        break;
      case 'status':
        await modelHandler.status();
        break;
      case 'refresh':
        await modelHandler.refresh();
        break;
      default:
        this.showModelHelp();
        break;
    }
  }

  /**
   * Dispatch a `config <subcommand> [arguments]` invocation.
   *
   * The subcommand defaults to `show`; an unrecognised one prints the config
   * help.
   */
  private async executeConfigCommand(
    configHandler: ReturnType<ModelGrid['getConfigHandler']>,
    commandArgs: string[],
  ): Promise<void> {
    const subcommand = commandArgs[0] || 'show';
    const subcommandArgs = commandArgs.slice(1);

    switch (subcommand) {
      case 'show':
      case 'display':
        await configHandler.show();
        break;
      case 'init':
        await configHandler.init();
        break;
      case 'apikey':
        await this.executeApiKeyCommand(configHandler, subcommandArgs);
        break;
      default:
        this.showConfigHelp();
        break;
    }
  }

  /**
   * Dispatch a `config apikey <action> [key]` invocation.
   *
   * @param configHandler Handler that owns the API key operations.
   * @param apiKeyArgs Arguments after `apikey`: index 0 is the action
   *   (defaults to `list`), index 1 is the optional key. Any unrecognised
   *   action lists the keys.
   */
  private async executeApiKeyCommand(
    configHandler: ReturnType<ModelGrid['getConfigHandler']>,
    apiKeyArgs: string[],
  ): Promise<void> {
    const action = apiKeyArgs[0] || 'list';

    switch (action) {
      case 'add':
        await configHandler.addApiKey(apiKeyArgs[1]);
        break;
      case 'remove':
      case 'rm':
        await configHandler.removeApiKey(apiKeyArgs[1]);
        break;
      case 'list':
      case 'ls':
      default:
        await configHandler.listApiKeys();
        break;
    }
  }

  /**
   * Display version information.
   */
  private showVersion(): void {
    logger.log(`ModelGrid version ${VERSION}`);
    logger.log('GPU Infrastructure & AI Model Management (https://modelgrid.com)');
  }

  /**
   * Display the general help message: usage, command overview, quick start
   * and an API usage example.
   */
  private showHelp(): void {
    console.log('');
    logger.highlight('ModelGrid - AI Infrastructure Management');
    logger.dim('GPU detection, container orchestration, and OpenAI-compatible API');
    console.log('');

    logger.log(theme.info('Usage:'));
    logger.log(` ${theme.command('modelgrid')} ${theme.dim('<command> [options]')}`);
    console.log('');

    logger.log(theme.info('Commands:'));
    this.printCommand('run <model>', 'Deploy a vLLM model');
    this.printCommand('ps', 'List active deployments');
    this.printCommand('service <subcommand>', 'Manage systemd service');
    this.printCommand('gpu <subcommand>', 'Manage GPU hardware');
    this.printCommand('container <subcommand>', 'Manage deployments directly');
    this.printCommand('model <subcommand>', 'Browse and deploy catalog models');
    this.printCommand('cluster <subcommand>', 'Inspect cluster control plane');
    this.printCommand('config <subcommand>', 'Manage configuration');
    this.printCommand('update', 'Update ModelGrid', theme.dim('(requires root)'));
    this.printCommand('uninstall', 'Remove ModelGrid', theme.dim('(requires root)'));
    this.printCommand('help, --help, -h', 'Show this help message');
    this.printCommand('--version, -v', 'Show version information');
    console.log('');

    logger.log(theme.info('Quick Start:'));
    logger.dim(' modelgrid gpu list # Detect GPUs');
    logger.dim(' modelgrid model list # Browse catalog');
    logger.dim(' modelgrid run <model> # Deploy a vLLM model');
    logger.dim(' modelgrid ps # List active deployments');
    logger.dim(' modelgrid service enable # Install as service');
    console.log('');

    logger.log(theme.info('API Usage:'));
    logger.dim(' curl -X POST http://localhost:8080/v1/chat/completions \\');
    logger.dim(' -H "Authorization: Bearer YOUR_API_KEY" \\');
    logger.dim(' -H "Content-Type: application/json" \\');
    logger.dim(
      ' -d \'{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}\'',
    );
    console.log('');
  }

  /**
   * Print one row of the command overview.
   *
   * @param command Command synopsis; right-padded to 28 characters.
   * @param description Short description printed after the command.
   * @param extra Optional suffix appended after the description.
   */
  private printCommand(command: string, description: string, extra?: string): void {
    const paddedCommand = command.padEnd(28);
    logger.log(` ${theme.command(paddedCommand)} ${description}${extra ? ' ' + extra : ''}`);
  }

  /**
   * Display service help.
   */
  private showServiceHelp(): void {
    logger.log(`
ModelGrid - Service Management Commands

Usage:
modelgrid service <subcommand>

Subcommands:
enable Install and enable the systemd service (requires root)
disable Stop and disable the systemd service (requires root)
start Start the systemd service
stop Stop the systemd service
restart Restart the systemd service
status Show service status
logs Show service logs in real-time
start-daemon Start the daemon process directly

Options:
--debug, -d Enable debug mode
`);
  }

  /**
   * Display GPU help.
   */
  private showGpuHelp(): void {
    logger.log(`
ModelGrid - GPU Management Commands

Usage:
modelgrid gpu <subcommand>

Subcommands:
list List detected GPUs
status Show GPU utilization and status
drivers Check GPU driver status
install Install GPU drivers (requires root)

Examples:
modelgrid gpu list # Show all detected GPUs
modelgrid gpu status # Show current GPU utilization
`);
  }

  /**
   * Display container help.
   */
  private showContainerHelp(): void {
    logger.log(`
ModelGrid - Container Management Commands

Usage:
modelgrid container <subcommand> [arguments]

Subcommands:
list List all configured deployments
add Add a vLLM deployment interactively
remove <id> Remove a deployment by ID
start [id] Start a deployment (or all if no ID)
stop [id] Stop a deployment (or all if no ID)
logs <id> Show deployment logs

Examples:
modelgrid container add # Add new deployment
modelgrid container start qwen2 # Start specific deployment
modelgrid container logs qwen2 # View deployment logs
`);
  }

  /**
   * Display model help.
   */
  private showModelHelp(): void {
    logger.log(`
ModelGrid - Model Management Commands

Usage:
modelgrid model <subcommand> [arguments]

Subcommands:
list List all catalog models
pull <name> Deploy a model from the registry
remove <name> Remove a deployed model
status Show deployment recommendations
refresh Refresh the model catalog cache

Examples:
modelgrid model list # Show all models
modelgrid model pull meta-llama/Llama-3.1-8B-Instruct
modelgrid model status # Show GPU-fit recommendations
`);
  }

  /**
   * Display cluster help.
   */
  private showClusterHelp(): void {
    logger.log(`
ModelGrid - Cluster Commands

Usage:
modelgrid cluster <subcommand> [arguments]

Subcommands:
status Show cluster status
nodes List registered nodes
models List clustered model locations
desired Show desired deployment targets
ensure <name> Ask the control plane to schedule a model
scale <name> <replicas> Set desired replica count
clear <name> Remove desired deployment target
cordon <node> Prevent new placements on a node
uncordon <node> Re-enable placements on a node
drain <node> Mark a node for evacuation
activate <node> Mark a node active again

Examples:
modelgrid cluster status
modelgrid cluster ensure meta-llama/Llama-3.1-8B-Instruct
modelgrid cluster cordon worker-a
`);
  }

  /**
   * Display config help.
   */
  private showConfigHelp(): void {
    logger.log(`
ModelGrid - Configuration Commands

Usage:
modelgrid config <subcommand> [arguments]

Subcommands:
show Display current configuration
init Initialize default configuration
apikey list List configured API keys
apikey add [key] Add an API key (generates if not provided)
apikey remove <key> Remove an API key

Examples:
modelgrid config show # Show current config
modelgrid config apikey add # Generate new API key
`);
  }
}
|