Files
modelgrid/ts/cli/container-handler.ts
T

242 lines
7.1 KiB
TypeScript

/**
* Deployment handler for container-backed runtimes.
*/
import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { VllmContainer } from '../containers/vllm.ts';
import type { ITableColumn } from '../logger.ts';
import * as helpers from '../helpers/index.ts';
/**
 * CLI handler for the deployments held by a ContainerManager.
 *
 * Provides listing, interactive creation, removal, start/stop and log output.
 * All user feedback goes through the shared `logger`, except raw container
 * logs, which are written with `console.log`.
 */
export class ContainerHandler {
  /** Port used when the user leaves the port prompt blank in `add()`. */
  private static readonly DEFAULT_PORT = 8000;

  private containerManager: ContainerManager;

  /**
   * @param containerManager Manager that owns the deployments this handler operates on.
   */
  constructor(containerManager: ContainerManager) {
    this.containerManager = containerManager;
  }

  /**
   * Prints a table with one row per configured deployment, or a hint box
   * explaining how to create one when none are configured.
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Deployments');
    logger.log('');

    const containers = this.containerManager.getAllContainers();
    if (containers.length === 0) {
      logger.logBox(
        'No Deployments',
        [
          'No vLLM deployments are configured.',
          '',
          theme.dim('Create one with:'),
          ` ${theme.command('modelgrid run <model-id>')}`,
        ],
        60,
        'warning',
      );
      return;
    }

    const rows: Record<string, string | number>[] = [];
    for (const container of containers) {
      const status = await container.getStatus();
      const config = container.getConfig();
      rows.push({
        id: config.id,
        model: config.models[0] || theme.dim('N/A'),
        engine: this.formatContainerType(container.type),
        status: status.running ? theme.success('Running') : theme.dim('Stopped'),
        // Health is only shown for running deployments.
        health: status.running ? this.formatHealth(status.health) : theme.dim('N/A'),
        port: String(config.externalPort || config.port),
        gpus: config.gpuIds.length > 0 ? config.gpuIds.join(',') : theme.dim('None'),
      });
    }

    const columns: ITableColumn[] = [
      { header: 'ID', key: 'id', align: 'left' },
      { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
      { header: 'Engine', key: 'engine', align: 'left' },
      { header: 'Status', key: 'status', align: 'left' },
      { header: 'Health', key: 'health', align: 'left' },
      { header: 'Port', key: 'port', align: 'right', color: theme.info },
      { header: 'GPUs', key: 'gpus', align: 'left' },
    ];
    logger.logTable(columns, rows);
    logger.log('');
  }

  /**
   * Interactively collects model, name, port and GPU selection, builds a
   * config with `VllmContainer.createConfig` and registers it with the
   * container manager. The deployment is not started here; the command to
   * start it is printed instead.
   *
   * Aborts with an error message (registering nothing) when the model ID is
   * blank or the port is not a valid TCP port.
   */
  public async add(): Promise<void> {
    const { prompt, close } = await helpers.createPrompt();
    try {
      logger.log('');
      logger.highlight('Create vLLM Deployment');
      logger.dim('Provision a single-model vLLM runtime');
      logger.log('');

      // Trim once so stray whitespace never leaks into the name or the config.
      const modelName = (await prompt('Model ID or Hugging Face repo: ')).trim();
      if (!modelName) {
        logger.error('Model ID is required');
        return;
      }

      // Default name is the last path segment of the model ID.
      const defaultName = modelName.split('/').pop() || 'deployment';
      const name = await prompt(`Deployment name [${defaultName}]: `);
      const deploymentName = name.trim() || defaultName;
      // The ID keeps only lowercase letters, digits and dashes.
      const deploymentId = deploymentName.toLowerCase().replace(/[^a-z0-9-]/g, '-');

      const portStr = await prompt(`Port [${ContainerHandler.DEFAULT_PORT}]: `);
      const port = ContainerHandler.parsePort(portStr);
      if (port === undefined) {
        logger.error(`Invalid port "${portStr.trim()}": expected an integer between 1 and 65535`);
        return;
      }

      const gpuStr = await prompt('GPU IDs (comma-separated, or "all"): ');
      const gpuIds = await ContainerHandler.resolveGpuIds(gpuStr);

      const config = VllmContainer.createConfig(deploymentId, deploymentName, modelName, gpuIds, {
        port,
      });
      config.models = [modelName];
      this.containerManager.addContainer(config);

      logger.log('');
      logger.success(`Deployment "${deploymentName}" added successfully`);
      logger.log('');
      logger.dim('Start it with:');
      logger.log(` ${theme.command(`modelgrid container start ${deploymentId}`)}`);
      logger.log('');
    } finally {
      // Always release the prompt, including on early returns and errors.
      close();
    }
  }

  /**
   * Removes a deployment after an interactive confirmation.
   *
   * @param containerId ID of the deployment to remove; an empty value is rejected.
   */
  public async remove(containerId: string): Promise<void> {
    if (!containerId) {
      logger.error('Deployment ID is required');
      return;
    }

    const { prompt, close } = await helpers.createPrompt();
    try {
      const answer = await prompt(`Remove deployment "${containerId}"? (y/N): `);
      // Tolerate surrounding whitespace and the long form "yes".
      const normalized = answer.trim().toLowerCase();
      if (normalized !== 'y' && normalized !== 'yes') {
        logger.log('Aborted');
        return;
      }

      const success = await this.containerManager.removeContainer(containerId);
      if (success) {
        logger.success(`Deployment "${containerId}" removed`);
      } else {
        logger.error(`Failed to remove deployment "${containerId}"`);
      }
    } finally {
      close();
    }
  }

  /**
   * Starts one deployment, or all of them when no ID is given.
   *
   * @param containerId Optional deployment ID. When omitted, `startAll()` is
   *   invoked on the manager and success is logged once it resolves;
   *   per-deployment results are not inspected here.
   */
  public async start(containerId?: string): Promise<void> {
    if (containerId) {
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
        logger.error(`Deployment "${containerId}" not found`);
        return;
      }

      logger.info(`Starting deployment "${containerId}"...`);
      const success = await container.start();
      if (success) {
        logger.success(`Deployment "${containerId}" started`);
      } else {
        logger.error(`Failed to start deployment "${containerId}"`);
      }
      return;
    }

    logger.info('Starting all deployments...');
    await this.containerManager.startAll();
    logger.success('All deployments started');
  }

  /**
   * Stops one deployment, or all of them when no ID is given.
   *
   * @param containerId Optional deployment ID. When omitted, `stopAll()` is
   *   invoked on the manager and success is logged once it resolves;
   *   per-deployment results are not inspected here.
   */
  public async stop(containerId?: string): Promise<void> {
    if (containerId) {
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
        logger.error(`Deployment "${containerId}" not found`);
        return;
      }

      logger.info(`Stopping deployment "${containerId}"...`);
      const success = await container.stop();
      if (success) {
        logger.success(`Deployment "${containerId}" stopped`);
      } else {
        logger.error(`Failed to stop deployment "${containerId}"`);
      }
      return;
    }

    logger.info('Stopping all deployments...');
    await this.containerManager.stopAll();
    logger.success('All deployments stopped');
  }

  /**
   * Prints the logs of a single deployment.
   *
   * @param containerId ID of the deployment; an empty value is rejected.
   * @param lines Value forwarded to the container's `getLogs()` (defaults to 100).
   */
  public async logs(containerId: string, lines: number = 100): Promise<void> {
    if (!containerId) {
      logger.error('Deployment ID is required');
      return;
    }

    const container = this.containerManager.getContainer(containerId);
    if (!container) {
      logger.error(`Deployment "${containerId}" not found`);
      return;
    }

    const logs = await container.getLogs(lines);
    // Written directly with console.log rather than through the logger.
    console.log(logs);
  }

  /**
   * Parses the answer to the port prompt.
   *
   * @param input Raw text typed by the user.
   * @returns The default port for a blank answer, the parsed port for a
   *   digits-only value in 1..65535, or `undefined` for anything else.
   */
  private static parsePort(input: string): number | undefined {
    const trimmed = input.trim();
    if (!trimmed) {
      return ContainerHandler.DEFAULT_PORT;
    }
    // Digits only: parseInt would silently accept input such as "8000abc".
    if (!/^\d+$/.test(trimmed)) {
      return undefined;
    }
    const port = Number(trimmed);
    return port >= 1 && port <= 65535 ? port : undefined;
  }

  /**
   * Turns the answer to the GPU prompt into a list of GPU IDs.
   *
   * @param input Raw text typed by the user.
   * @returns The IDs of every GPU reported by `GpuDetector` for "all"
   *   (case-insensitive); otherwise the comma-separated entries, trimmed and
   *   with empty entries dropped. A blank answer yields an empty list.
   */
  private static async resolveGpuIds(input: string): Promise<string[]> {
    const selection = input.trim();
    if (selection.toLowerCase() === 'all') {
      // Loaded on demand so the detector module is only pulled in when needed.
      const { GpuDetector } = await import('../hardware/gpu-detector.ts');
      const detector = new GpuDetector();
      const gpus = await detector.detectGpus();
      return gpus.map((gpu) => gpu.id);
    }
    return selection
      .split(',')
      .map((value) => value.trim())
      .filter((value) => value.length > 0);
  }

  /**
   * Maps a container type to its themed display label; unrecognised types
   * are returned unchanged.
   */
  private formatContainerType(type: string): string {
    switch (type) {
      case 'vllm':
        return theme.containerVllm('vLLM');
      case 'tgi':
        return theme.containerTgi('TGI');
      default:
        return type;
    }
  }

  /**
   * Maps a health value to its themed display label; unrecognised values
   * are shown dimmed as-is.
   */
  private formatHealth(health: string): string {
    switch (health) {
      case 'healthy':
        return theme.success('Healthy');
      case 'unhealthy':
        return theme.error('Unhealthy');
      case 'starting':
        return theme.warning('Starting');
      default:
        return theme.dim(health);
    }
  }
}