242 lines
7.1 KiB
TypeScript
242 lines
7.1 KiB
TypeScript
/**
 * Deployment handler for container-backed runtimes.
 */
|
|
|
|
import { logger } from '../logger.ts';
|
|
import { theme } from '../colors.ts';
|
|
import { ContainerManager } from '../containers/container-manager.ts';
|
|
import { VllmContainer } from '../containers/vllm.ts';
|
|
import type { ITableColumn } from '../logger.ts';
|
|
import * as helpers from '../helpers/index.ts';
|
|
|
|
/**
 * Handler for container-backed vLLM deployments.
 *
 * Exposes list / add / remove / start / stop / logs operations on top of a
 * {@link ContainerManager} and reports every outcome through the shared
 * `logger`. No method throws on invalid user input; problems are logged and
 * the method returns early.
 */
export class ContainerHandler {
  /** Port used by `add()` when the user leaves the port prompt empty. */
  private static readonly DEFAULT_PORT = 8000;

  private readonly containerManager: ContainerManager;

  /**
   * @param containerManager Manager that owns the configured containers.
   */
  constructor(containerManager: ContainerManager) {
    this.containerManager = containerManager;
  }

  /**
   * Prints a table with one row per configured deployment, or a hint box
   * explaining how to create one when none are configured.
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Deployments');
    logger.log('');

    const containers = this.containerManager.getAllContainers();

    if (containers.length === 0) {
      logger.logBox(
        'No Deployments',
        [
          'No vLLM deployments are configured.',
          '',
          theme.dim('Create one with:'),
          ` ${theme.command('modelgrid run <model-id>')}`,
        ],
        60,
        'warning',
      );
      return;
    }

    const rows: Record<string, string | number>[] = [];

    for (const container of containers) {
      const status = await container.getStatus();
      const config = container.getConfig();

      rows.push({
        id: config.id,
        model: config.models[0] || theme.dim('N/A'),
        engine: this.formatContainerType(container.type),
        status: status.running ? theme.success('Running') : theme.dim('Stopped'),
        // Health is only shown for running containers.
        health: status.running ? this.formatHealth(status.health) : theme.dim('N/A'),
        port: String(config.externalPort || config.port),
        gpus: config.gpuIds.length > 0 ? config.gpuIds.join(',') : theme.dim('None'),
      });
    }

    const columns: ITableColumn[] = [
      { header: 'ID', key: 'id', align: 'left' },
      { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
      { header: 'Engine', key: 'engine', align: 'left' },
      { header: 'Status', key: 'status', align: 'left' },
      { header: 'Health', key: 'health', align: 'left' },
      { header: 'Port', key: 'port', align: 'right', color: theme.info },
      { header: 'GPUs', key: 'gpus', align: 'left' },
    ];

    logger.logTable(columns, rows);
    logger.log('');
  }

  /**
   * Interactively collects model, name, port and GPU selection, then registers
   * a new vLLM deployment with the container manager.
   *
   * Aborts with a logged error (and without registering anything) when the
   * model ID is empty or the port is not an integer in 1-65535. The prompt is
   * always closed, including on early return or error.
   */
  public async add(): Promise<void> {
    const { prompt, close } = await helpers.createPrompt();

    try {
      logger.log('');
      logger.highlight('Create vLLM Deployment');
      logger.dim('Provision a single-model vLLM runtime');
      logger.log('');

      // Trim once so stray whitespace never reaches the stored configuration.
      const modelName = (await prompt('Model ID or Hugging Face repo: ')).trim();
      if (!modelName) {
        logger.error('Model ID is required');
        return;
      }

      // Default name is the last path segment of the model ID ("org/model" -> "model").
      const defaultName = modelName.split('/').pop() || 'deployment';
      const nameInput = await prompt(`Deployment name [${defaultName}]: `);
      const deploymentName = nameInput.trim() || defaultName;
      const deploymentId = ContainerHandler.toDeploymentId(deploymentName);

      const portInput = await prompt(`Port [${ContainerHandler.DEFAULT_PORT}]: `);
      const port = ContainerHandler.parsePort(portInput);
      if (port === undefined) {
        logger.error(
          `Invalid port "${portInput.trim()}": expected an integer between 1 and 65535`,
        );
        return;
      }

      const gpuInput = (await prompt('GPU IDs (comma-separated, or "all"): ')).trim();
      let gpuIds: string[];

      if (gpuInput.toLowerCase() === 'all') {
        // Loaded lazily so GPU detection code is only pulled in when requested.
        const { GpuDetector } = await import('../hardware/gpu-detector.ts');
        const detector = new GpuDetector();
        const gpus = await detector.detectGpus();
        gpuIds = gpus.map((gpu) => gpu.id);
      } else {
        gpuIds = ContainerHandler.parseGpuIds(gpuInput);
      }

      const config = VllmContainer.createConfig(deploymentId, deploymentName, modelName, gpuIds, {
        port,
      });
      config.models = [modelName];

      this.containerManager.addContainer(config);

      logger.log('');
      logger.success(`Deployment "${deploymentName}" added successfully`);
      logger.log('');
      logger.dim('Start it with:');
      logger.log(` ${theme.command(`modelgrid container start ${deploymentId}`)}`);
      logger.log('');
    } finally {
      close();
    }
  }

  /**
   * Removes a deployment after an interactive y/N confirmation.
   *
   * @param containerId ID of the deployment to remove; an empty value is
   *   rejected with a logged error.
   */
  public async remove(containerId: string): Promise<void> {
    if (!containerId) {
      logger.error('Deployment ID is required');
      return;
    }

    const { prompt, close } = await helpers.createPrompt();

    try {
      const confirm = await prompt(`Remove deployment "${containerId}"? (y/N): `);

      // Trim so an answer such as "y " is not treated as a refusal.
      if (confirm.trim().toLowerCase() !== 'y') {
        logger.log('Aborted');
        return;
      }

      const success = await this.containerManager.removeContainer(containerId);

      if (success) {
        logger.success(`Deployment "${containerId}" removed`);
      } else {
        logger.error(`Failed to remove deployment "${containerId}"`);
      }
    } finally {
      close();
    }
  }

  /**
   * Starts one deployment, or every deployment when no ID is given.
   *
   * @param containerId Optional ID of the deployment to start.
   */
  public async start(containerId?: string): Promise<void> {
    if (containerId) {
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
        logger.error(`Deployment "${containerId}" not found`);
        return;
      }

      logger.info(`Starting deployment "${containerId}"...`);
      const success = await container.start();

      if (success) {
        logger.success(`Deployment "${containerId}" started`);
      } else {
        logger.error(`Failed to start deployment "${containerId}"`);
      }
      return;
    }

    logger.info('Starting all deployments...');
    // NOTE(review): the result of startAll() is not inspected, so success is
    // reported whenever it resolves — confirm it rejects on failure.
    await this.containerManager.startAll();
    logger.success('All deployments started');
  }

  /**
   * Stops one deployment, or every deployment when no ID is given.
   *
   * @param containerId Optional ID of the deployment to stop.
   */
  public async stop(containerId?: string): Promise<void> {
    if (containerId) {
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
        logger.error(`Deployment "${containerId}" not found`);
        return;
      }

      logger.info(`Stopping deployment "${containerId}"...`);
      const success = await container.stop();

      if (success) {
        logger.success(`Deployment "${containerId}" stopped`);
      } else {
        logger.error(`Failed to stop deployment "${containerId}"`);
      }
      return;
    }

    logger.info('Stopping all deployments...');
    // NOTE(review): the result of stopAll() is not inspected, so success is
    // reported whenever it resolves — confirm it rejects on failure.
    await this.containerManager.stopAll();
    logger.success('All deployments stopped');
  }

  /**
   * Prints the most recent log output of a deployment.
   *
   * @param containerId ID of the deployment whose logs are requested.
   * @param lines Line count forwarded to the container's `getLogs` (default 100).
   */
  public async logs(containerId: string, lines: number = 100): Promise<void> {
    if (!containerId) {
      logger.error('Deployment ID is required');
      return;
    }

    const container = this.containerManager.getContainer(containerId);
    if (!container) {
      logger.error(`Deployment "${containerId}" not found`);
      return;
    }

    const logs = await container.getLogs(lines);
    // Written with console.log instead of the logger — presumably to emit the
    // container output unformatted; confirm before changing.
    console.log(logs);
  }

  /**
   * Parses the answer to the port prompt.
   *
   * @param input Raw text typed by the user.
   * @returns The default port for blank input, the parsed port for a decimal
   *   integer in 1-65535, or `undefined` for anything else.
   */
  private static parsePort(input: string): number | undefined {
    const text = input.trim();
    if (text === '') {
      return ContainerHandler.DEFAULT_PORT;
    }
    // Digits only: parseInt would silently accept inputs such as "8000abc".
    if (!/^\d+$/.test(text)) {
      return undefined;
    }
    const port = Number(text);
    return port >= 1 && port <= 65535 ? port : undefined;
  }

  /**
   * Splits a comma-separated GPU ID list.
   *
   * @param input Raw text typed by the user.
   * @returns Trimmed IDs in input order, without empty entries or duplicates.
   */
  private static parseGpuIds(input: string): string[] {
    const ids = input
      .split(',')
      .map((value) => value.trim())
      .filter((value) => value !== '');
    return [...new Set(ids)];
  }

  /**
   * Derives a deployment ID from a display name.
   *
   * Lower-cases the name and replaces every character outside `[a-z0-9-]`
   * with `-`. Leading and trailing hyphens are stripped so the ID cannot be
   * mistaken for an option flag in `modelgrid container start <id>`.
   *
   * @param name Human-readable deployment name.
   * @returns The derived ID, or `'deployment'` when nothing usable remains.
   */
  private static toDeploymentId(name: string): string {
    const id = name
      .toLowerCase()
      .replace(/[^a-z0-9-]/g, '-')
      .replace(/^-+|-+$/g, '');
    return id || 'deployment';
  }

  /**
   * Returns the themed display label for a container type; unknown types are
   * returned unchanged.
   */
  private formatContainerType(type: string): string {
    switch (type) {
      case 'vllm':
        return theme.containerVllm('vLLM');
      case 'tgi':
        return theme.containerTgi('TGI');
      default:
        return type;
    }
  }

  /**
   * Returns the themed display label for a health state; unknown states are
   * shown dimmed and otherwise unchanged.
   */
  private formatHealth(health: string): string {
    switch (health) {
      case 'healthy':
        return theme.success('Healthy');
      case 'unhealthy':
        return theme.error('Unhealthy');
      case 'starting':
        return theme.warning('Starting');
      default:
        return theme.dim(health);
    }
  }
}
|