feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+62 -138
View File
@@ -1,47 +1,36 @@
/**
* Container Handler
*
* CLI commands for container management.
* Deployment handler for container-backed runtimes.
*/
import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { DockerManager } from '../docker/docker-manager.ts';
import type { IContainerConfig } from '../interfaces/container.ts';
import { VllmContainer } from '../containers/vllm.ts';
import type { ITableColumn } from '../logger.ts';
import * as helpers from '../helpers/index.ts';
/**
* Handler for container-related CLI commands
*/
export class ContainerHandler {
private containerManager: ContainerManager;
private dockerManager: DockerManager;
/**
 * Build a handler around an existing container manager.
 *
 * A separate DockerManager instance is created here rather than injected.
 *
 * @param containerManager - Manager stored on the handler and used by its commands.
 */
constructor(containerManager: ContainerManager) {
  const docker = new DockerManager();
  this.dockerManager = docker;
  this.containerManager = containerManager;
}
/**
* List all configured containers
*/
public async list(): Promise<void> {
logger.log('');
logger.info('Containers');
logger.info('Deployments');
logger.log('');
const containers = this.containerManager.getAllContainers();
if (containers.length === 0) {
logger.logBox(
'No Containers',
'No Deployments',
[
'No containers are configured.',
'No vLLM deployments are configured.',
'',
theme.dim('Add a container with:'),
` ${theme.command('modelgrid container add')}`,
theme.dim('Create one with:'),
` ${theme.command('modelgrid run <model-id>')}`,
],
60,
'warning',
@@ -49,7 +38,7 @@ export class ContainerHandler {
return;
}
const rows = [];
const rows: Record<string, string | number>[] = [];
for (const container of containers) {
const status = await container.getStatus();
@@ -57,28 +46,22 @@ export class ContainerHandler {
rows.push({
id: config.id,
name: config.name,
type: this.formatContainerType(container.type),
status: status.running
? theme.success('Running')
: theme.dim('Stopped'),
health: status.running
? this.formatHealth(status.health)
: theme.dim('N/A'),
port: config.externalPort || config.port,
models: status.loadedModels.length,
model: config.models[0] || theme.dim('N/A'),
engine: this.formatContainerType(container.type),
status: status.running ? theme.success('Running') : theme.dim('Stopped'),
health: status.running ? this.formatHealth(status.health) : theme.dim('N/A'),
port: String(config.externalPort || config.port),
gpus: config.gpuIds.length > 0 ? config.gpuIds.join(',') : theme.dim('None'),
});
}
const columns: ITableColumn[] = [
{ header: 'ID', key: 'id', align: 'left' },
{ header: 'Name', key: 'name', align: 'left', color: theme.highlight },
{ header: 'Type', key: 'type', align: 'left' },
{ header: 'Model', key: 'model', align: 'left', color: theme.highlight },
{ header: 'Engine', key: 'engine', align: 'left' },
{ header: 'Status', key: 'status', align: 'left' },
{ header: 'Health', key: 'health', align: 'left' },
{ header: 'Port', key: 'port', align: 'right', color: theme.info },
{ header: 'Models', key: 'models', align: 'right' },
{ header: 'GPUs', key: 'gpus', align: 'left' },
];
@@ -86,94 +69,70 @@ export class ContainerHandler {
logger.log('');
}
/**
 * Interactive "add" command: gathers settings through prompts, builds a
 * container configuration and hands it to the container manager.
 *
 * Visible steps: open a prompt session via helpers.createPrompt(), print a
 * heading, ask for the model ID / name / port / GPU IDs, build a config, call
 * containerManager.addContainer(config), print a follow-up hint, and close the
 * prompt session in the `finally` clause.
 *
 * NOTE(review): the lines below look like both sides of a diff shown back to
 * back without +/- markers (createPrompt() is destructured twice, `name` and
 * `config` are each declared twice, and the first `config` literal is never
 * closed). Confirm which side is current before relying on this text.
 */
public async add(): Promise<void> {
const { prompt, close, select } = await helpers.createPrompt();
const { prompt, close } = await helpers.createPrompt();
try {
logger.log('');
logger.highlight('Add Container');
logger.dim('Configure a new AI model container');
logger.highlight('Create vLLM Deployment');
logger.dim('Provision a single-model vLLM runtime');
logger.log('');
// Select container type: the index returned by select() is mapped onto the
// `types` tuple, whose order matches the option labels.
const typeIndex = await select('Select container type:', [
'Ollama - Easy to use, good for local models',
'vLLM - High performance, OpenAI compatible',
'TGI - HuggingFace Text Generation Inference',
]);
const types = ['ollama', 'vllm', 'tgi'] as const;
const containerType = types[typeIndex];
// Container name
const name = await prompt('Container name: ');
if (!name.trim()) {
logger.error('Container name is required');
// Model identifier: a blank or whitespace-only answer aborts the command.
const modelName = await prompt('Model ID or Hugging Face repo: ');
if (!modelName.trim()) {
logger.error('Model ID is required');
return;
}
// Generate ID from name: lowercase it, then replace every character outside
// a-z, 0-9 and '-' with '-'.
const id = name.toLowerCase().replace(/[^a-z0-9-]/g, '-');
// The suggested deployment name is the last '/'-separated segment of the
// model ID, falling back to 'deployment' when that segment is empty.
const name = await prompt(
`Deployment name [${modelName.split('/').pop() || 'deployment'}]: `,
);
const deploymentName = name.trim() || modelName.split('/').pop() || 'deployment';
const deploymentId = deploymentName.toLowerCase().replace(/[^a-z0-9-]/g, '-');
// Port: an empty answer selects the default. parseInt() yields NaN for
// non-numeric text and no validation of the result is visible here.
const defaultPorts = { ollama: 11434, vllm: 8000, tgi: 8080 };
const portStr = await prompt(`Port [${defaultPorts[containerType]}]: `);
const port = portStr ? parseInt(portStr, 10) : defaultPorts[containerType];
const portStr = await prompt('Port [8000]: ');
const port = portStr ? parseInt(portStr, 10) : 8000;
// GPU assignment: "all" (case-insensitive, trimmed) uses every detected GPU;
// otherwise the answer is split on commas and each piece is trimmed. Empty
// pieces (e.g. from a trailing comma) are not filtered out.
const gpuStr = await prompt('GPU IDs (comma-separated, or "all", or empty for none): ');
const gpuStr = await prompt('GPU IDs (comma-separated, or "all"): ');
let gpuIds: string[] = [];
if (gpuStr.trim().toLowerCase() === 'all') {
// GpuDetector is imported dynamically, only on this branch.
const { GpuDetector } = await import('../hardware/gpu-detector.ts');
const detector = new GpuDetector();
const gpus = await detector.detectGpus();
gpuIds = gpus.map((g) => g.id);
gpuIds = gpus.map((gpu) => gpu.id);
} else if (gpuStr.trim()) {
gpuIds = gpuStr.split(',').map((s) => s.trim());
gpuIds = gpuStr.split(',').map((value) => value.trim());
}
// Build config
const config: IContainerConfig = {
id,
type: containerType,
name,
image: this.getDefaultImage(containerType),
const config = VllmContainer.createConfig(deploymentId, deploymentName, modelName, gpuIds, {
port,
gpuIds,
models: [],
};
});
// The models list is set explicitly to the single requested model (as typed,
// without trimming).
config.models = [modelName];
// Add container
await this.containerManager.addContainer(config);
this.containerManager.addContainer(config);
logger.log('');
logger.success(`Container "${name}" added successfully`);
logger.success(`Deployment "${deploymentName}" added successfully`);
logger.log('');
logger.dim('Start the container with:');
logger.log(` ${theme.command(`modelgrid container start ${id}`)}`);
logger.dim('Start it with:');
logger.log(` ${theme.command(`modelgrid container start ${deploymentId}`)}`);
logger.log('');
} finally {
// Runs on every exit path, including the early return above.
close();
}
}
/**
* Remove a container
*/
public async remove(containerId: string): Promise<void> {
if (!containerId) {
logger.error('Container ID is required');
logger.error('Deployment ID is required');
return;
}
const { prompt, close } = await helpers.createPrompt();
try {
const confirm = await prompt(`Remove container "${containerId}"? (y/N): `);
const confirm = await prompt(`Remove deployment "${containerId}"? (y/N): `);
if (confirm.toLowerCase() !== 'y') {
logger.log('Aborted');
@@ -183,83 +142,72 @@ export class ContainerHandler {
const success = await this.containerManager.removeContainer(containerId);
if (success) {
logger.success(`Container "${containerId}" removed`);
logger.success(`Deployment "${containerId}" removed`);
} else {
logger.error(`Failed to remove container "${containerId}"`);
logger.error(`Failed to remove deployment "${containerId}"`);
}
} finally {
close();
}
}
/**
 * Start one container/deployment, or all of them.
 *
 * @param containerId - ID looked up through containerManager.getContainer().
 *   When it is omitted (or otherwise falsy, e.g. an empty string) the
 *   containerManager.startAll() path is taken instead.
 *
 * Outcomes are reported through the logger only; nothing is returned or thrown
 * by the visible code itself.
 *
 * NOTE(review): the lines below look like both sides of a diff shown back to
 * back without +/- markers (each log call appears once in "container" and once
 * in "deployment" wording, and the start-all sequence appears twice). Confirm
 * which side is current before relying on this text.
 */
public async start(containerId?: string): Promise<void> {
if (containerId) {
// Start specific container
const container = this.containerManager.getContainer(containerId);
if (!container) {
logger.error(`Container "${containerId}" not found`);
logger.error(`Deployment "${containerId}" not found`);
return;
}
logger.info(`Starting container "${containerId}"...`);
logger.info(`Starting deployment "${containerId}"...`);
// container.start() resolves to a truthy/falsy result that selects the message.
const success = await container.start();
if (success) {
logger.success(`Container "${containerId}" started`);
logger.success(`Deployment "${containerId}" started`);
} else {
logger.error(`Failed to start container "${containerId}"`);
logger.error(`Failed to start deployment "${containerId}"`);
}
} else {
// Start all containers
logger.info('Starting all containers...');
await this.containerManager.startAll();
logger.success('All containers started');
return;
}
// Start-all path: the success line is logged whenever startAll() resolves;
// its result is not inspected.
logger.info('Starting all deployments...');
await this.containerManager.startAll();
logger.success('All deployments started');
}
/**
 * Stop one container/deployment, or all of them.
 *
 * @param containerId - ID looked up through containerManager.getContainer().
 *   When it is omitted (or otherwise falsy, e.g. an empty string) the
 *   containerManager.stopAll() path is taken instead.
 *
 * Outcomes are reported through the logger only; nothing is returned or thrown
 * by the visible code itself.
 *
 * NOTE(review): the lines below look like both sides of a diff shown back to
 * back without +/- markers (each log call appears once in "container" and once
 * in "deployment" wording, and the stop-all sequence appears twice). Confirm
 * which side is current before relying on this text.
 */
public async stop(containerId?: string): Promise<void> {
if (containerId) {
// Stop specific container
const container = this.containerManager.getContainer(containerId);
if (!container) {
logger.error(`Container "${containerId}" not found`);
logger.error(`Deployment "${containerId}" not found`);
return;
}
logger.info(`Stopping container "${containerId}"...`);
logger.info(`Stopping deployment "${containerId}"...`);
// container.stop() resolves to a truthy/falsy result that selects the message.
const success = await container.stop();
if (success) {
logger.success(`Container "${containerId}" stopped`);
logger.success(`Deployment "${containerId}" stopped`);
} else {
logger.error(`Failed to stop container "${containerId}"`);
logger.error(`Failed to stop deployment "${containerId}"`);
}
} else {
// Stop all containers
logger.info('Stopping all containers...');
await this.containerManager.stopAll();
logger.success('All containers stopped');
return;
}
// Stop-all path: the success line is logged whenever stopAll() resolves;
// its result is not inspected.
logger.info('Stopping all deployments...');
await this.containerManager.stopAll();
logger.success('All deployments stopped');
}
/**
* Show container logs
*/
public async logs(containerId: string, lines: number = 100): Promise<void> {
if (!containerId) {
logger.error('Container ID is required');
logger.error('Deployment ID is required');
return;
}
const container = this.containerManager.getContainer(containerId);
if (!container) {
logger.error(`Container "${containerId}" not found`);
logger.error(`Deployment "${containerId}" not found`);
return;
}
@@ -267,13 +215,8 @@ export class ContainerHandler {
console.log(logs);
}
/**
* Format container type for display
*/
private formatContainerType(type: string): string {
switch (type) {
case 'ollama':
return theme.containerOllama('Ollama');
case 'vllm':
return theme.containerVllm('vLLM');
case 'tgi':
@@ -283,9 +226,6 @@ export class ContainerHandler {
}
}
/**
* Format health status
*/
private formatHealth(health: string): string {
switch (health) {
case 'healthy':
@@ -298,20 +238,4 @@ export class ContainerHandler {
return theme.dim(health);
}
}
/**
 * Resolve the default image reference for a container runtime type.
 *
 * @param type - Runtime identifier; 'ollama', 'vllm' and 'tgi' are recognised.
 * @returns The image reference for a recognised type, or an empty string for
 *   any other value.
 */
private getDefaultImage(type: string): string {
  // A Map is used instead of an object literal so that inherited property
  // names such as 'constructor' still fall through to the empty-string default.
  const defaultImages = new Map<string, string>([
    ['ollama', 'ollama/ollama:latest'],
    ['vllm', 'vllm/vllm-openai:latest'],
    ['tgi', 'ghcr.io/huggingface/text-generation-inference:latest'],
  ]);
  const image = defaultImages.get(type);
  return image === undefined ? '' : image;
}
}