feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -0,0 +1,192 @@
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { CLUSTER, PATHS } from '../constants.ts';
|
||||
import type { IModelGridConfig } from '../interfaces/config.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
|
||||
/**
 * CLI handler for cluster commands.
 *
 * Every command reads the config file at `PATHS.CONFIG_FILE`, sends one HTTP
 * request to a `/_cluster/...` route on the endpoint chosen by
 * `resolveEndpoint`, and prints the JSON response pretty-printed through the
 * logger. Failures are reported via `logger.error`; no method throws for
 * config, network or HTTP errors.
 */
export class ClusterHandler {
  /** Prints the full payload returned by `/_cluster/status`. */
  public async status(): Promise<void> {
    this.printResponse(await this.request('/_cluster/status'));
  }

  /** Prints the payload returned by `/_cluster/nodes`. */
  public async nodes(): Promise<void> {
    this.printResponse(await this.request('/_cluster/nodes'));
  }

  /** Prints only the `models` field of the `/_cluster/status` payload. */
  public async models(): Promise<void> {
    const response = await this.request('/_cluster/status');
    if (!response) {
      // The request failure (if any) has already been logged by request().
      return;
    }

    if (typeof response !== 'object' || !('models' in response)) {
      // Previously this case returned silently, leaving the user with no output.
      logger.error('Cluster status response did not include a "models" field');
      return;
    }

    logger.log(JSON.stringify((response as { models: unknown }).models, null, 2));
  }

  /** Prints the payload returned by `/_cluster/desired`. */
  public async desired(): Promise<void> {
    this.printResponse(await this.request('/_cluster/desired'));
  }

  /**
   * POSTs `{ model }` to `/_cluster/models/ensure` and prints the response.
   *
   * @param model Model ID; an empty value is rejected before any request is made.
   */
  public async ensure(model: string): Promise<void> {
    if (!model) {
      logger.error('Model ID is required');
      return;
    }

    this.printResponse(
      await this.request('/_cluster/models/ensure', {
        method: 'POST',
        body: { model },
      }),
    );
  }

  /**
   * POSTs `{ model, desiredReplicas }` to `/_cluster/models/desired` and prints
   * the response.
   *
   * @param model Model ID; an empty value is rejected before any request is made.
   * @param desiredReplicas Replica count; must be a non-negative integer.
   */
  public async scale(model: string, desiredReplicas: number): Promise<void> {
    if (!model || Number.isNaN(desiredReplicas)) {
      logger.error('Model ID and desired replica count are required');
      return;
    }

    // Infinity would be serialised as `null` by JSON.stringify, and negative or
    // fractional counts are not meaningful replica counts, so reject them here.
    if (!Number.isInteger(desiredReplicas) || desiredReplicas < 0) {
      logger.error('Desired replica count must be a non-negative integer');
      return;
    }

    this.printResponse(
      await this.request('/_cluster/models/desired', {
        method: 'POST',
        body: { model, desiredReplicas },
      }),
    );
  }

  /**
   * POSTs `{ model }` to `/_cluster/models/desired/remove` and prints the response.
   *
   * @param model Model ID; an empty value is rejected before any request is made.
   */
  public async clear(model: string): Promise<void> {
    if (!model) {
      logger.error('Model ID is required');
      return;
    }

    this.printResponse(
      await this.request('/_cluster/models/desired/remove', {
        method: 'POST',
        body: { model },
      }),
    );
  }

  /** POSTs the node name to `/_cluster/nodes/cordon`. */
  public async cordon(nodeName: string): Promise<void> {
    await this.updateNodeState('/_cluster/nodes/cordon', nodeName);
  }

  /** POSTs the node name to `/_cluster/nodes/uncordon`. */
  public async uncordon(nodeName: string): Promise<void> {
    await this.updateNodeState('/_cluster/nodes/uncordon', nodeName);
  }

  /** POSTs the node name to `/_cluster/nodes/drain`. */
  public async drain(nodeName: string): Promise<void> {
    await this.updateNodeState('/_cluster/nodes/drain', nodeName);
  }

  /** POSTs the node name to `/_cluster/nodes/activate`. */
  public async activate(nodeName: string): Promise<void> {
    await this.updateNodeState('/_cluster/nodes/activate', nodeName);
  }

  /**
   * Sends one JSON request to the cluster endpoint.
   *
   * @param path Route appended to the resolved endpoint; expected to start with `/`.
   * @param options Optional HTTP method (defaults to `GET`) and JSON-serialisable body.
   * @returns The parsed JSON response, or `null` when the config cannot be read,
   *   the request fails, the status is not OK, or the body is not valid JSON.
   *   Each of those cases is logged through `logger.error`.
   */
  private async request(
    path: string,
    options: {
      method?: 'GET' | 'POST';
      body?: unknown;
    } = {},
  ): Promise<unknown | null> {
    const config = await this.readConfig();
    if (!config) {
      return null;
    }

    try {
      // Endpoint and header construction live inside the try block so that a
      // malformed config is reported as a failed request instead of escaping
      // as an uncaught TypeError.
      const endpoint = this.resolveEndpoint(config);
      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
      };

      // Configs written before the cluster section existed have no `cluster` key.
      const sharedSecret = config.cluster?.sharedSecret;
      if (sharedSecret) {
        headers[CLUSTER.AUTH_HEADER_NAME] = sharedSecret;
      }

      const response = await fetch(`${endpoint}${path}`, {
        method: options.method ?? 'GET',
        headers,
        body: options.body ? JSON.stringify(options.body) : undefined,
      });

      if (!response.ok) {
        logger.error(`Cluster request failed: ${response.status} ${await response.text()}`);
        return null;
      }

      return await response.json();
    } catch (error) {
      logger.error(
        `Cluster request failed: ${error instanceof Error ? error.message : String(error)}`,
      );
      return null;
    }
  }

  /**
   * Reads and parses the config file at `PATHS.CONFIG_FILE`.
   *
   * @returns The parsed config, or `null` (after logging) when the file cannot
   *   be read, is not valid JSON, or its root is not a JSON object. Individual
   *   fields are not validated.
   */
  private async readConfig(): Promise<IModelGridConfig | null> {
    try {
      const parsed: unknown = JSON.parse(await fs.readFile(PATHS.CONFIG_FILE, 'utf-8'));
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        logger.error('Failed to read config: configuration root must be a JSON object');
        return null;
      }

      return parsed as IModelGridConfig;
    } catch (error) {
      logger.error(
        `Failed to read config: ${error instanceof Error ? error.message : String(error)}`,
      );
      return null;
    }
  }

  /**
   * Picks the base URL for cluster requests.
   *
   * Precedence: `cluster.controlPlaneUrl`, then `cluster.advertiseUrl`, then
   * `http://<api.host>:<api.port>`. Trailing slashes are stripped from a
   * configured URL because request paths already start with `/`.
   *
   * @param config Parsed config; the `cluster` section may be absent.
   * @returns Base URL without a trailing slash.
   */
  private resolveEndpoint(config: IModelGridConfig): string {
    const configuredUrl = config.cluster?.controlPlaneUrl || config.cluster?.advertiseUrl;
    if (configuredUrl) {
      return configuredUrl.replace(/\/+$/, '');
    }

    let host = config.api.host;
    if (host === '0.0.0.0') {
      // A wildcard bind address is not connectable; use loopback instead.
      host = '127.0.0.1';
    } else if (host === '::' || host === '[::]') {
      host = '[::1]';
    } else if (host.includes(':') && !host.startsWith('[')) {
      // IPv6 literals must be bracketed inside a URL authority.
      host = `[${host}]`;
    }

    return `http://${host}:${config.api.port}`;
  }

  /**
   * POSTs `{ nodeName }` to the given node-state route and prints the response.
   *
   * @param path Node-state route, e.g. `/_cluster/nodes/drain`.
   * @param nodeName Target node; an empty value is rejected before any request is made.
   */
  private async updateNodeState(path: string, nodeName: string): Promise<void> {
    if (!nodeName) {
      logger.error('Node name is required');
      return;
    }

    this.printResponse(
      await this.request(path, {
        method: 'POST',
        body: { nodeName },
      }),
    );
  }

  /**
   * Pretty-prints a response with two-space indentation. A falsy response is
   * skipped because request() returns `null` after logging its own error.
   */
  private printResponse(response: unknown): void {
    if (!response) {
      return;
    }

    logger.log(JSON.stringify(response, null, 2));
  }
}
|
||||
+62
-12
@@ -25,6 +25,26 @@ export class ConfigHandler {
|
||||
const configPath = PATHS.CONFIG_FILE;
|
||||
const configContent = await fs.readFile(configPath, 'utf-8');
|
||||
const config = JSON.parse(configContent) as IModelGridConfig;
|
||||
const modelConfig = {
|
||||
registryUrl: config.models.registryUrl ||
|
||||
(config.models as { greenlistUrl?: string }).greenlistUrl ||
|
||||
'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: config.models.autoDeploy ??
|
||||
(config.models as { autoPull?: boolean }).autoPull ?? true,
|
||||
defaultEngine: config.models.defaultEngine || 'vllm',
|
||||
autoLoad: config.models.autoLoad || [],
|
||||
};
|
||||
const clusterConfig = config.cluster || {
|
||||
enabled: false,
|
||||
nodeName: 'modelgrid-local',
|
||||
role: 'standalone',
|
||||
bindHost: '0.0.0.0',
|
||||
gossipPort: 7946,
|
||||
sharedSecret: undefined,
|
||||
advertiseUrl: undefined,
|
||||
controlPlaneUrl: undefined,
|
||||
heartbeatIntervalMs: 5000,
|
||||
};
|
||||
|
||||
// Overview
|
||||
logger.logBox(
|
||||
@@ -48,9 +68,7 @@ export class ConfigHandler {
|
||||
`Host: ${theme.info(config.api.host)}`,
|
||||
`Port: ${theme.highlight(String(config.api.port))}`,
|
||||
`API Keys: ${config.api.apiKeys.length} configured`,
|
||||
...(config.api.rateLimit
|
||||
? [`Rate Limit: ${config.api.rateLimit} req/min`]
|
||||
: []),
|
||||
...(config.api.rateLimit ? [`Rate Limit: ${config.api.rateLimit} req/min`] : []),
|
||||
'',
|
||||
theme.dim('Endpoint:'),
|
||||
` http://${config.api.host}:${config.api.port}/v1/chat/completions`,
|
||||
@@ -88,12 +106,33 @@ export class ConfigHandler {
|
||||
logger.logBox(
|
||||
'Models',
|
||||
[
|
||||
`Auto Pull: ${config.models.autoPull ? theme.success('Enabled') : theme.dim('Disabled')}`,
|
||||
`Default Container: ${config.models.defaultContainer}`,
|
||||
`Auto Load: ${config.models.autoLoad.length} model(s)`,
|
||||
`Auto Deploy: ${
|
||||
modelConfig.autoDeploy ? theme.success('Enabled') : theme.dim('Disabled')
|
||||
}`,
|
||||
`Default Engine: ${modelConfig.defaultEngine}`,
|
||||
`Auto Load: ${modelConfig.autoLoad.length} model(s)`,
|
||||
'',
|
||||
theme.dim('Greenlist URL:'),
|
||||
` ${config.models.greenlistUrl}`,
|
||||
theme.dim('Registry URL:'),
|
||||
` ${modelConfig.registryUrl}`,
|
||||
],
|
||||
70,
|
||||
'default',
|
||||
);
|
||||
|
||||
logger.log('');
|
||||
logger.logBox(
|
||||
'Cluster',
|
||||
[
|
||||
`Enabled: ${clusterConfig.enabled ? theme.success('Yes') : theme.dim('No')}`,
|
||||
`Node: ${clusterConfig.nodeName}`,
|
||||
`Role: ${clusterConfig.role}`,
|
||||
`Bind Host: ${clusterConfig.bindHost}:${clusterConfig.gossipPort}`,
|
||||
`Shared Secret: ${
|
||||
clusterConfig.sharedSecret ? theme.success('Configured') : theme.dim('Not set')
|
||||
}`,
|
||||
`Advertise URL: ${clusterConfig.advertiseUrl || theme.dim('Default loopback')}`,
|
||||
`Control Plane: ${clusterConfig.controlPlaneUrl || theme.dim('Not configured')}`,
|
||||
`Heartbeat: ${clusterConfig.heartbeatIntervalMs}ms`,
|
||||
],
|
||||
70,
|
||||
'default',
|
||||
@@ -110,7 +149,7 @@ export class ConfigHandler {
|
||||
name: c.name,
|
||||
type: c.type,
|
||||
image: c.image.length > 40 ? c.image.substring(0, 37) + '...' : c.image,
|
||||
port: c.port,
|
||||
port: String(c.port),
|
||||
gpus: c.gpuIds.length > 0 ? c.gpuIds.join(',') : theme.dim('None'),
|
||||
}));
|
||||
|
||||
@@ -189,11 +228,22 @@ export class ConfigHandler {
|
||||
},
|
||||
containers: [],
|
||||
models: {
|
||||
greenlistUrl: 'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',
|
||||
autoPull: true,
|
||||
defaultContainer: 'ollama',
|
||||
registryUrl: 'https://list.modelgrid.com/catalog/models.json',
|
||||
autoDeploy: true,
|
||||
defaultEngine: 'vllm',
|
||||
autoLoad: [],
|
||||
},
|
||||
cluster: {
|
||||
enabled: false,
|
||||
nodeName: 'modelgrid-local',
|
||||
role: 'standalone',
|
||||
bindHost: '0.0.0.0',
|
||||
gossipPort: 7946,
|
||||
sharedSecret: '',
|
||||
advertiseUrl: 'http://127.0.0.1:8080',
|
||||
heartbeatIntervalMs: 5000,
|
||||
seedNodes: [],
|
||||
},
|
||||
checkInterval: 30000,
|
||||
};
|
||||
|
||||
|
||||
+62
-138
@@ -1,47 +1,36 @@
|
||||
/**
|
||||
* Container Handler
|
||||
*
|
||||
* CLI commands for container management.
|
||||
* Deployment handler for container-backed runtimes.
|
||||
*/
|
||||
|
||||
import { logger } from '../logger.ts';
|
||||
import { theme } from '../colors.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { DockerManager } from '../docker/docker-manager.ts';
|
||||
import type { IContainerConfig } from '../interfaces/container.ts';
|
||||
import { VllmContainer } from '../containers/vllm.ts';
|
||||
import type { ITableColumn } from '../logger.ts';
|
||||
import * as helpers from '../helpers/index.ts';
|
||||
|
||||
/**
|
||||
* Handler for container-related CLI commands
|
||||
*/
|
||||
export class ContainerHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private dockerManager: DockerManager;
|
||||
|
||||
constructor(containerManager: ContainerManager) {
|
||||
this.containerManager = containerManager;
|
||||
this.dockerManager = new DockerManager();
|
||||
}
|
||||
|
||||
/**
|
||||
* List all configured containers
|
||||
*/
|
||||
public async list(): Promise<void> {
|
||||
logger.log('');
|
||||
logger.info('Containers');
|
||||
logger.info('Deployments');
|
||||
logger.log('');
|
||||
|
||||
const containers = this.containerManager.getAllContainers();
|
||||
|
||||
if (containers.length === 0) {
|
||||
logger.logBox(
|
||||
'No Containers',
|
||||
'No Deployments',
|
||||
[
|
||||
'No containers are configured.',
|
||||
'No vLLM deployments are configured.',
|
||||
'',
|
||||
theme.dim('Add a container with:'),
|
||||
` ${theme.command('modelgrid container add')}`,
|
||||
theme.dim('Create one with:'),
|
||||
` ${theme.command('modelgrid run <model-id>')}`,
|
||||
],
|
||||
60,
|
||||
'warning',
|
||||
@@ -49,7 +38,7 @@ export class ContainerHandler {
|
||||
return;
|
||||
}
|
||||
|
||||
const rows = [];
|
||||
const rows: Record<string, string | number>[] = [];
|
||||
|
||||
for (const container of containers) {
|
||||
const status = await container.getStatus();
|
||||
@@ -57,28 +46,22 @@ export class ContainerHandler {
|
||||
|
||||
rows.push({
|
||||
id: config.id,
|
||||
name: config.name,
|
||||
type: this.formatContainerType(container.type),
|
||||
status: status.running
|
||||
? theme.success('Running')
|
||||
: theme.dim('Stopped'),
|
||||
health: status.running
|
||||
? this.formatHealth(status.health)
|
||||
: theme.dim('N/A'),
|
||||
port: config.externalPort || config.port,
|
||||
models: status.loadedModels.length,
|
||||
model: config.models[0] || theme.dim('N/A'),
|
||||
engine: this.formatContainerType(container.type),
|
||||
status: status.running ? theme.success('Running') : theme.dim('Stopped'),
|
||||
health: status.running ? this.formatHealth(status.health) : theme.dim('N/A'),
|
||||
port: String(config.externalPort || config.port),
|
||||
gpus: config.gpuIds.length > 0 ? config.gpuIds.join(',') : theme.dim('None'),
|
||||
});
|
||||
}
|
||||
|
||||
const columns: ITableColumn[] = [
|
||||
{ header: 'ID', key: 'id', align: 'left' },
|
||||
{ header: 'Name', key: 'name', align: 'left', color: theme.highlight },
|
||||
{ header: 'Type', key: 'type', align: 'left' },
|
||||
{ header: 'Model', key: 'model', align: 'left', color: theme.highlight },
|
||||
{ header: 'Engine', key: 'engine', align: 'left' },
|
||||
{ header: 'Status', key: 'status', align: 'left' },
|
||||
{ header: 'Health', key: 'health', align: 'left' },
|
||||
{ header: 'Port', key: 'port', align: 'right', color: theme.info },
|
||||
{ header: 'Models', key: 'models', align: 'right' },
|
||||
{ header: 'GPUs', key: 'gpus', align: 'left' },
|
||||
];
|
||||
|
||||
@@ -86,94 +69,70 @@ export class ContainerHandler {
|
||||
logger.log('');
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new container interactively
|
||||
*/
|
||||
public async add(): Promise<void> {
|
||||
const { prompt, close, select } = await helpers.createPrompt();
|
||||
const { prompt, close } = await helpers.createPrompt();
|
||||
|
||||
try {
|
||||
logger.log('');
|
||||
logger.highlight('Add Container');
|
||||
logger.dim('Configure a new AI model container');
|
||||
logger.highlight('Create vLLM Deployment');
|
||||
logger.dim('Provision a single-model vLLM runtime');
|
||||
logger.log('');
|
||||
|
||||
// Select container type
|
||||
const typeIndex = await select('Select container type:', [
|
||||
'Ollama - Easy to use, good for local models',
|
||||
'vLLM - High performance, OpenAI compatible',
|
||||
'TGI - HuggingFace Text Generation Inference',
|
||||
]);
|
||||
|
||||
const types = ['ollama', 'vllm', 'tgi'] as const;
|
||||
const containerType = types[typeIndex];
|
||||
|
||||
// Container name
|
||||
const name = await prompt('Container name: ');
|
||||
if (!name.trim()) {
|
||||
logger.error('Container name is required');
|
||||
const modelName = await prompt('Model ID or Hugging Face repo: ');
|
||||
if (!modelName.trim()) {
|
||||
logger.error('Model ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate ID from name
|
||||
const id = name.toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
||||
const name = await prompt(
|
||||
`Deployment name [${modelName.split('/').pop() || 'deployment'}]: `,
|
||||
);
|
||||
const deploymentName = name.trim() || modelName.split('/').pop() || 'deployment';
|
||||
const deploymentId = deploymentName.toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
||||
|
||||
// Port
|
||||
const defaultPorts = { ollama: 11434, vllm: 8000, tgi: 8080 };
|
||||
const portStr = await prompt(`Port [${defaultPorts[containerType]}]: `);
|
||||
const port = portStr ? parseInt(portStr, 10) : defaultPorts[containerType];
|
||||
const portStr = await prompt('Port [8000]: ');
|
||||
const port = portStr ? parseInt(portStr, 10) : 8000;
|
||||
|
||||
// GPU assignment
|
||||
const gpuStr = await prompt('GPU IDs (comma-separated, or "all", or empty for none): ');
|
||||
const gpuStr = await prompt('GPU IDs (comma-separated, or "all"): ');
|
||||
let gpuIds: string[] = [];
|
||||
|
||||
if (gpuStr.trim().toLowerCase() === 'all') {
|
||||
const { GpuDetector } = await import('../hardware/gpu-detector.ts');
|
||||
const detector = new GpuDetector();
|
||||
const gpus = await detector.detectGpus();
|
||||
gpuIds = gpus.map((g) => g.id);
|
||||
gpuIds = gpus.map((gpu) => gpu.id);
|
||||
} else if (gpuStr.trim()) {
|
||||
gpuIds = gpuStr.split(',').map((s) => s.trim());
|
||||
gpuIds = gpuStr.split(',').map((value) => value.trim());
|
||||
}
|
||||
|
||||
// Build config
|
||||
const config: IContainerConfig = {
|
||||
id,
|
||||
type: containerType,
|
||||
name,
|
||||
image: this.getDefaultImage(containerType),
|
||||
const config = VllmContainer.createConfig(deploymentId, deploymentName, modelName, gpuIds, {
|
||||
port,
|
||||
gpuIds,
|
||||
models: [],
|
||||
};
|
||||
});
|
||||
config.models = [modelName];
|
||||
|
||||
// Add container
|
||||
await this.containerManager.addContainer(config);
|
||||
this.containerManager.addContainer(config);
|
||||
|
||||
logger.log('');
|
||||
logger.success(`Container "${name}" added successfully`);
|
||||
logger.success(`Deployment "${deploymentName}" added successfully`);
|
||||
logger.log('');
|
||||
logger.dim('Start the container with:');
|
||||
logger.log(` ${theme.command(`modelgrid container start ${id}`)}`);
|
||||
logger.dim('Start it with:');
|
||||
logger.log(` ${theme.command(`modelgrid container start ${deploymentId}`)}`);
|
||||
logger.log('');
|
||||
} finally {
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a container
|
||||
*/
|
||||
public async remove(containerId: string): Promise<void> {
|
||||
if (!containerId) {
|
||||
logger.error('Container ID is required');
|
||||
logger.error('Deployment ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
const { prompt, close } = await helpers.createPrompt();
|
||||
|
||||
try {
|
||||
const confirm = await prompt(`Remove container "${containerId}"? (y/N): `);
|
||||
const confirm = await prompt(`Remove deployment "${containerId}"? (y/N): `);
|
||||
|
||||
if (confirm.toLowerCase() !== 'y') {
|
||||
logger.log('Aborted');
|
||||
@@ -183,83 +142,72 @@ export class ContainerHandler {
|
||||
const success = await this.containerManager.removeContainer(containerId);
|
||||
|
||||
if (success) {
|
||||
logger.success(`Container "${containerId}" removed`);
|
||||
logger.success(`Deployment "${containerId}" removed`);
|
||||
} else {
|
||||
logger.error(`Failed to remove container "${containerId}"`);
|
||||
logger.error(`Failed to remove deployment "${containerId}"`);
|
||||
}
|
||||
} finally {
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a container
|
||||
*/
|
||||
public async start(containerId?: string): Promise<void> {
|
||||
if (containerId) {
|
||||
// Start specific container
|
||||
const container = this.containerManager.getContainer(containerId);
|
||||
if (!container) {
|
||||
logger.error(`Container "${containerId}" not found`);
|
||||
logger.error(`Deployment "${containerId}" not found`);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`Starting container "${containerId}"...`);
|
||||
logger.info(`Starting deployment "${containerId}"...`);
|
||||
const success = await container.start();
|
||||
|
||||
if (success) {
|
||||
logger.success(`Container "${containerId}" started`);
|
||||
logger.success(`Deployment "${containerId}" started`);
|
||||
} else {
|
||||
logger.error(`Failed to start container "${containerId}"`);
|
||||
logger.error(`Failed to start deployment "${containerId}"`);
|
||||
}
|
||||
} else {
|
||||
// Start all containers
|
||||
logger.info('Starting all containers...');
|
||||
await this.containerManager.startAll();
|
||||
logger.success('All containers started');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info('Starting all deployments...');
|
||||
await this.containerManager.startAll();
|
||||
logger.success('All deployments started');
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop a container
|
||||
*/
|
||||
public async stop(containerId?: string): Promise<void> {
|
||||
if (containerId) {
|
||||
// Stop specific container
|
||||
const container = this.containerManager.getContainer(containerId);
|
||||
if (!container) {
|
||||
logger.error(`Container "${containerId}" not found`);
|
||||
logger.error(`Deployment "${containerId}" not found`);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`Stopping container "${containerId}"...`);
|
||||
logger.info(`Stopping deployment "${containerId}"...`);
|
||||
const success = await container.stop();
|
||||
|
||||
if (success) {
|
||||
logger.success(`Container "${containerId}" stopped`);
|
||||
logger.success(`Deployment "${containerId}" stopped`);
|
||||
} else {
|
||||
logger.error(`Failed to stop container "${containerId}"`);
|
||||
logger.error(`Failed to stop deployment "${containerId}"`);
|
||||
}
|
||||
} else {
|
||||
// Stop all containers
|
||||
logger.info('Stopping all containers...');
|
||||
await this.containerManager.stopAll();
|
||||
logger.success('All containers stopped');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info('Stopping all deployments...');
|
||||
await this.containerManager.stopAll();
|
||||
logger.success('All deployments stopped');
|
||||
}
|
||||
|
||||
/**
|
||||
* Show container logs
|
||||
*/
|
||||
public async logs(containerId: string, lines: number = 100): Promise<void> {
|
||||
if (!containerId) {
|
||||
logger.error('Container ID is required');
|
||||
logger.error('Deployment ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
const container = this.containerManager.getContainer(containerId);
|
||||
if (!container) {
|
||||
logger.error(`Container "${containerId}" not found`);
|
||||
logger.error(`Deployment "${containerId}" not found`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -267,13 +215,8 @@ export class ContainerHandler {
|
||||
console.log(logs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Format container type for display
|
||||
*/
|
||||
private formatContainerType(type: string): string {
|
||||
switch (type) {
|
||||
case 'ollama':
|
||||
return theme.containerOllama('Ollama');
|
||||
case 'vllm':
|
||||
return theme.containerVllm('vLLM');
|
||||
case 'tgi':
|
||||
@@ -283,9 +226,6 @@ export class ContainerHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Format health status
|
||||
*/
|
||||
private formatHealth(health: string): string {
|
||||
switch (health) {
|
||||
case 'healthy':
|
||||
@@ -298,20 +238,4 @@ export class ContainerHandler {
|
||||
return theme.dim(health);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get default image for container type
|
||||
*/
|
||||
private getDefaultImage(type: string): string {
|
||||
switch (type) {
|
||||
case 'ollama':
|
||||
return 'ollama/ollama:latest';
|
||||
case 'vllm':
|
||||
return 'vllm/vllm-openai:latest';
|
||||
case 'tgi':
|
||||
return 'ghcr.io/huggingface/text-generation-inference:latest';
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+38
-16
@@ -86,22 +86,30 @@ export class GpuHandler {
|
||||
logger.info('GPU Status');
|
||||
logger.log('');
|
||||
|
||||
const gpuStatus = await this.gpuDetector.getGpuStatus();
|
||||
const gpuInfo = await this.gpuDetector.detectGpus();
|
||||
const gpuStatus = await this.gpuDetector.getAllGpuStatus();
|
||||
|
||||
if (gpuStatus.length === 0) {
|
||||
if (gpuStatus.size === 0) {
|
||||
logger.warn('No GPUs detected');
|
||||
return;
|
||||
}
|
||||
|
||||
for (const gpu of gpuStatus) {
|
||||
const utilizationBar = this.createProgressBar(gpu.utilization, 30);
|
||||
const memoryBar = this.createProgressBar(gpu.memoryUsed / gpu.memoryTotal * 100, 30);
|
||||
for (const [gpuId, status] of gpuStatus) {
|
||||
const info = gpuInfo.find((gpu) => gpu.id === gpuId);
|
||||
const utilizationBar = this.createProgressBar(status.utilization, 30);
|
||||
const memoryBar = this.createProgressBar(status.memoryUsed / status.memoryTotal * 100, 30);
|
||||
|
||||
logger.logBoxTitle(`GPU ${gpu.id}: ${gpu.name}`, 70, 'info');
|
||||
logger.logBoxLine(`Utilization: ${utilizationBar} ${gpu.utilization.toFixed(1)}%`);
|
||||
logger.logBoxLine(`Memory: ${memoryBar} ${Math.round(gpu.memoryUsed)}/${Math.round(gpu.memoryTotal)} MB`);
|
||||
logger.logBoxLine(`Temperature: ${this.formatTemperature(gpu.temperature)}`);
|
||||
logger.logBoxLine(`Power: ${gpu.powerDraw.toFixed(0)}W / ${gpu.powerLimit.toFixed(0)}W`);
|
||||
logger.logBoxTitle(`GPU ${status.id}: ${info?.model || 'Unknown GPU'}`, 70, 'info');
|
||||
logger.logBoxLine(`Utilization: ${utilizationBar} ${status.utilization.toFixed(1)}%`);
|
||||
logger.logBoxLine(
|
||||
`Memory: ${memoryBar} ${Math.round(status.memoryUsed)}/${
|
||||
Math.round(status.memoryTotal)
|
||||
} MB`,
|
||||
);
|
||||
logger.logBoxLine(`Temperature: ${this.formatTemperature(status.temperature)}`);
|
||||
logger.logBoxLine(
|
||||
`Power: ${status.powerUsage.toFixed(0)}W / ${status.powerLimit.toFixed(0)}W`,
|
||||
);
|
||||
logger.logBoxEnd();
|
||||
logger.log('');
|
||||
}
|
||||
@@ -138,13 +146,23 @@ export class GpuHandler {
|
||||
|
||||
const status = await driver.getStatus();
|
||||
|
||||
logger.logBoxTitle(`${this.formatVendor(vendor)} Driver`, 60, status.installed ? 'success' : 'warning');
|
||||
logger.logBoxLine(`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`);
|
||||
logger.logBoxTitle(
|
||||
`${this.formatVendor(vendor)} Driver`,
|
||||
60,
|
||||
status.installed ? 'success' : 'warning',
|
||||
);
|
||||
logger.logBoxLine(
|
||||
`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`,
|
||||
);
|
||||
|
||||
if (status.installed) {
|
||||
logger.logBoxLine(`Version: ${status.version || 'Unknown'}`);
|
||||
logger.logBoxLine(`Runtime: ${status.runtimeVersion || 'Unknown'}`);
|
||||
logger.logBoxLine(`Container Support: ${status.containerSupport ? theme.success('Yes') : theme.warning('No')}`);
|
||||
logger.logBoxLine(`Runtime: ${status.containerRuntimeVersion || 'Unknown'}`);
|
||||
logger.logBoxLine(
|
||||
`Container Support: ${
|
||||
status.containerSupport ? theme.success('Yes') : theme.warning('No')
|
||||
}`,
|
||||
);
|
||||
} else {
|
||||
logger.logBoxLine('');
|
||||
logger.logBoxLine(theme.dim('Run `modelgrid gpu install` to install drivers'));
|
||||
@@ -183,14 +201,18 @@ export class GpuHandler {
|
||||
|
||||
logger.info(`Installing ${this.formatVendor(vendor)} drivers...`);
|
||||
|
||||
const success = await driver.install();
|
||||
const success = await driver.install({
|
||||
installToolkit: true,
|
||||
installContainerSupport: true,
|
||||
nonInteractive: false,
|
||||
});
|
||||
|
||||
if (success) {
|
||||
logger.success(`${this.formatVendor(vendor)} drivers installed successfully`);
|
||||
|
||||
// Setup container support
|
||||
logger.info('Setting up container support...');
|
||||
const containerSuccess = await driver.setupContainer();
|
||||
const containerSuccess = await driver.installContainerSupport();
|
||||
|
||||
if (containerSuccess) {
|
||||
logger.success('Container support configured');
|
||||
|
||||
+61
-89
@@ -1,55 +1,48 @@
|
||||
/**
|
||||
* Model Handler
|
||||
*
|
||||
* CLI commands for model management.
|
||||
* Model handler for catalog-backed vLLM deployments.
|
||||
*/
|
||||
|
||||
import { logger } from '../logger.ts';
|
||||
import { theme } from '../colors.ts';
|
||||
import { ClusterCoordinator } from '../cluster/coordinator.ts';
|
||||
import { ContainerManager } from '../containers/container-manager.ts';
|
||||
import { ModelRegistry } from '../models/registry.ts';
|
||||
import { ModelLoader } from '../models/loader.ts';
|
||||
import type { ITableColumn } from '../logger.ts';
|
||||
|
||||
/**
|
||||
* Handler for model-related CLI commands
|
||||
*/
|
||||
export class ModelHandler {
|
||||
private containerManager: ContainerManager;
|
||||
private clusterCoordinator: ClusterCoordinator;
|
||||
private modelRegistry: ModelRegistry;
|
||||
private modelLoader: ModelLoader;
|
||||
|
||||
constructor(
|
||||
containerManager: ContainerManager,
|
||||
clusterCoordinator: ClusterCoordinator,
|
||||
modelRegistry: ModelRegistry,
|
||||
) {
|
||||
this.containerManager = containerManager;
|
||||
this.clusterCoordinator = clusterCoordinator;
|
||||
this.modelRegistry = modelRegistry;
|
||||
this.modelLoader = new ModelLoader(modelRegistry, containerManager);
|
||||
}
|
||||
|
||||
/**
|
||||
* List all available models
|
||||
*/
|
||||
public async list(): Promise<void> {
|
||||
logger.log('');
|
||||
logger.info('Models');
|
||||
logger.info('Model Catalog');
|
||||
logger.log('');
|
||||
|
||||
// Get loaded models from containers
|
||||
const loadedModels = await this.containerManager.getAllAvailableModels();
|
||||
const catalogModels = await this.modelRegistry.getAllModels();
|
||||
|
||||
// Get greenlit models
|
||||
const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
|
||||
|
||||
if (loadedModels.size === 0 && greenlitModels.length === 0) {
|
||||
if (loadedModels.size === 0 && catalogModels.length === 0) {
|
||||
logger.logBox(
|
||||
'No Models',
|
||||
[
|
||||
'No models are loaded or greenlit.',
|
||||
'The local registry cache is empty.',
|
||||
'',
|
||||
theme.dim('Pull a model with:'),
|
||||
` ${theme.command('modelgrid model pull <name>')}`,
|
||||
theme.dim('Refresh with:'),
|
||||
` ${theme.command('modelgrid model refresh')}`,
|
||||
],
|
||||
60,
|
||||
'warning',
|
||||
@@ -57,56 +50,51 @@ export class ModelHandler {
|
||||
return;
|
||||
}
|
||||
|
||||
// Show loaded models
|
||||
if (loadedModels.size > 0) {
|
||||
logger.info(`Loaded Models (${loadedModels.size}):`);
|
||||
logger.info(`Running Deployments (${loadedModels.size}):`);
|
||||
logger.log('');
|
||||
|
||||
const rows = [];
|
||||
for (const [name, info] of loadedModels) {
|
||||
const rows: Record<string, string | number>[] = [];
|
||||
for (const [name, endpoints] of loadedModels) {
|
||||
const primaryEndpoint = endpoints[0];
|
||||
rows.push({
|
||||
name,
|
||||
container: info.container,
|
||||
size: info.size ? this.formatSize(info.size) : theme.dim('N/A'),
|
||||
format: info.format || theme.dim('N/A'),
|
||||
modified: info.modifiedAt
|
||||
? new Date(info.modifiedAt).toLocaleDateString()
|
||||
: theme.dim('N/A'),
|
||||
model: name,
|
||||
engine: primaryEndpoint?.type || 'vllm',
|
||||
replicas: String(endpoints.length),
|
||||
endpoint: primaryEndpoint?.url || theme.dim('N/A'),
|
||||
});
|
||||
}
|
||||
|
||||
const columns: ITableColumn[] = [
|
||||
{ header: 'Name', key: 'name', align: 'left', color: theme.highlight },
|
||||
{ header: 'Container', key: 'container', align: 'left' },
|
||||
{ header: 'Size', key: 'size', align: 'right', color: theme.info },
|
||||
{ header: 'Format', key: 'format', align: 'left' },
|
||||
{ header: 'Modified', key: 'modified', align: 'left', color: theme.dim },
|
||||
{ header: 'Model', key: 'model', align: 'left', color: theme.highlight },
|
||||
{ header: 'Engine', key: 'engine', align: 'left' },
|
||||
{ header: 'Replicas', key: 'replicas', align: 'right', color: theme.info },
|
||||
{ header: 'Endpoint', key: 'endpoint', align: 'left', color: theme.dim },
|
||||
];
|
||||
|
||||
logger.logTable(columns, rows);
|
||||
logger.log('');
|
||||
}
|
||||
|
||||
// Show greenlit models (not yet loaded)
|
||||
const loadedNames = new Set(loadedModels.keys());
|
||||
const unloadedGreenlit = greenlitModels.filter((m) => !loadedNames.has(m.name));
|
||||
const available = catalogModels.filter((model) => !loadedNames.has(model.id));
|
||||
|
||||
if (unloadedGreenlit.length > 0) {
|
||||
logger.info(`Available to Pull (${unloadedGreenlit.length}):`);
|
||||
if (available.length > 0) {
|
||||
logger.info(`Available To Deploy (${available.length}):`);
|
||||
logger.log('');
|
||||
|
||||
const rows = unloadedGreenlit.map((m) => ({
|
||||
name: m.name,
|
||||
container: m.container,
|
||||
vram: `${m.minVram} GB`,
|
||||
tags: m.tags?.join(', ') || theme.dim('None'),
|
||||
const rows: Record<string, string | number>[] = available.map((model) => ({
|
||||
model: model.id,
|
||||
family: model.metadata?.family || theme.dim('N/A'),
|
||||
vram: `${model.requirements.minVramGb} GB`,
|
||||
capabilities: this.formatCapabilities(model.capabilities),
|
||||
}));
|
||||
|
||||
const columns: ITableColumn[] = [
|
||||
{ header: 'Name', key: 'name', align: 'left' },
|
||||
{ header: 'Container', key: 'container', align: 'left' },
|
||||
{ header: 'Model', key: 'model', align: 'left' },
|
||||
{ header: 'Family', key: 'family', align: 'left' },
|
||||
{ header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
|
||||
{ header: 'Tags', key: 'tags', align: 'left', color: theme.dim },
|
||||
{ header: 'Capabilities', key: 'capabilities', align: 'left', color: theme.dim },
|
||||
];
|
||||
|
||||
logger.logTable(columns, rows);
|
||||
@@ -114,47 +102,42 @@ export class ModelHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull a model
|
||||
*/
|
||||
public async pull(modelName: string): Promise<void> {
|
||||
if (!modelName) {
|
||||
logger.error('Model name is required');
|
||||
logger.error('Model ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.log('');
|
||||
logger.info(`Pulling model: ${modelName}`);
|
||||
logger.info(`Deploying model: ${modelName}`);
|
||||
logger.log('');
|
||||
|
||||
const result = await this.modelLoader.loadModel(modelName);
|
||||
const result = await this.clusterCoordinator.ensureModelViaControlPlane(modelName);
|
||||
|
||||
if (result.success) {
|
||||
if (result.alreadyLoaded) {
|
||||
logger.success(`Model "${modelName}" is already loaded`);
|
||||
if (result) {
|
||||
if (result.created) {
|
||||
logger.success(`Model "${result.model}" deployed successfully`);
|
||||
} else {
|
||||
logger.success(`Model "${modelName}" pulled successfully`);
|
||||
}
|
||||
if (result.container) {
|
||||
logger.dim(`Container: ${result.container}`);
|
||||
logger.success(`Model "${result.model}" is already available`);
|
||||
}
|
||||
logger.dim(`Node: ${result.location.nodeName}`);
|
||||
logger.dim(`Endpoint: ${result.location.endpoint}`);
|
||||
} else {
|
||||
logger.error(`Failed to pull model: ${result.error}`);
|
||||
logger.error(`Failed to deploy model: could not schedule ${modelName}`);
|
||||
}
|
||||
|
||||
logger.log('');
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a model
|
||||
*/
|
||||
public async remove(modelName: string): Promise<void> {
|
||||
if (!modelName) {
|
||||
logger.error('Model name is required');
|
||||
logger.error('Model ID is required');
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`Removing model: ${modelName}`);
|
||||
logger.info(`Removing deployment for model: ${modelName}`);
|
||||
|
||||
await this.clusterCoordinator.clearDesiredDeployment(modelName);
|
||||
|
||||
const success = await this.modelLoader.unloadModel(modelName);
|
||||
|
||||
@@ -165,38 +148,27 @@ export class ModelHandler {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Show model loading status and recommendations
|
||||
*/
|
||||
public async status(): Promise<void> {
|
||||
logger.log('');
|
||||
await this.modelLoader.printStatus();
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh greenlist cache
|
||||
*/
|
||||
public async refresh(): Promise<void> {
|
||||
logger.info('Refreshing greenlist...');
|
||||
|
||||
await this.modelRegistry.refreshGreenlist();
|
||||
|
||||
logger.success('Greenlist refreshed');
|
||||
logger.info('Refreshing model catalog...');
|
||||
await this.modelRegistry.fetchCatalog(true);
|
||||
logger.success('Model catalog refreshed');
|
||||
}
|
||||
|
||||
/**
|
||||
* Format file size
|
||||
*/
|
||||
private formatSize(bytes: number): string {
|
||||
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||||
let size = bytes;
|
||||
let unitIndex = 0;
|
||||
private formatCapabilities(capabilities: {
|
||||
chat?: boolean;
|
||||
completions?: boolean;
|
||||
embeddings?: boolean;
|
||||
tools?: boolean;
|
||||
}): string {
|
||||
const enabled = Object.entries(capabilities)
|
||||
.filter(([, value]) => value)
|
||||
.map(([key]) => key);
|
||||
|
||||
while (size >= 1024 && unitIndex < units.length - 1) {
|
||||
size /= 1024;
|
||||
unitIndex++;
|
||||
}
|
||||
|
||||
return `${size.toFixed(1)} ${units[unitIndex]}`;
|
||||
return enabled.length > 0 ? enabled.join(', ') : theme.dim('none');
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +27,9 @@ export class ServiceHandler {
|
||||
public async enable(): Promise<void> {
|
||||
this.checkRootAccess('This command must be run as root.');
|
||||
await this.modelgrid.getSystemd().install();
|
||||
logger.log('ModelGrid service has been installed. Use "modelgrid service start" to start the service.');
|
||||
logger.log(
|
||||
'ModelGrid service has been installed. Use "modelgrid service start" to start the service.',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -41,7 +43,9 @@ export class ServiceHandler {
|
||||
}
|
||||
await this.modelgrid.getDaemon().start();
|
||||
} catch (error) {
|
||||
logger.error(`Daemon start failed: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.error(
|
||||
`Daemon start failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
@@ -127,13 +131,18 @@ export class ServiceHandler {
|
||||
|
||||
try {
|
||||
const currentVersion = this.modelgrid.getVersion();
|
||||
const apiUrl = 'https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/latest';
|
||||
const apiUrl =
|
||||
'https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/latest';
|
||||
const response = execSync(`curl -sSL ${apiUrl}`).toString();
|
||||
const release = JSON.parse(response);
|
||||
const latestVersion = release.tag_name;
|
||||
|
||||
const normalizedCurrent = currentVersion.startsWith('v') ? currentVersion : `v${currentVersion}`;
|
||||
const normalizedLatest = latestVersion.startsWith('v') ? latestVersion : `v${latestVersion}`;
|
||||
const normalizedCurrent = currentVersion.startsWith('v')
|
||||
? currentVersion
|
||||
: `v${currentVersion}`;
|
||||
const normalizedLatest = latestVersion.startsWith('v')
|
||||
? latestVersion
|
||||
: `v${latestVersion}`;
|
||||
|
||||
logger.dim(`Current version: ${normalizedCurrent}`);
|
||||
logger.dim(`Latest version: ${normalizedLatest}`);
|
||||
@@ -149,7 +158,8 @@ export class ServiceHandler {
|
||||
logger.dim('Downloading and installing...');
|
||||
console.log('');
|
||||
|
||||
const installUrl = 'https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh';
|
||||
const installUrl =
|
||||
'https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh';
|
||||
|
||||
execSync(`curl -sSL ${installUrl} | bash`, {
|
||||
stdio: 'inherit',
|
||||
|
||||
Reference in New Issue
Block a user