feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+36 -13
View File
@@ -6,14 +6,13 @@
import type {
IContainerConfig,
IContainerStatus,
IContainerEndpoint,
IContainerStatus,
TContainerType,
} from '../interfaces/container.ts';
import { logger } from '../logger.ts';
import { DockerManager } from '../docker/docker-manager.ts';
import { BaseContainer } from './base-container.ts';
import { OllamaContainer } from './ollama.ts';
import { VllmContainer } from './vllm.ts';
import { TgiContainer } from './tgi.ts';
@@ -47,8 +46,6 @@ export class ContainerManager {
*/
private createContainerInstance(config: IContainerConfig): BaseContainer {
switch (config.type) {
case 'ollama':
return new OllamaContainer(config);
case 'vllm':
return new VllmContainer(config);
case 'tgi':
@@ -108,7 +105,11 @@ export class ContainerManager {
try {
this.addContainer(config);
} catch (error) {
logger.warn(`Failed to load container ${config.id}: ${error instanceof Error ? error.message : String(error)}`);
logger.warn(
`Failed to load container ${config.id}: ${
error instanceof Error ? error.message : String(error)
}`,
);
}
}
}
@@ -128,7 +129,11 @@ export class ContainerManager {
const success = await container.start();
results.set(id, success);
} catch (error) {
logger.error(`Failed to start container ${id}: ${error instanceof Error ? error.message : String(error)}`);
logger.error(
`Failed to start container ${id}: ${
error instanceof Error ? error.message : String(error)
}`,
);
results.set(id, false);
}
}
@@ -147,7 +152,11 @@ export class ContainerManager {
const success = await container.stop();
results.set(id, success);
} catch (error) {
logger.error(`Failed to stop container ${id}: ${error instanceof Error ? error.message : String(error)}`);
logger.error(
`Failed to stop container ${id}: ${
error instanceof Error ? error.message : String(error)
}`,
);
results.set(id, false);
}
}
@@ -166,7 +175,11 @@ export class ContainerManager {
const status = await container.getStatus();
statuses.set(id, status);
} catch (error) {
logger.warn(`Failed to get status for container ${id}: ${error instanceof Error ? error.message : String(error)}`);
logger.warn(
`Failed to get status for container ${id}: ${
error instanceof Error ? error.message : String(error)
}`,
);
}
}
@@ -266,7 +279,7 @@ export class ContainerManager {
*/
public async pullModel(
modelName: string,
containerType: TContainerType = 'ollama',
containerType: TContainerType = 'vllm',
containerId?: string,
): Promise<boolean> {
// Find or create appropriate container
@@ -313,6 +326,16 @@ export class ContainerManager {
return results;
}
/**
 * Aggregate the per-container health results into a single verdict.
 *
 * Awaits `this.healthCheck()` and inspects every value it yields. The
 * result is `true` only when no value is falsy; an empty result set also
 * resolves to `true` because there is nothing to report as unhealthy.
 * NOTE(review): presumably `healthCheck()` resolves to a Map of container
 * id -> boolean; confirm against its definition.
 *
 * @returns `true` when every reported value is truthy (or none exist),
 *          `false` as soon as one falsy value is found.
 */
public async checkAllHealth(): Promise<boolean> {
  const outcomes = await this.healthCheck();
  for (const isHealthy of outcomes.values()) {
    if (!isHealthy) {
      // One unhealthy entry is enough to fail the aggregate check.
      return false;
    }
  }
  // Reached for an empty result set or when every entry was healthy.
  return true;
}
/**
* Print container status summary
*/
@@ -329,9 +352,7 @@ export class ContainerManager {
for (const [id, status] of statuses) {
const runningStr = status.running ? 'Running' : 'Stopped';
const healthStr = status.health;
const modelsStr = status.loadedModels.length > 0
? status.loadedModels.join(', ')
: 'None';
const modelsStr = status.loadedModels.length > 0 ? status.loadedModels.join(', ') : 'None';
logger.logBoxLine(`${status.name} (${id})`);
logger.logBoxLine(` Type: ${status.type} | Status: ${runningStr} | Health: ${healthStr}`);
@@ -339,7 +360,9 @@ export class ContainerManager {
logger.logBoxLine(` Endpoint: ${status.endpoint}`);
if (status.gpuUtilization !== undefined) {
logger.logBoxLine(` GPU: ${status.gpuUtilization}% | Memory: ${status.memoryUsage || 0}MB`);
logger.logBoxLine(
` GPU: ${status.gpuUtilization}% | Memory: ${status.memoryUsage || 0}MB`,
);
}
logger.logBoxLine('');
}