feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
@@ -6,14 +6,13 @@
|
||||
|
||||
import type {
|
||||
IContainerConfig,
|
||||
IContainerStatus,
|
||||
IContainerEndpoint,
|
||||
IContainerStatus,
|
||||
TContainerType,
|
||||
} from '../interfaces/container.ts';
|
||||
import { logger } from '../logger.ts';
|
||||
import { DockerManager } from '../docker/docker-manager.ts';
|
||||
import { BaseContainer } from './base-container.ts';
|
||||
import { OllamaContainer } from './ollama.ts';
|
||||
import { VllmContainer } from './vllm.ts';
|
||||
import { TgiContainer } from './tgi.ts';
|
||||
|
||||
@@ -47,8 +46,6 @@ export class ContainerManager {
|
||||
*/
|
||||
private createContainerInstance(config: IContainerConfig): BaseContainer {
|
||||
switch (config.type) {
|
||||
case 'ollama':
|
||||
return new OllamaContainer(config);
|
||||
case 'vllm':
|
||||
return new VllmContainer(config);
|
||||
case 'tgi':
|
||||
@@ -108,7 +105,11 @@ export class ContainerManager {
|
||||
try {
|
||||
this.addContainer(config);
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to load container ${config.id}: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.warn(
|
||||
`Failed to load container ${config.id}: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -128,7 +129,11 @@ export class ContainerManager {
|
||||
const success = await container.start();
|
||||
results.set(id, success);
|
||||
} catch (error) {
|
||||
logger.error(`Failed to start container ${id}: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.error(
|
||||
`Failed to start container ${id}: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`,
|
||||
);
|
||||
results.set(id, false);
|
||||
}
|
||||
}
|
||||
@@ -147,7 +152,11 @@ export class ContainerManager {
|
||||
const success = await container.stop();
|
||||
results.set(id, success);
|
||||
} catch (error) {
|
||||
logger.error(`Failed to stop container ${id}: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.error(
|
||||
`Failed to stop container ${id}: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`,
|
||||
);
|
||||
results.set(id, false);
|
||||
}
|
||||
}
|
||||
@@ -166,7 +175,11 @@ export class ContainerManager {
|
||||
const status = await container.getStatus();
|
||||
statuses.set(id, status);
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to get status for container ${id}: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.warn(
|
||||
`Failed to get status for container ${id}: ${
|
||||
error instanceof Error ? error.message : String(error)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,7 +279,7 @@ export class ContainerManager {
|
||||
*/
|
||||
public async pullModel(
|
||||
modelName: string,
|
||||
containerType: TContainerType = 'ollama',
|
||||
containerType: TContainerType = 'vllm',
|
||||
containerId?: string,
|
||||
): Promise<boolean> {
|
||||
// Find or create appropriate container
|
||||
@@ -313,6 +326,16 @@ export class ContainerManager {
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Aggregate health check across everything reported by `this.healthCheck()`.
 *
 * Awaits `this.healthCheck()` and collapses its per-entry results into a
 * single boolean.
 * NOTE(review): `healthCheck()` is not visible in this hunk; it presumably
 * returns a Map keyed by container id with a boolean health flag per entry
 * (it is used here via `.size` and `.values()`) — confirm against its definition.
 *
 * @returns `true` when there are no results at all, or when every result
 *          value is truthy; `false` as soon as any value is falsy.
 */
public async checkAllHealth(): Promise<boolean> {
|
||||
const results = await this.healthCheck();
|
||||
|
||||
// No entries to check: report healthy. (`every` on an empty array would
// also yield true, so this guard only makes the empty case explicit.)
if (results.size === 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Healthy overall only if every individual result value is truthy.
return Array.from(results.values()).every((healthy) => healthy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print container status summary
|
||||
*/
|
||||
@@ -329,9 +352,7 @@ export class ContainerManager {
|
||||
for (const [id, status] of statuses) {
|
||||
const runningStr = status.running ? 'Running' : 'Stopped';
|
||||
const healthStr = status.health;
|
||||
const modelsStr = status.loadedModels.length > 0
|
||||
? status.loadedModels.join(', ')
|
||||
: 'None';
|
||||
const modelsStr = status.loadedModels.length > 0 ? status.loadedModels.join(', ') : 'None';
|
||||
|
||||
logger.logBoxLine(`${status.name} (${id})`);
|
||||
logger.logBoxLine(` Type: ${status.type} | Status: ${runningStr} | Health: ${healthStr}`);
|
||||
@@ -339,7 +360,9 @@ export class ContainerManager {
|
||||
logger.logBoxLine(` Endpoint: ${status.endpoint}`);
|
||||
|
||||
if (status.gpuUtilization !== undefined) {
|
||||
logger.logBoxLine(` GPU: ${status.gpuUtilization}% | Memory: ${status.memoryUsage || 0}MB`);
|
||||
logger.logBoxLine(
|
||||
` GPU: ${status.gpuUtilization}% | Memory: ${status.memoryUsage || 0}MB`,
|
||||
);
|
||||
}
|
||||
logger.logBoxLine('');
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user