feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing

This commit is contained in:
2026-04-20 23:00:50 +00:00
parent 83cacd0cf1
commit 4f2266e1b7
55 changed files with 3970 additions and 1630 deletions
+40 -5
View File
@@ -54,6 +54,10 @@ export class Daemon {
// Preload models if configured
await this.preloadModels(config);
await this.syncClusterState(config);
await this.modelgrid.getClusterCoordinator().reconcileDesiredReplicas();
await this.syncClusterState(config);
// Setup signal handlers
this.setupSignalHandlers();
@@ -63,7 +67,9 @@ export class Daemon {
await this.monitor();
} catch (error) {
this.isRunning = false;
logger.error(`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`);
logger.error(
`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`,
);
process.exit(1);
}
}
@@ -101,6 +107,8 @@ export class Daemon {
config.api,
this.modelgrid.getContainerManager(),
this.modelgrid.getModelRegistry(),
this.modelgrid.getModelLoader(),
this.modelgrid.getClusterCoordinator(),
);
await this.apiServer.start();
@@ -151,8 +159,16 @@ export class Daemon {
logger.info(`Preloading ${config.models.autoLoad.length} model(s)...`);
const modelLoader = this.modelgrid.getModelLoader();
const results = await modelLoader.preloadModels(config.models.autoLoad);
const clusterCoordinator = this.modelgrid.getClusterCoordinator();
const results = new Map<string, { success: boolean; error?: string }>();
for (const modelName of config.models.autoLoad) {
const ensured = await clusterCoordinator.ensureModel(modelName);
results.set(modelName, {
success: !!ensured,
error: ensured ? undefined : 'Failed to schedule preload',
});
}
let loaded = 0;
let failed = 0;
@@ -203,6 +219,10 @@ export class Daemon {
// Check container health
await this.checkContainerHealth();
await this.syncClusterState();
await this.modelgrid.getClusterCoordinator().reconcileDesiredReplicas();
await this.syncClusterState();
// Log periodic status
this.logPeriodicStatus();
@@ -245,6 +265,19 @@ export class Daemon {
}
}
/**
 * Pushes the local node state to the cluster coordinator and then sends a heartbeat.
 *
 * Calls the coordinator's `syncLocalState` with this node's advertise URL,
 * followed by `sendHeartbeat`. Does nothing when no configuration is available.
 *
 * @param config - Optional configuration to use; when omitted, the value
 *   returned by `this.modelgrid.getConfig()` is used instead.
 * @returns A promise that settles after both coordinator calls have completed.
 */
private async syncClusterState(config?: IModelGridConfig): Promise<void> {
  const activeConfig = config ? config : this.modelgrid.getConfig();
  if (!activeConfig) {
    // No configuration to derive an advertise URL from.
    return;
  }

  // Use the configured advertise URL, or fall back to loopback on the API port
  // when it is unset or empty.
  let nodeUrl = activeConfig.cluster.advertiseUrl;
  if (!nodeUrl) {
    nodeUrl = `http://127.0.0.1:${activeConfig.api.port}`;
  }

  const clusterCoordinator = this.modelgrid.getClusterCoordinator();
  await clusterCoordinator.syncLocalState(nodeUrl);
  await clusterCoordinator.sendHeartbeat();
}
/**
* Log configuration loaded message
*/
@@ -252,8 +285,10 @@ export class Daemon {
logger.log('');
logger.logBoxTitle('Configuration Loaded', 60, 'success');
logger.logBoxLine(`API Port: ${config.api.port}`);
logger.logBoxLine(`Containers: ${config.containers.length}`);
logger.logBoxLine(`Auto-pull: ${config.models.autoPull ? 'Enabled' : 'Disabled'}`);
logger.logBoxLine(`Deployments: ${config.containers.length}`);
logger.logBoxLine(`Auto-deploy: ${config.models.autoDeploy ? 'Enabled' : 'Disabled'}`);
logger.logBoxLine(`Registry: ${config.models.registryUrl}`);
logger.logBoxLine(`Cluster Mode: ${config.cluster.role}`);
logger.logBoxLine(`Check Interval: ${config.checkInterval / 1000}s`);
logger.logBoxEnd();
logger.log('');