feat(cluster,api,models,cli): add cluster-aware model catalog deployments and request routing
This commit is contained in:
+40
-5
@@ -54,6 +54,10 @@ export class Daemon {
|
||||
// Preload models if configured
|
||||
await this.preloadModels(config);
|
||||
|
||||
await this.syncClusterState(config);
|
||||
await this.modelgrid.getClusterCoordinator().reconcileDesiredReplicas();
|
||||
await this.syncClusterState(config);
|
||||
|
||||
// Setup signal handlers
|
||||
this.setupSignalHandlers();
|
||||
|
||||
@@ -63,7 +67,9 @@ export class Daemon {
|
||||
await this.monitor();
|
||||
} catch (error) {
|
||||
this.isRunning = false;
|
||||
logger.error(`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`);
|
||||
logger.error(
|
||||
`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
@@ -101,6 +107,8 @@ export class Daemon {
|
||||
config.api,
|
||||
this.modelgrid.getContainerManager(),
|
||||
this.modelgrid.getModelRegistry(),
|
||||
this.modelgrid.getModelLoader(),
|
||||
this.modelgrid.getClusterCoordinator(),
|
||||
);
|
||||
|
||||
await this.apiServer.start();
|
||||
@@ -151,8 +159,16 @@ export class Daemon {
|
||||
|
||||
logger.info(`Preloading ${config.models.autoLoad.length} model(s)...`);
|
||||
|
||||
const modelLoader = this.modelgrid.getModelLoader();
|
||||
const results = await modelLoader.preloadModels(config.models.autoLoad);
|
||||
const clusterCoordinator = this.modelgrid.getClusterCoordinator();
|
||||
const results = new Map<string, { success: boolean; error?: string }>();
|
||||
|
||||
for (const modelName of config.models.autoLoad) {
|
||||
const ensured = await clusterCoordinator.ensureModel(modelName);
|
||||
results.set(modelName, {
|
||||
success: !!ensured,
|
||||
error: ensured ? undefined : 'Failed to schedule preload',
|
||||
});
|
||||
}
|
||||
|
||||
let loaded = 0;
|
||||
let failed = 0;
|
||||
@@ -203,6 +219,10 @@ export class Daemon {
|
||||
// Check container health
|
||||
await this.checkContainerHealth();
|
||||
|
||||
await this.syncClusterState();
|
||||
await this.modelgrid.getClusterCoordinator().reconcileDesiredReplicas();
|
||||
await this.syncClusterState();
|
||||
|
||||
// Log periodic status
|
||||
this.logPeriodicStatus();
|
||||
|
||||
@@ -245,6 +265,19 @@ export class Daemon {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Push this node's current state to the cluster coordinator and send a heartbeat.
 *
 * Resolves the configuration to use (the explicit argument, otherwise whatever
 * `this.modelgrid.getConfig()` returns) and silently does nothing when no
 * configuration is available. The advertise URL handed to the coordinator is
 * `cluster.advertiseUrl` when set, otherwise a loopback URL built from `api.port`.
 *
 * @param config Optional configuration to use instead of the one held by `modelgrid`.
 * @returns A promise that resolves once `syncLocalState` and `sendHeartbeat` have both completed.
 */
private async syncClusterState(config?: IModelGridConfig): Promise<void> {
|
||||
// Prefer the caller-supplied config; fall back to the one held by modelgrid.
const effectiveConfig = config || this.modelgrid.getConfig();
|
||||
if (!effectiveConfig) {
|
||||
return;
|
||||
}
|
||||

||||
// `||` means an empty/unset advertiseUrl falls back to the loopback address on the API port.
const advertiseUrl = effectiveConfig.cluster.advertiseUrl ||
|
||||
`http://127.0.0.1:${effectiveConfig.api.port}`;
|
||||
const coordinator = this.modelgrid.getClusterCoordinator();
|
||||
// Sync local state first, then send the heartbeat (the two calls run sequentially).
await coordinator.syncLocalState(advertiseUrl);
|
||||
await coordinator.sendHeartbeat();
|
||||
}
|
||||
|
||||
/**
|
||||
* Log configuration loaded message
|
||||
*/
|
||||
@@ -252,8 +285,10 @@ export class Daemon {
|
||||
logger.log('');
|
||||
logger.logBoxTitle('Configuration Loaded', 60, 'success');
|
||||
logger.logBoxLine(`API Port: ${config.api.port}`);
|
||||
logger.logBoxLine(`Containers: ${config.containers.length}`);
|
||||
logger.logBoxLine(`Auto-pull: ${config.models.autoPull ? 'Enabled' : 'Disabled'}`);
|
||||
logger.logBoxLine(`Deployments: ${config.containers.length}`);
|
||||
logger.logBoxLine(`Auto-deploy: ${config.models.autoDeploy ? 'Enabled' : 'Disabled'}`);
|
||||
logger.logBoxLine(`Registry: ${config.models.registryUrl}`);
|
||||
logger.logBoxLine(`Cluster Mode: ${config.cluster.role}`);
|
||||
logger.logBoxLine(`Check Interval: ${config.checkInterval / 1000}s`);
|
||||
logger.logBoxEnd();
|
||||
logger.log('');
|
||||
|
||||
Reference in New Issue
Block a user