/**
 * ModelGrid Daemon
 *
 * Background process for managing containers and serving the API.
 */

import process from 'node:process';
import { logger } from './logger.ts';
import { TIMING } from './constants.ts';
import type { ModelGrid } from './modelgrid.ts';
import { ApiServer } from './api/server.ts';
import { UiServer } from './ui/server.ts';
import type { IModelGridConfig } from './interfaces/config.ts';

/**
 * ModelGrid Daemon
 *
 * Owns the API server, the optional UI server and the monitoring loop for a
 * single `ModelGrid` instance. `start()` brings everything up and then stays
 * inside the monitoring loop; `stop()` tears everything down again.
 */
export class Daemon {
  private modelgrid: ModelGrid;
  private isRunning: boolean = false;
  private apiServer?: ApiServer;
  private uiServer?: UiServer;

  /** Set once the SIGINT/SIGTERM listeners are registered, so a restart does not add duplicates. */
  private signalHandlersInstalled = false;

  /** The teardown currently in flight, if any; lets concurrent `stop()` callers join it. */
  private stopInProgress?: Promise<void>;

  constructor(modelgrid: ModelGrid) {
    this.modelgrid = modelgrid;
  }

  /**
   * Start the daemon.
   *
   * Initializes ModelGrid, starts the API and UI servers, starts the
   * containers, preloads the configured models, synchronizes cluster state
   * and then enters the monitoring loop. The returned promise therefore only
   * settles once the monitoring loop has ended (after `stop()`), or when
   * startup fails.
   *
   * @throws Rethrows whatever error made startup fail. Servers that were
   *   already created are stopped first; containers are left untouched so the
   *   owner of the `ModelGrid` instance can decide what to do with them.
   */
  public async start(): Promise<void> {
    if (this.isRunning) {
      logger.warn('Daemon is already running');
      return;
    }

    logger.log('Starting ModelGrid daemon...');

    try {
      // Initialize ModelGrid
      await this.modelgrid.initialize();

      const config = this.modelgrid.getConfig();
      if (!config) {
        throw new Error('Failed to load configuration');
      }

      this.logConfigLoaded(config);

      // Start API server
      await this.startApiServer(config);

      // Start UI server (runs on its own port, serves the operations console)
      await this.startUiServer(config);

      // Start containers
      await this.startContainers();

      // Preload models if configured
      await this.preloadModels(config);

      // Publish local state, reconcile replicas, then publish the reconciled state
      await this.syncClusterState(config);
      await this.modelgrid.getClusterCoordinator().reconcileDesiredReplicas();
      await this.syncClusterState(config);

      // Setup signal handlers
      this.setupSignalHandlers();

      this.isRunning = true;

      // Start monitoring loop
      await this.monitor();
    } catch (error) {
      this.isRunning = false;
      logger.error(`Daemon failed to start: ${Daemon.describeError(error)}`);

      // stop() is a no-op while isRunning is false, so servers that already
      // came up have to be released here or they would stay bound.
      await this.stopServers();

      throw error;
    }
  }

  /**
   * Stop the daemon.
   *
   * Does nothing when the daemon is not running. When a stop is already in
   * flight, the call waits for that one instead of returning early, so every
   * caller observes the completed teardown.
   *
   * @throws The first error raised during teardown. All teardown steps are
   *   attempted before it is rethrown.
   */
  public async stop(): Promise<void> {
    if (this.stopInProgress) {
      await this.stopInProgress;
      return;
    }

    if (!this.isRunning) {
      return;
    }

    this.isRunning = false;
    this.stopInProgress = this.performStop();

    try {
      await this.stopInProgress;
    } finally {
      this.stopInProgress = undefined;
    }
  }

  /**
   * Run the teardown sequence: UI server, API server, then ModelGrid itself.
   * Every step is attempted even when an earlier one fails.
   */
  private async performStop(): Promise<void> {
    logger.log('Stopping ModelGrid daemon...');

    const failures = await this.stopServers();

    // Shutdown ModelGrid (stops containers)
    try {
      await this.modelgrid.shutdown();
    } catch (error) {
      logger.error(`Failed to shut down ModelGrid: ${Daemon.describeError(error)}`);
      failures.push(error);
    }

    if (failures.length > 0) {
      throw failures[0];
    }

    logger.success('ModelGrid daemon stopped');
  }

  /**
   * Stop the UI server and the API server, if they exist, and drop the
   * references to them. Never throws.
   *
   * @returns The errors raised while stopping, in the order they occurred.
   */
  private async stopServers(): Promise<unknown[]> {
    const failures: unknown[] = [];

    const uiServer = this.uiServer;
    this.uiServer = undefined;
    if (uiServer) {
      try {
        await uiServer.stop();
      } catch (error) {
        logger.error(`Failed to stop UI server: ${Daemon.describeError(error)}`);
        failures.push(error);
      }
    }

    const apiServer = this.apiServer;
    this.apiServer = undefined;
    if (apiServer) {
      try {
        await apiServer.stop();
      } catch (error) {
        logger.error(`Failed to stop API server: ${Daemon.describeError(error)}`);
        failures.push(error);
      }
    }

    return failures;
  }

  /**
   * Start the API server
   */
  private async startApiServer(config: IModelGridConfig): Promise<void> {
    logger.info('Starting API server...');

    this.apiServer = new ApiServer(
      config.api,
      this.modelgrid.getContainerManager(),
      this.modelgrid.getModelRegistry(),
      this.modelgrid.getModelLoader(),
      this.modelgrid.getClusterCoordinator(),
    );

    await this.apiServer.start();
  }

  /**
   * Start the UI server, if enabled.
   */
  private async startUiServer(config: IModelGridConfig): Promise<void> {
    if (!config.ui.enabled) {
      logger.dim('UI server disabled in configuration');
      return;
    }

    logger.info('Starting UI server...');

    this.uiServer = new UiServer(
      config.ui,
      this.modelgrid.getContainerManager(),
      this.modelgrid.getClusterManager(),
    );

    await this.uiServer.start();
  }

  /**
   * Start configured containers and wait for them to report healthy.
   */
  private async startContainers(): Promise<void> {
    logger.info('Starting containers...');

    const containerManager = this.modelgrid.getContainerManager();
    await containerManager.startAll();

    // Wait for containers to be healthy
    logger.dim('Waiting for containers to become healthy...');
    await this.waitForContainersHealthy();
  }

  /**
   * Wait for all containers to report healthy.
   *
   * Gives up with a warning (it does not throw) once `timeout` has elapsed.
   *
   * @param timeout Maximum time to wait, in milliseconds.
   * @param pollInterval Pause between two health checks, in milliseconds.
   */
  private async waitForContainersHealthy(
    timeout: number = 60000,
    pollInterval: number = 5000,
  ): Promise<void> {
    const startTime = Date.now();
    const containerManager = this.modelgrid.getContainerManager();

    while (Date.now() - startTime < timeout) {
      const allHealthy = await containerManager.checkAllHealth();

      if (allHealthy) {
        logger.success('All containers are healthy');
        return;
      }

      // Never sleep past the deadline.
      const remaining = timeout - (Date.now() - startTime);
      if (remaining <= 0) {
        break;
      }
      await this.sleep(Math.min(pollInterval, remaining));
    }

    logger.warn('Timeout waiting for containers to become healthy');
  }

  /**
   * Preload configured models.
   *
   * Asks the cluster coordinator to ensure every model listed in
   * `config.models.autoLoad`, then logs a per-model result and a summary.
   * A falsy result from the coordinator is reported as a failed preload.
   */
  private async preloadModels(config: IModelGridConfig): Promise<void> {
    if (!config.models.autoLoad || config.models.autoLoad.length === 0) {
      return;
    }

    logger.info(`Preloading ${config.models.autoLoad.length} model(s)...`);

    const clusterCoordinator = this.modelgrid.getClusterCoordinator();
    const results = new Map<string, { success: boolean; error?: string }>();

    for (const modelName of config.models.autoLoad) {
      const ensured = await clusterCoordinator.ensureModel(modelName);
      results.set(modelName, {
        success: !!ensured,
        error: ensured ? undefined : 'Failed to schedule preload',
      });
    }

    let loaded = 0;
    let failed = 0;

    for (const [name, result] of results) {
      if (result.success) {
        loaded++;
        logger.dim(` ✓ ${name}`);
      } else {
        failed++;
        logger.warn(` ✗ ${name}: ${result.error}`);
      }
    }

    if (failed > 0) {
      logger.warn(`Preloaded ${loaded}/${config.models.autoLoad.length} models (${failed} failed)`);
    } else {
      logger.success(`Preloaded ${loaded} model(s)`);
    }
  }

  /**
   * Setup signal handlers for graceful shutdown.
   *
   * The listeners are registered at most once per daemon instance. On SIGINT
   * or SIGTERM the daemon is stopped and the process exits with code 0, or
   * with code 1 when stopping failed.
   */
  private setupSignalHandlers(): void {
    if (this.signalHandlersInstalled) {
      return;
    }
    this.signalHandlersInstalled = true;

    const shutdown = async (): Promise<void> => {
      logger.log('');
      logger.log('Received shutdown signal');

      let exitCode = 0;
      try {
        // A repeated signal joins the stop already in flight (see stop()),
        // so the process never exits in the middle of a teardown.
        await this.stop();
      } catch (error) {
        exitCode = 1;
        logger.error(`Shutdown failed: ${Daemon.describeError(error)}`);
      }

      process.exit(exitCode);
    };

    // shutdown() handles its own errors, so it is safe to fire and forget.
    process.on('SIGINT', () => void shutdown());
    process.on('SIGTERM', () => void shutdown());
  }

  /**
   * Main monitoring loop.
   *
   * Runs until `isRunning` becomes false. Each cycle checks container health,
   * synchronizes cluster state around a replica reconciliation and logs the
   * API server status. An error in a cycle is logged and the loop continues.
   */
  private async monitor(): Promise<void> {
    logger.log('Starting monitoring loop...');

    const config = this.modelgrid.getConfig();
    // `||` rather than `??`: an interval of 0 also falls back to the default.
    const checkInterval = config?.checkInterval || TIMING.CHECK_INTERVAL_MS;

    while (this.isRunning) {
      try {
        // Check container health
        await this.checkContainerHealth();
        await this.syncClusterState();
        await this.modelgrid.getClusterCoordinator().reconcileDesiredReplicas();
        await this.syncClusterState();

        // Log periodic status
        this.logPeriodicStatus();
      } catch (error) {
        logger.error(`Monitor error: ${Daemon.describeError(error)}`);
      }

      await this.sleep(checkInterval);
    }
  }

  /**
   * Check health of all containers and restart the running ones that report
   * `unhealthy`. A failed restart is logged and does not prevent the
   * remaining containers from being checked.
   */
  private async checkContainerHealth(): Promise<void> {
    const containerManager = this.modelgrid.getContainerManager();
    const statuses = await containerManager.getAllStatus();

    for (const [id, status] of statuses) {
      if (!status.running || status.health !== 'unhealthy') {
        continue;
      }

      logger.warn(`Container ${id} is unhealthy, attempting restart...`);

      const container = containerManager.getContainer(id);
      if (!container) {
        continue;
      }

      try {
        await container.restart();
      } catch (error) {
        logger.error(`Failed to restart container ${id}: ${Daemon.describeError(error)}`);
      }
    }
  }

  /**
   * Log periodic status of the API server, if it exists and reports running.
   */
  private logPeriodicStatus(): void {
    if (!this.apiServer) {
      return;
    }

    const info = this.apiServer.getInfo();
    if (info.running) {
      logger.dim(`API server running on ${info.host}:${info.port} (uptime: ${info.uptime}s)`);
    }
  }

  /**
   * Push the local state to the cluster coordinator and send a heartbeat.
   *
   * @param config Configuration to use; falls back to the configuration held
   *   by ModelGrid. Nothing happens when neither is available.
   */
  private async syncClusterState(config?: IModelGridConfig): Promise<void> {
    const effectiveConfig = config ?? this.modelgrid.getConfig();
    if (!effectiveConfig) {
      return;
    }

    // Without a configured advertise URL, fall back to loopback on the API port.
    const advertiseUrl = effectiveConfig.cluster.advertiseUrl ||
      `http://127.0.0.1:${effectiveConfig.api.port}`;
    const coordinator = this.modelgrid.getClusterCoordinator();
    await coordinator.syncLocalState(advertiseUrl);
    await coordinator.sendHeartbeat();
  }

  /**
   * Log configuration loaded message
   */
  private logConfigLoaded(config: IModelGridConfig): void {
    logger.log('');
    logger.logBoxTitle('Configuration Loaded', 60, 'success');
    logger.logBoxLine(`API Port: ${config.api.port}`);
    logger.logBoxLine(`Deployments: ${config.containers.length}`);
    logger.logBoxLine(`Auto-deploy: ${config.models.autoDeploy ? 'Enabled' : 'Disabled'}`);
    logger.logBoxLine(`Registry: ${config.models.registryUrl}`);
    logger.logBoxLine(`Cluster Mode: ${config.cluster.role}`);
    logger.logBoxLine(`Check Interval: ${config.checkInterval / 1000}s`);
    logger.logBoxEnd();
    logger.log('');
  }

  /**
   * Sleep for specified milliseconds
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Turn a caught value into a log-friendly message.
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}