/**
 * Docker Manager
 *
 * Handles Docker installation, configuration, and management.
 */

import { exec, execFile } from 'node:child_process';
import { promisify } from 'node:util';
import * as fs from 'node:fs';
import { logger } from '../logger.ts';
import { DOCKER, TIMING } from '../constants.ts';

// `exec` runs through a shell and is only used for fixed command strings that
// need shell features (pipes, `|| true`, env prefixes, redirection).
const execAsync = promisify(exec);
// `execFile` bypasses the shell; every command that carries caller-supplied
// values (image names, container names, user names, network names) uses it so
// those values can never be interpreted as shell syntax.
const execFileAsync = promisify(execFile);

/** Timeout for cheap, local CLI queries. */
const QUICK_COMMAND_TIMEOUT_MS = 5000;
/** Timeout for removing legacy packages and for small network fetches. */
const PACKAGE_REMOVE_TIMEOUT_MS = 60000;
/** Timeout for package index refreshes and prerequisite installs. */
const PACKAGE_STEP_TIMEOUT_MS = 120000;
/** Timeout for installing the Docker packages themselves. */
const PACKAGE_INSTALL_TIMEOUT_MS = 300000;
/** Timeout for long downloads (image pulls, convenience script): 10 minutes. */
const LONG_DOWNLOAD_TIMEOUT_MS = 600000;

/** Distribution IDs handled by the apt-based installer. */
const DEBIAN_FAMILY: ReadonlySet<string> = new Set(['ubuntu', 'debian']);
/** Distribution IDs handled by the dnf-based installer. */
const RHEL_FAMILY: ReadonlySet<string> = new Set([
  'fedora',
  'rhel',
  'centos',
  'rocky',
  'almalinux',
]);

/** Subset of the `docker info` JSON document that this module reads. */
interface IDockerInfoJson {
  Runtimes?: Record<string, unknown>;
  Driver?: string;
  DockerRootDir?: string;
}

/**
 * Convert an unknown thrown value into a printable message.
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Run the `docker` CLI without a shell and return its stdout.
 *
 * @param args - Arguments passed verbatim to `docker` (no shell quoting needed).
 * @param timeoutMs - Timeout handed to `execFile`.
 * @returns The captured standard output.
 * @throws Whatever `execFile` rejects with (non-zero exit, timeout, missing binary).
 */
async function runDocker(args: readonly string[], timeoutMs: number): Promise<string> {
  const { stdout } = await execFileAsync('docker', [...args], { timeout: timeoutMs });
  return stdout;
}

/**
 * Docker installation result
 */
export interface IDockerInstallResult {
  success: boolean;
  version?: string;
  error?: string;
}

/**
 * Docker status information
 */
export interface IDockerStatus {
  installed: boolean;
  running: boolean;
  version?: string;
  runtimes: string[];
  hasNvidiaRuntime: boolean;
  networkExists: boolean;
  storageDriver?: string;
  rootDir?: string;
}

/**
 * Docker Manager class
 *
 * Wraps the `docker`, package-manager and `systemctl` command lines. Query
 * methods report failure through their return value (`false`, `undefined`,
 * `null`, `[]`) instead of throwing.
 */
export class DockerManager {
  private readonly networkName: string;

  /**
   * @param networkName - Docker network this manager inspects, creates and
   *   removes. Defaults to `DOCKER.DEFAULT_NETWORK`.
   */
  constructor(networkName: string = DOCKER.DEFAULT_NETWORK) {
    this.networkName = networkName;
  }

  /**
   * Check if Docker is installed
   *
   * @returns `true` when `docker --version` succeeds and mentions "Docker".
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const stdout = await runDocker(['--version'], QUICK_COMMAND_TIMEOUT_MS);
      return stdout.includes('Docker');
    } catch {
      return false;
    }
  }

  /**
   * Check if Docker daemon is running
   *
   * @returns `true` when `docker info` exits successfully within the timeout.
   */
  public async isRunning(): Promise<boolean> {
    try {
      await runDocker(['info'], TIMING.DOCKER_COMMAND_TIMEOUT_MS);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get Docker version
   *
   * @returns The `major.minor.patch` string parsed from `docker --version`,
   *   or `undefined` when the command fails or the output is not recognised.
   */
  public async getVersion(): Promise<string | undefined> {
    try {
      const stdout = await runDocker(['--version'], QUICK_COMMAND_TIMEOUT_MS);
      return stdout.match(/Docker version (\d+\.\d+\.\d+)/)?.[1];
    } catch {
      return undefined;
    }
  }

  /**
   * Get full Docker status
   *
   * Returns early with the fields gathered so far when Docker is not
   * installed or the daemon is not running.
   */
  public async getStatus(): Promise<IDockerStatus> {
    const status: IDockerStatus = {
      installed: false,
      running: false,
      runtimes: [],
      hasNvidiaRuntime: false,
      networkExists: false,
    };

    // Check installation
    status.installed = await this.isInstalled();
    if (!status.installed) {
      return status;
    }
    status.version = await this.getVersion();

    // Check if running
    status.running = await this.isRunning();
    if (!status.running) {
      return status;
    }

    // Get detailed info
    try {
      const stdout = await runDocker(
        ['info', '--format', 'json'],
        TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      );
      const info = JSON.parse(stdout) as IDockerInfoJson;

      if (info.Runtimes) {
        status.runtimes = Object.keys(info.Runtimes);
        status.hasNvidiaRuntime = status.runtimes.includes('nvidia');
      }
      status.storageDriver = info.Driver;
      status.rootDir = info.DockerRootDir;
    } catch {
      // The JSON query failed or was unparsable: fall back to scanning the
      // plain-text output for a "runtimes" line that mentions nvidia.
      try {
        const stdout = await runDocker(['info'], QUICK_COMMAND_TIMEOUT_MS);
        status.hasNvidiaRuntime = stdout
          .split('\n')
          .some((line) => /runtimes/i.test(line) && line.toLowerCase().includes('nvidia'));
      } catch {
        // Best effort only; leave hasNvidiaRuntime as false.
      }
    }

    // Check network exists
    status.networkExists = await this.networkExists();

    return status;
  }

  /**
   * Install Docker on the system
   *
   * Picks an installer from the `ID` in `/etc/os-release`, starts the service
   * and verifies the result by reading the version back.
   *
   * @returns `success: true` with the version when Docker is (already)
   *   installed; otherwise `success: false` with an error message. Never throws.
   */
  public async install(): Promise<IDockerInstallResult> {
    try {
      // Check if already installed
      if (await this.isInstalled()) {
        return {
          success: true,
          version: await this.getVersion(),
        };
      }

      // Detect distribution
      const distro = await this.getLinuxDistro();
      logger.info(`Installing Docker on ${distro.id}...`);

      if (DEBIAN_FAMILY.has(distro.id)) {
        await this.installOnDebian();
      } else if (RHEL_FAMILY.has(distro.id)) {
        await this.installOnRhel();
      } else {
        // Use convenience script as fallback
        await this.installWithScript();
      }

      // Start Docker service
      await this.startService();

      // Verify installation
      const version = await this.getVersion();
      if (version) {
        logger.success(`Docker ${version} installed successfully`);
        return { success: true, version };
      }

      return { success: false, error: 'Installation completed but Docker not found' };
    } catch (error) {
      return { success: false, error: errorMessage(error) };
    }
  }

  /**
   * Install Docker on Debian/Ubuntu
   *
   * @throws When any apt/curl/gpg step fails or the release codename cannot
   *   be determined; `install()` converts that into a failed result.
   */
  private async installOnDebian(): Promise<void> {
    // Remove old versions
    await execAsync(
      'apt-get remove -y docker docker-engine docker.io containerd runc || true',
      { timeout: PACKAGE_REMOVE_TIMEOUT_MS },
    );

    // Install prerequisites
    await execAsync('apt-get update', { timeout: PACKAGE_STEP_TIMEOUT_MS });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg',
      { timeout: PACKAGE_STEP_TIMEOUT_MS },
    );

    // Add Docker's official GPG key
    await execAsync('install -m 0755 -d /etc/apt/keyrings');

    // Ubuntu and Debian differ only in the repository path segment.
    const distro = await this.getLinuxDistro();
    const repoDistro = distro.id === 'ubuntu' ? 'ubuntu' : 'debian';
    const repoBase = `https://download.docker.com/linux/${repoDistro}`;

    // --batch --yes lets gpg overwrite a keyring left behind by an earlier
    // (possibly partial) run instead of failing on the existing file.
    await execAsync(
      `curl -fsSL ${repoBase}/gpg | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg`,
      { timeout: PACKAGE_REMOVE_TIMEOUT_MS },
    );
    await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg');

    // Get release codename; it is interpolated into a shell command below, so
    // reject anything that is not a plain codename token.
    const { stdout: rawCodename } = await execAsync('lsb_release -cs', {
      timeout: QUICK_COMMAND_TIMEOUT_MS,
    });
    const codename = rawCodename.trim();
    if (!/^[a-z][a-z0-9-]*$/.test(codename)) {
      throw new Error(`Could not determine distribution codename (got '${codename}')`);
    }

    await execAsync(
      `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] ${repoBase} ${codename} stable" > /etc/apt/sources.list.d/docker.list`,
    );

    // Install Docker
    await execAsync('apt-get update', { timeout: PACKAGE_STEP_TIMEOUT_MS });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: PACKAGE_INSTALL_TIMEOUT_MS },
    );
  }

  /**
   * Install Docker on RHEL/Fedora/CentOS
   *
   * Fedora uses its own repository file; every other ID routed here uses the
   * CentOS repository.
   *
   * @throws When any dnf step fails; `install()` converts that into a failed result.
   */
  private async installOnRhel(): Promise<void> {
    const distro = await this.getLinuxDistro();
    const isFedora = distro.id === 'fedora';

    // Remove old versions
    await execAsync(
      'dnf remove -y docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine || true',
      { timeout: PACKAGE_REMOVE_TIMEOUT_MS },
    );

    // Install prerequisites
    await execAsync('dnf install -y dnf-plugins-core', { timeout: PACKAGE_STEP_TIMEOUT_MS });

    // Add Docker repository
    const repoUrl = isFedora
      ? 'https://download.docker.com/linux/fedora/docker-ce.repo'
      : 'https://download.docker.com/linux/centos/docker-ce.repo';
    await execAsync(`dnf config-manager --add-repo ${repoUrl}`, {
      timeout: PACKAGE_REMOVE_TIMEOUT_MS,
    });

    // Install Docker
    await execAsync(
      'dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: PACKAGE_INSTALL_TIMEOUT_MS },
    );
  }

  /**
   * Install Docker using convenience script
   *
   * Pipes https://get.docker.com into `sh`; used for distributions without a
   * dedicated installer above.
   */
  private async installWithScript(): Promise<void> {
    logger.info('Installing Docker using convenience script...');
    await execAsync('curl -fsSL https://get.docker.com | sh', {
      timeout: LONG_DOWNLOAD_TIMEOUT_MS,
    });
  }

  /**
   * Start Docker service
   *
   * Starts and enables the `docker` systemd unit. Failures are logged as
   * warnings and not rethrown.
   */
  public async startService(): Promise<void> {
    try {
      await execAsync('systemctl start docker');
      await execAsync('systemctl enable docker');
      logger.success('Docker service started and enabled');
    } catch (error) {
      logger.warn(`Could not start Docker service: ${errorMessage(error)}`);
    }
  }

  /**
   * Stop Docker service
   *
   * Failures are logged as warnings and not rethrown.
   */
  public async stopService(): Promise<void> {
    try {
      await execAsync('systemctl stop docker');
      logger.success('Docker service stopped');
    } catch (error) {
      logger.warn(`Could not stop Docker service: ${errorMessage(error)}`);
    }
  }

  /**
   * Restart Docker service
   *
   * Failures are logged as warnings and not rethrown.
   */
  public async restartService(): Promise<void> {
    try {
      await execAsync('systemctl restart docker');
      logger.success('Docker service restarted');
    } catch (error) {
      logger.warn(`Could not restart Docker service: ${errorMessage(error)}`);
    }
  }

  /**
   * Check if ModelGrid network exists
   *
   * @returns `true` when `docker network inspect` succeeds for the configured
   *   network name.
   */
  public async networkExists(): Promise<boolean> {
    try {
      await runDocker(['network', 'inspect', this.networkName], QUICK_COMMAND_TIMEOUT_MS);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Create the ModelGrid Docker network
   *
   * @returns `true` when the network already existed or was created; `false`
   *   (after logging the error) when creation failed.
   */
  public async createNetwork(): Promise<boolean> {
    try {
      if (await this.networkExists()) {
        logger.dim(`Network '${this.networkName}' already exists`);
        return true;
      }

      await runDocker(['network', 'create', this.networkName], TIMING.DOCKER_COMMAND_TIMEOUT_MS);
      logger.success(`Created Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to create network: ${errorMessage(error)}`);
      return false;
    }
  }

  /**
   * Remove the ModelGrid Docker network
   *
   * @returns `true` when the network is absent or was removed; `false`
   *   (after logging the error) when removal failed.
   */
  public async removeNetwork(): Promise<boolean> {
    try {
      if (!(await this.networkExists())) {
        return true;
      }

      await runDocker(['network', 'rm', this.networkName], TIMING.DOCKER_COMMAND_TIMEOUT_MS);
      logger.success(`Removed Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to remove network: ${errorMessage(error)}`);
      return false;
    }
  }

  /**
   * Pull a Docker image
   *
   * @param image - Image reference handed to `docker pull` as a single argument.
   * @returns `true` on success, `false` (after logging the error) otherwise.
   */
  public async pullImage(image: string): Promise<boolean> {
    try {
      logger.info(`Pulling image: ${image}`);
      // 10 minutes for large images
      await runDocker(['pull', image], LONG_DOWNLOAD_TIMEOUT_MS);
      logger.success(`Pulled image: ${image}`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull image: ${errorMessage(error)}`);
      return false;
    }
  }

  /**
   * Check if an image exists locally
   *
   * @param image - Image reference handed to `docker image inspect`.
   * @returns `true` when the inspect command succeeds.
   */
  public async imageExists(image: string): Promise<boolean> {
    try {
      await runDocker(['image', 'inspect', image], QUICK_COMMAND_TIMEOUT_MS);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * List running containers
   *
   * @param all - When `true`, passes `-a` to `docker ps` so stopped containers
   *   are included.
   * @returns Container IDs, one per entry; empty when the command fails.
   */
  public async listContainers(all: boolean = false): Promise<string[]> {
    try {
      const args = ['ps', ...(all ? ['-a'] : []), '--format', '{{.ID}}'];
      const stdout = await runDocker(args, QUICK_COMMAND_TIMEOUT_MS);
      return stdout.trim().split('\n').filter((id) => id);
    } catch {
      return [];
    }
  }

  /**
   * Get container by name
   *
   * @param name - Value for docker's `name` filter. The filter can match more
   *   than one container; only the first ID printed by `docker ps` is returned.
   * @returns A single container ID, or `null` when nothing matches or the
   *   command fails.
   */
  public async getContainerByName(name: string): Promise<string | null> {
    try {
      const stdout = await runDocker(
        ['ps', '-a', '--filter', `name=${name}`, '--format', '{{.ID}}'],
        QUICK_COMMAND_TIMEOUT_MS,
      );
      const firstId = stdout
        .split('\n')
        .map((line) => line.trim())
        .find((line) => line.length > 0);
      return firstId ?? null;
    } catch {
      return null;
    }
  }

  /**
   * Add user to docker group
   *
   * @param username - User to add. When omitted or empty, falls back to
   *   `SUDO_USER`, then `USER`, from the environment.
   * @returns `true` when `usermod` succeeded; `false` when no user could be
   *   determined or the command failed.
   */
  public async addUserToDockerGroup(username?: string): Promise<boolean> {
    try {
      // `||` on purpose: an empty string must fall through to the next source.
      const user = username || process.env.SUDO_USER || process.env.USER || '';
      if (!user) {
        logger.warn('Could not determine username for docker group');
        return false;
      }

      await execFileAsync('usermod', ['-aG', 'docker', user]);
      logger.success(`Added user '${user}' to docker group`);
      logger.info('Log out and log back in for the change to take effect');
      return true;
    } catch (error) {
      logger.error(`Failed to add user to docker group: ${errorMessage(error)}`);
      return false;
    }
  }

  /**
   * Get Linux distribution info
   *
   * Reads `ID` and `VERSION_ID` from `/etc/os-release`.
   *
   * @returns The lower-cased ID (`'unknown'` when absent or unreadable) and
   *   the version string (`''` when absent or unreadable).
   */
  private async getLinuxDistro(): Promise<{ id: string; version: string }> {
    try {
      const content = await fs.promises.readFile('/etc/os-release', 'utf8');
      // IDs may contain '.', '_' and '-' (e.g. "opensuse-leap"), not just \w.
      const idMatch = content.match(/^ID=["']?([\w.-]+)["']?$/m);
      const versionMatch = content.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);

      return {
        id: idMatch?.[1]?.toLowerCase() ?? 'unknown',
        version: versionMatch?.[1] ?? '',
      };
    } catch {
      return { id: 'unknown', version: '' };
    }
  }

  /**
   * Print Docker status
   *
   * Renders the result of `getStatus()` through `logger.logBox`, styled as
   * success when installed and running, warning when only installed, and
   * error when not installed.
   */
  public async printStatus(): Promise<void> {
    const status = await this.getStatus();
    const lines: string[] = [];

    lines.push(`Installed: ${status.installed ? 'Yes' : 'No'}`);

    if (status.installed) {
      lines.push(`Version: ${status.version || 'Unknown'}`);
      lines.push(`Running: ${status.running ? 'Yes' : 'No'}`);

      if (status.running) {
        lines.push(`NVIDIA Runtime: ${status.hasNvidiaRuntime ? 'Yes' : 'No'}`);
        lines.push(`ModelGrid Network: ${status.networkExists ? 'Yes' : 'No'}`);
        if (status.storageDriver) {
          lines.push(`Storage Driver: ${status.storageDriver}`);
        }
      }
    }

    logger.logBox(
      'Docker Status',
      lines,
      50,
      status.installed && status.running ? 'success' : status.installed ? 'warning' : 'error',
    );
  }
}