Files
modelgrid/ts/docker/docker-manager.ts

510 lines
14 KiB
TypeScript
Raw Permalink Normal View History

2026-01-30 03:16:57 +00:00
/**
* Docker Manager
*
* Handles Docker installation, configuration, and management.
*/
import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import * as fs from 'node:fs';
import { logger } from '../logger.ts';
import { DOCKER, TIMING } from '../constants.ts';
// Promise-returning wrapper around child_process.exec; resolves with { stdout, stderr }.
// NOTE: exec runs the command string through a shell (callers below rely on pipes and
// redirects), so any value interpolated into a command must be trusted or shell-quoted.
const execAsync = promisify(exec);
/**
* Docker installation result
*
* Returned by DockerManager.install() instead of throwing: failures are
* reported through `success: false` together with `error`.
*/
export interface IDockerInstallResult {
/** True when Docker is available after the call (already present or freshly installed). */
success: boolean;
/** Docker version parsed from `docker --version`, when it could be determined. */
version?: string;
/** Human-readable failure description; only set when `success` is false. */
error?: string;
}
/**
* Docker status information
*
* Snapshot produced by DockerManager.getStatus(). Fields after `running` keep
* their defaults when Docker is not installed or the daemon is not reachable.
*/
export interface IDockerStatus {
/** True when `docker --version` succeeds and reports a Docker CLI. */
installed: boolean;
/** True when `docker info` succeeds, i.e. the daemon answered. */
running: boolean;
/** Docker version parsed from `docker --version`, if it could be parsed. */
version?: string;
/** Names of the container runtimes reported by `docker info`; empty when they could not be determined. */
runtimes: string[];
/** True when an "nvidia" runtime was reported by `docker info`. */
hasNvidiaRuntime: boolean;
/** True when the manager's Docker network can be inspected. */
networkExists: boolean;
/** Storage driver reported by `docker info` (its `Driver` field). */
storageDriver?: string;
/** Docker root directory reported by `docker info` (its `DockerRootDir` field). */
rootDir?: string;
}
/**
 * Docker Manager class
 *
 * Installs Docker on Linux hosts, controls the systemd service, and wraps the
 * handful of `docker` CLI calls ModelGrid needs (status, network, images,
 * containers).
 *
 * All commands are executed through a shell, so every caller-supplied value
 * (image, container, network and user names) is single-quote escaped before it
 * is placed into a command line.
 */
export class DockerManager {
  private readonly networkName: string;

  /**
   * @param networkName Name of the Docker network managed by this instance.
   */
  constructor(networkName: string = DOCKER.DEFAULT_NETWORK) {
    this.networkName = networkName;
  }

  /**
   * Quote a value so a POSIX shell treats it as exactly one literal word.
   * Embedded single quotes are closed, escaped and re-opened ('\'').
   */
  private static shellQuote(value: string): string {
    return "'" + value.replace(/'/g, "'\\''") + "'";
  }

  /**
   * Turn an unknown thrown value into a printable message.
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Check if Docker is installed
   *
   * @returns true when `docker --version` succeeds and mentions "Docker".
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
      return stdout.includes('Docker');
    } catch {
      return false;
    }
  }

  /**
   * Check if Docker daemon is running
   *
   * @returns true when `docker info` succeeds within the configured timeout.
   */
  public async isRunning(): Promise<boolean> {
    try {
      await execAsync('docker info', { timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get Docker version
   *
   * @returns The `x.y.z` version from `docker --version`, or undefined when
   *          the command fails or its output has an unexpected shape.
   */
  public async getVersion(): Promise<string | undefined> {
    try {
      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
      const match = stdout.match(/Docker version (\d+\.\d+\.\d+)/);
      return match ? match[1] : undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Get full Docker status
   *
   * Returns early with default values as soon as Docker turns out to be
   * missing or the daemon is not reachable. Never throws.
   */
  public async getStatus(): Promise<IDockerStatus> {
    const status: IDockerStatus = {
      installed: false,
      running: false,
      runtimes: [],
      hasNvidiaRuntime: false,
      networkExists: false,
    };

    status.installed = await this.isInstalled();
    if (!status.installed) {
      return status;
    }
    status.version = await this.getVersion();

    status.running = await this.isRunning();
    if (!status.running) {
      return status;
    }

    try {
      // The Go-template form is used instead of the bare `json` keyword
      // because only newer Docker CLIs understand the keyword.
      const { stdout } = await execAsync('docker info --format "{{json .}}"', {
        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      });
      const parsed: unknown = JSON.parse(stdout);
      if (typeof parsed !== 'object' || parsed === null) {
        // Hand over to the text-based fallback below.
        throw new Error('Unexpected output from docker info');
      }
      const info = parsed as Record<string, unknown>;

      const runtimes = info.Runtimes;
      if (typeof runtimes === 'object' && runtimes !== null) {
        status.runtimes = Object.keys(runtimes);
        status.hasNvidiaRuntime = status.runtimes.includes('nvidia');
      }
      if (typeof info.Driver === 'string') {
        status.storageDriver = info.Driver;
      }
      if (typeof info.DockerRootDir === 'string') {
        status.rootDir = info.DockerRootDir;
      }
    } catch {
      // Fallback: scrape the "Runtimes:" line of the human-readable output.
      try {
        const { stdout } = await execAsync('docker info 2>/dev/null | grep -i "runtimes"', {
          timeout: 5000,
        });
        const listed = stdout.match(/runtimes:\s*(.*)/i);
        if (listed) {
          status.runtimes = (listed[1] ?? '').trim().split(/\s+/).filter((name) => name.length > 0);
        }
        status.hasNvidiaRuntime = stdout.toLowerCase().includes('nvidia');
      } catch {
        // Best effort only: runtime details stay at their defaults.
      }
    }

    status.networkExists = await this.networkExists();
    return status;
  }

  /**
   * Install Docker on the system
   *
   * Uses the distribution's package manager for Debian/Ubuntu and the
   * RHEL family, and Docker's convenience script everywhere else. Needs
   * root privileges.
   *
   * @returns A result object; failures are reported via `success: false`
   *          and `error` rather than thrown.
   */
  public async install(): Promise<IDockerInstallResult> {
    try {
      if (await this.isInstalled()) {
        return {
          success: true,
          version: await this.getVersion(),
        };
      }

      const distro = await this.getLinuxDistro();
      logger.info(`Installing Docker on ${distro.id}...`);

      switch (distro.id) {
        case 'ubuntu':
        case 'debian':
          await this.installOnDebian(distro.id);
          break;
        case 'fedora':
        case 'rhel':
        case 'centos':
        case 'rocky':
        case 'almalinux':
          await this.installOnRhel(distro.id);
          break;
        default:
          // Unknown distribution: fall back to the convenience script.
          await this.installWithScript();
      }

      await this.startService();

      const version = await this.getVersion();
      if (version) {
        logger.success(`Docker ${version} installed successfully`);
        return { success: true, version };
      }
      return { success: false, error: 'Installation completed but Docker not found' };
    } catch (error) {
      return {
        success: false,
        error: DockerManager.describeError(error),
      };
    }
  }

  /**
   * Install Docker on Debian/Ubuntu
   *
   * @param distroId `ubuntu` or `debian`; selects the matching Docker apt repository.
   * @throws When any installation step fails or the release codename cannot be determined.
   */
  private async installOnDebian(distroId: string): Promise<void> {
    // Remove old versions (a missing package is not an error)
    await execAsync(
      'apt-get remove -y docker docker-engine docker.io containerd runc || true',
      { timeout: 60000 },
    );

    // Install prerequisites
    await execAsync('apt-get update', { timeout: 120000 });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg',
      { timeout: 120000 },
    );

    // Add Docker's official GPG key. --batch --yes lets a re-run overwrite a
    // keyring left behind by an earlier attempt instead of failing on it.
    const repoUrl = distroId === 'ubuntu'
      ? 'https://download.docker.com/linux/ubuntu'
      : 'https://download.docker.com/linux/debian';
    await execAsync('install -m 0755 -d /etc/apt/keyrings', { timeout: 30000 });
    await execAsync(
      `curl -fsSL ${repoUrl}/gpg | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg`,
      { timeout: 120000 },
    );
    await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg', { timeout: 30000 });

    // Register the apt repository for this release
    const codename = await this.getDebianCodename();
    await execAsync(
      `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] ${repoUrl} ${codename} stable" > /etc/apt/sources.list.d/docker.list`,
      { timeout: 30000 },
    );

    // Install Docker
    await execAsync('apt-get update', { timeout: 120000 });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: 300000 },
    );
  }

  /**
   * Determine the release codename (e.g. "jammy", "bookworm") for the apt repository.
   *
   * Reads /etc/os-release first because `lsb_release` is not part of the
   * prerequisites installed above and is absent on minimal systems; it is
   * only used as a fallback.
   *
   * @throws When no plausible codename can be found.
   */
  private async getDebianCodename(): Promise<string> {
    let codename = '';
    try {
      const content = await fs.promises.readFile('/etc/os-release', 'utf8');
      const match = content.match(/^VERSION_CODENAME=["']?([\w.-]+)["']?$/m) ??
        content.match(/^UBUNTU_CODENAME=["']?([\w.-]+)["']?$/m);
      codename = match?.[1] ?? '';
    } catch {
      // Unreadable os-release: try lsb_release below.
    }

    if (!codename) {
      const { stdout } = await execAsync('lsb_release -cs', { timeout: 30000 });
      codename = stdout.trim();
    }

    // The codename ends up inside a shell command, so accept only a plain token.
    if (!/^[\w.-]+$/.test(codename)) {
      throw new Error('Could not determine distribution codename');
    }
    return codename;
  }

  /**
   * Install Docker on RHEL/Fedora/CentOS
   *
   * @param distroId Distribution id; `fedora` uses the Fedora repository, all
   *                 others use the CentOS repository.
   * @throws When any installation step fails.
   */
  private async installOnRhel(distroId: string): Promise<void> {
    // Remove old versions (a missing package is not an error)
    await execAsync(
      'dnf remove -y docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine || true',
      { timeout: 60000 },
    );

    // Install prerequisites
    await execAsync('dnf install -y dnf-plugins-core', { timeout: 120000 });

    // Add Docker repository
    const repoUrl = distroId === 'fedora'
      ? 'https://download.docker.com/linux/fedora/docker-ce.repo'
      : 'https://download.docker.com/linux/centos/docker-ce.repo';
    try {
      await execAsync(`dnf config-manager --add-repo ${repoUrl}`, { timeout: 120000 });
    } catch {
      // dnf5 replaced the --add-repo option with the `addrepo` subcommand.
      await execAsync(
        `dnf config-manager addrepo --overwrite --from-repofile=${repoUrl}`,
        { timeout: 120000 },
      );
    }

    // Install Docker
    await execAsync(
      'dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: 300000 },
    );
  }

  /**
   * Install Docker using convenience script
   *
   * @throws When the download or the script itself fails.
   */
  private async installWithScript(): Promise<void> {
    logger.info('Installing Docker using convenience script...');
    await execAsync('curl -fsSL https://get.docker.com | sh', {
      timeout: 600000, // 10 minutes
    });
  }

  /**
   * Start Docker service
   *
   * Starts and enables the systemd unit. Failures are logged as warnings,
   * not thrown.
   */
  public async startService(): Promise<void> {
    try {
      await execAsync('systemctl start docker');
      await execAsync('systemctl enable docker');
      logger.success('Docker service started and enabled');
    } catch (error) {
      logger.warn(`Could not start Docker service: ${DockerManager.describeError(error)}`);
    }
  }

  /**
   * Stop Docker service
   *
   * Failures are logged as warnings, not thrown.
   */
  public async stopService(): Promise<void> {
    try {
      await execAsync('systemctl stop docker');
      logger.success('Docker service stopped');
    } catch (error) {
      logger.warn(`Could not stop Docker service: ${DockerManager.describeError(error)}`);
    }
  }

  /**
   * Restart Docker service
   *
   * Failures are logged as warnings, not thrown.
   */
  public async restartService(): Promise<void> {
    try {
      await execAsync('systemctl restart docker');
      logger.success('Docker service restarted');
    } catch (error) {
      logger.warn(`Could not restart Docker service: ${DockerManager.describeError(error)}`);
    }
  }

  /**
   * Check if ModelGrid network exists
   *
   * @returns true when `docker network inspect` succeeds for the configured network.
   */
  public async networkExists(): Promise<boolean> {
    try {
      await execAsync(
        `docker network inspect ${DockerManager.shellQuote(this.networkName)}`,
        { timeout: 5000 },
      );
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Create the ModelGrid Docker network
   *
   * @returns true when the network exists afterwards (created now or already
   *          present), false when creation failed.
   */
  public async createNetwork(): Promise<boolean> {
    try {
      if (await this.networkExists()) {
        logger.dim(`Network '${this.networkName}' already exists`);
        return true;
      }
      await execAsync(`docker network create ${DockerManager.shellQuote(this.networkName)}`, {
        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      });
      logger.success(`Created Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to create network: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Remove the ModelGrid Docker network
   *
   * @returns true when the network is gone afterwards (removed now or never
   *          present), false when removal failed.
   */
  public async removeNetwork(): Promise<boolean> {
    try {
      if (!(await this.networkExists())) {
        return true;
      }
      await execAsync(`docker network rm ${DockerManager.shellQuote(this.networkName)}`, {
        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      });
      logger.success(`Removed Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to remove network: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Pull a Docker image
   *
   * @param image Image reference, e.g. `repo/name:tag`.
   * @returns true on success, false when the pull failed (the error is logged).
   */
  public async pullImage(image: string): Promise<boolean> {
    try {
      logger.info(`Pulling image: ${image}`);
      await execAsync(`docker pull ${DockerManager.shellQuote(image)}`, {
        timeout: 600000, // 10 minutes for large images
      });
      logger.success(`Pulled image: ${image}`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull image: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Check if an image exists locally
   *
   * @param image Image reference to look up.
   * @returns true when `docker image inspect` succeeds for the image.
   */
  public async imageExists(image: string): Promise<boolean> {
    try {
      await execAsync(`docker image inspect ${DockerManager.shellQuote(image)}`, { timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * List running containers
   *
   * @param all When true, stopped containers are included as well.
   * @returns Container IDs; an empty array when none exist or the command fails.
   */
  public async listContainers(all: boolean = false): Promise<string[]> {
    try {
      const flag = all ? '-a' : '';
      const { stdout } = await execAsync(
        `docker ps ${flag} --format "{{.ID}}"`,
        { timeout: 5000 },
      );
      return stdout.trim().split('\n').filter((id) => id);
    } catch {
      return [];
    }
  }

  /**
   * Get container by name
   *
   * Docker's `name` filter matches substrings, so several containers can be
   * listed for one query. A container whose name equals `name` exactly is
   * preferred; otherwise the first listed match is used. The result is
   * always a single ID.
   *
   * @param name Container name to look for (running or stopped).
   * @returns The container ID, or null when nothing matches or the command fails.
   */
  public async getContainerByName(name: string): Promise<string | null> {
    try {
      const { stdout } = await execAsync(
        `docker ps -a --filter ${DockerManager.shellQuote(`name=${name}`)} --format "{{.ID}} {{.Names}}"`,
        { timeout: 5000 },
      );
      const rows = stdout
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .map((line): { id: string; names: string[] } => {
          // Each line is "<id> <name>[,<name>...]"; neither part contains spaces.
          const sep = line.indexOf(' ');
          if (sep === -1) {
            return { id: line, names: [] };
          }
          return {
            id: line.slice(0, sep),
            names: line.slice(sep + 1).split(',').map((entry) => entry.trim()),
          };
        });
      const chosen = rows.find((row) => row.names.includes(name)) ?? rows[0];
      return chosen ? chosen.id : null;
    } catch {
      return null;
    }
  }

  /**
   * Add user to docker group
   *
   * @param username User to add; defaults to $SUDO_USER, then $USER.
   * @returns true on success, false when no user could be determined or
   *          `usermod` failed (the error is logged).
   */
  public async addUserToDockerGroup(username?: string): Promise<boolean> {
    try {
      // `||` on purpose: an empty string must fall through to the next candidate.
      const user = username || process.env.SUDO_USER || process.env.USER || '';
      if (!user) {
        logger.warn('Could not determine username for docker group');
        return false;
      }
      await execAsync(`usermod -aG docker ${DockerManager.shellQuote(user)}`);
      logger.success(`Added user '${user}' to docker group`);
      logger.info('Log out and log back in for the change to take effect');
      return true;
    } catch (error) {
      logger.error(`Failed to add user to docker group: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Get Linux distribution info
   *
   * @returns The lower-cased `ID` and the `VERSION_ID` from /etc/os-release;
   *          `{ id: 'unknown', version: '' }` when the file cannot be read,
   *          and the respective default for a field that does not match.
   */
  private async getLinuxDistro(): Promise<{ id: string; version: string }> {
    try {
      const content = await fs.promises.readFile('/etc/os-release', 'utf8');
      const idMatch = content.match(/^ID=["']?(\w+)["']?$/m);
      const versionMatch = content.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);
      return {
        id: idMatch ? idMatch[1].toLowerCase() : 'unknown',
        version: versionMatch ? versionMatch[1] : '',
      };
    } catch {
      return { id: 'unknown', version: '' };
    }
  }

  /**
   * Print Docker status
   *
   * Renders the result of getStatus() as a log box: green when Docker is
   * installed and running, warning when installed but stopped, error when
   * not installed.
   */
  public async printStatus(): Promise<void> {
    const status = await this.getStatus();
    const lines: string[] = [];

    lines.push(`Installed: ${status.installed ? 'Yes' : 'No'}`);
    if (status.installed) {
      lines.push(`Version: ${status.version || 'Unknown'}`);
      lines.push(`Running: ${status.running ? 'Yes' : 'No'}`);
      if (status.running) {
        lines.push(`NVIDIA Runtime: ${status.hasNvidiaRuntime ? 'Yes' : 'No'}`);
        lines.push(`ModelGrid Network: ${status.networkExists ? 'Yes' : 'No'}`);
        if (status.storageDriver) {
          lines.push(`Storage Driver: ${status.storageDriver}`);
        }
      }
    }

    logger.logBox(
      'Docker Status',
      lines,
      50,
      status.installed && status.running ? 'success' : status.installed ? 'warning' : 'error',
    );
  }
}