510 lines
14 KiB
TypeScript
510 lines
14 KiB
TypeScript
/**
 * Docker Manager
 *
 * Handles Docker installation, configuration, and management.
 */
|
|
|
|
import { exec, execFile } from 'node:child_process';
import * as fs from 'node:fs';
import { promisify } from 'node:util';

import { DOCKER, TIMING } from '../constants.ts';
import { logger } from '../logger.ts';
|
|
|
|
// Promise-returning form of child_process.exec: the command string is run
// through a shell, and callers in this file await the resolved `{ stdout }`
// and treat a rejection as "command failed".
const execAsync = promisify(exec);
|
|
|
|
/**
 * Docker installation result
 *
 * Outcome reported by `DockerManager.install()`.
 */
export interface IDockerInstallResult {
  /** True when Docker was already present or was installed and verified. */
  success: boolean;
  /** Docker version string (e.g. `24.0.7`) when one could be detected. */
  version?: string;
  /** Failure description; set when `success` is false. */
  error?: string;
}
|
|
|
|
/**
 * Docker status information
 *
 * Snapshot produced by `DockerManager.getStatus()`.
 */
export interface IDockerStatus {
  /** Whether the `docker` CLI could be executed. */
  installed: boolean;
  /** Whether `docker info` succeeded, i.e. the daemon answered. */
  running: boolean;
  /** Docker version string parsed from `docker --version`, when available. */
  version?: string;
  /** Runtime names taken from the `Runtimes` section of `docker info`. */
  runtimes: string[];
  /** Whether a runtime named `nvidia` was reported. */
  hasNvidiaRuntime: boolean;
  /** Whether the manager's configured Docker network exists. */
  networkExists: boolean;
  /** Value of `Driver` from `docker info`, when available. */
  storageDriver?: string;
  /** Value of `DockerRootDir` from `docker info`, when available. */
  rootDir?: string;
}
|
|
|
|
/**
 * Docker Manager class
 *
 * Drives the `docker`, `systemctl` and distribution package-manager CLIs to
 * install Docker, control its service, and manage the configured network,
 * images and containers.
 *
 * Every command that carries a caller-supplied value (image name, container
 * name, user name, network name) is spawned with an argument array and no
 * shell, so those values are never interpreted as shell syntax. Fixed command
 * strings that need pipes or `||` still go through the shell-based `execAsync`.
 */
export class DockerManager {
  /** Timeout for cheap CLI probes such as `docker --version`. */
  private static readonly QUICK_TIMEOUT_MS = 5000;

  /** Distribution IDs that are installed through the `dnf` code path. */
  private static readonly RHEL_FAMILY: ReadonlySet<string> = new Set([
    'fedora',
    'rhel',
    'centos',
    'rocky',
    'almalinux',
  ]);

  /** Promise-returning `execFile`: runs a binary directly, without a shell. */
  private static readonly execFileAsync = promisify(execFile);

  private readonly networkName: string;

  /**
   * @param networkName Name of the Docker network this manager checks,
   *   creates and removes. Defaults to `DOCKER.DEFAULT_NETWORK`.
   */
  constructor(networkName: string = DOCKER.DEFAULT_NETWORK) {
    this.networkName = networkName;
  }

  /**
   * Run a binary with an argument array (no shell) and return its stdout.
   *
   * @param file Executable to run, resolved through PATH.
   * @param args Arguments passed verbatim to the executable.
   * @param timeoutMs Kill the process after this many milliseconds; 0 means
   *   no timeout.
   * @throws When the process cannot be spawned, exits non-zero, or times out.
   */
  private static async run(
    file: string,
    args: readonly string[],
    timeoutMs: number = 0,
  ): Promise<string> {
    const { stdout } = await DockerManager.execFileAsync(file, args, {
      encoding: 'utf8',
      timeout: timeoutMs,
    });
    return stdout;
  }

  /** Turn a caught value into a printable message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Check if Docker is installed
   *
   * @returns True when `docker --version` runs and its output mentions
   *   "Docker"; false on any failure.
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const stdout = await DockerManager.run(
        'docker',
        ['--version'],
        DockerManager.QUICK_TIMEOUT_MS,
      );
      return stdout.includes('Docker');
    } catch {
      return false;
    }
  }

  /**
   * Check if Docker daemon is running
   *
   * @returns True when `docker info` exits successfully within the configured
   *   command timeout.
   */
  public async isRunning(): Promise<boolean> {
    try {
      await DockerManager.run('docker', ['info'], TIMING.DOCKER_COMMAND_TIMEOUT_MS);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get Docker version
   *
   * @returns The `x.y.z` part of `docker --version`, or undefined when the
   *   command fails or the output has an unexpected shape.
   */
  public async getVersion(): Promise<string | undefined> {
    try {
      const stdout = await DockerManager.run(
        'docker',
        ['--version'],
        DockerManager.QUICK_TIMEOUT_MS,
      );
      const match = stdout.match(/Docker version (\d+\.\d+\.\d+)/);
      return match?.[1];
    } catch {
      return undefined;
    }
  }

  /**
   * Get full Docker status
   *
   * Probes in order: CLI present, daemon reachable, daemon details, network.
   * Returns early with the fields gathered so far when Docker is not
   * installed or not running.
   */
  public async getStatus(): Promise<IDockerStatus> {
    const status: IDockerStatus = {
      installed: false,
      running: false,
      runtimes: [],
      hasNvidiaRuntime: false,
      networkExists: false,
    };

    status.installed = await this.isInstalled();
    if (!status.installed) {
      return status;
    }

    status.version = await this.getVersion();

    status.running = await this.isRunning();
    if (!status.running) {
      return status;
    }

    try {
      // The `{{json .}}` template is used instead of the bare `json` format
      // name so that CLI releases without that shorthand still emit JSON.
      const stdout = await DockerManager.run(
        'docker',
        ['info', '--format', '{{json .}}'],
        TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      );

      const info: {
        Runtimes?: Record<string, unknown> | null;
        Driver?: unknown;
        DockerRootDir?: unknown;
      } | null = JSON.parse(stdout);

      // Anything but a JSON object is treated like a parse failure so the
      // text-based fallback below still gets a chance.
      if (info === null || typeof info !== 'object') {
        throw new Error('Unexpected output from docker info');
      }

      if (info.Runtimes && typeof info.Runtimes === 'object') {
        status.runtimes = Object.keys(info.Runtimes);
        status.hasNvidiaRuntime = status.runtimes.includes('nvidia');
      }

      if (typeof info.Driver === 'string') {
        status.storageDriver = info.Driver;
      }
      if (typeof info.DockerRootDir === 'string') {
        status.rootDir = info.DockerRootDir;
      }
    } catch {
      // Fallback: scan the human-readable output for the runtimes line.
      try {
        const { stdout } = await execAsync('docker info 2>/dev/null | grep -i "runtimes"', {
          timeout: DockerManager.QUICK_TIMEOUT_MS,
        });
        status.hasNvidiaRuntime = stdout.toLowerCase().includes('nvidia');
      } catch {
        // Best effort only: leave hasNvidiaRuntime at its default.
      }
    }

    status.networkExists = await this.networkExists();

    return status;
  }

  /**
   * Install Docker on the system
   *
   * Does nothing when Docker is already installed. Otherwise picks an
   * installer from the `ID` in `/etc/os-release` (apt for Debian/Ubuntu, dnf
   * for the RHEL family, Docker's convenience script for everything else),
   * starts the service and verifies the result with `docker --version`.
   *
   * @returns A result object; failures are reported through `error` rather
   *   than thrown.
   */
  public async install(): Promise<IDockerInstallResult> {
    try {
      if (await this.isInstalled()) {
        return {
          success: true,
          version: await this.getVersion(),
        };
      }

      const distro = await this.getLinuxDistro();
      logger.info(`Installing Docker on ${distro.id}...`);

      if (distro.id === 'ubuntu' || distro.id === 'debian') {
        await this.installOnDebian();
      } else if (DockerManager.RHEL_FAMILY.has(distro.id)) {
        await this.installOnRhel();
      } else {
        await this.installWithScript();
      }

      await this.startService();

      const version = await this.getVersion();
      if (version) {
        logger.success(`Docker ${version} installed successfully`);
        return { success: true, version };
      }

      return { success: false, error: 'Installation completed but Docker not found' };
    } catch (error) {
      return {
        success: false,
        error: DockerManager.describeError(error),
      };
    }
  }

  /**
   * Install Docker on Debian/Ubuntu
   *
   * Removes legacy packages, installs prerequisites, registers Docker's apt
   * repository for the detected distribution and installs the engine.
   *
   * @throws When any required command fails or the release codename cannot
   *   be determined.
   */
  private async installOnDebian(): Promise<void> {
    // Remove old versions (failure is tolerated via `|| true`).
    await execAsync(
      'apt-get remove -y docker docker-engine docker.io containerd runc || true',
      { timeout: 60000 },
    );

    // Install prerequisites
    await execAsync('apt-get update', { timeout: 120000 });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg',
      { timeout: 120000 },
    );

    // Add Docker's official GPG key
    await execAsync('install -m 0755 -d /etc/apt/keyrings');

    const distro = await this.getLinuxDistro();
    // Only the literal 'ubuntu' or 'debian' ever reaches the shell string.
    const repoDistro = distro.id === 'ubuntu' ? 'ubuntu' : 'debian';
    const repoBase = `https://download.docker.com/linux/${repoDistro}`;

    // --batch --yes lets a re-run overwrite an existing key file instead of
    // failing on the overwrite question.
    await execAsync(
      `curl -fsSL ${repoBase}/gpg | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg`,
      { timeout: 60000 },
    );
    await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg');

    const codename = await this.resolveDebianCodename(distro.codename);
    const arch = (
      await DockerManager.run('dpkg', ['--print-architecture'], DockerManager.QUICK_TIMEOUT_MS)
    ).trim();

    // Written directly rather than through `echo ... >` so no detected value
    // passes through a shell.
    await fs.promises.writeFile(
      '/etc/apt/sources.list.d/docker.list',
      `deb [arch=${arch} signed-by=/etc/apt/keyrings/docker.gpg] ${repoBase} ${codename} stable\n`,
    );

    // Install Docker
    await execAsync('apt-get update', { timeout: 120000 });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: 300000 },
    );
  }

  /**
   * Determine the release codename used in the apt repository line.
   *
   * Prefers `VERSION_CODENAME` from `/etc/os-release` and falls back to
   * `lsb_release -cs`, which may not be installed on minimal systems.
   *
   * @param fromOsRelease Codename already parsed from os-release ('' if none).
   * @throws When no plausible codename can be found.
   */
  private async resolveDebianCodename(fromOsRelease: string): Promise<string> {
    let codename = fromOsRelease;
    if (!codename) {
      codename = (await DockerManager.run('lsb_release', ['-cs'])).trim();
    }

    if (!/^[a-z0-9][a-z0-9.-]*$/i.test(codename)) {
      throw new Error(`Could not determine distribution codename (got '${codename}')`);
    }
    return codename;
  }

  /**
   * Install Docker on RHEL/Fedora/CentOS
   *
   * Fedora uses Docker's Fedora repository; every other ID routed here uses
   * the CentOS repository.
   *
   * @throws When any required `dnf` command fails.
   */
  private async installOnRhel(): Promise<void> {
    const distro = await this.getLinuxDistro();
    const repoUrl = distro.id === 'fedora'
      ? 'https://download.docker.com/linux/fedora/docker-ce.repo'
      : 'https://download.docker.com/linux/centos/docker-ce.repo';

    // Remove old versions (failure is tolerated via `|| true`).
    await execAsync(
      'dnf remove -y docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine || true',
      { timeout: 60000 },
    );

    // Install prerequisites
    await execAsync('dnf install -y dnf-plugins-core', { timeout: 120000 });

    // Add Docker repository. The first form is the classic config-manager
    // syntax; the second is the dnf5 spelling and only runs if the first fails.
    await execAsync(
      `dnf config-manager --add-repo ${repoUrl} || dnf config-manager addrepo --from-repofile=${repoUrl}`,
      { timeout: 120000 },
    );

    // Install Docker
    await execAsync(
      'dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: 300000 },
    );
  }

  /**
   * Install Docker using convenience script
   *
   * Downloads https://get.docker.com and pipes it into `sh`.
   */
  private async installWithScript(): Promise<void> {
    logger.info('Installing Docker using convenience script...');
    await execAsync('curl -fsSL https://get.docker.com | sh', {
      timeout: 600000, // 10 minutes
    });
  }

  /**
   * Run one or more `systemctl <action> docker` commands in order.
   *
   * Failures are logged as warnings and not rethrown, so service control
   * stays best-effort for callers.
   *
   * @param actions systemctl verbs to apply to the `docker` unit.
   * @param doneMessage Success message logged after all actions complete.
   * @param verb Verb used in the warning ("start", "stop", "restart").
   */
  private async controlService(
    actions: readonly string[],
    doneMessage: string,
    verb: string,
  ): Promise<void> {
    try {
      for (const action of actions) {
        await DockerManager.run('systemctl', [action, 'docker']);
      }
      logger.success(doneMessage);
    } catch (error) {
      logger.warn(`Could not ${verb} Docker service: ${DockerManager.describeError(error)}`);
    }
  }

  /**
   * Start Docker service
   *
   * Starts the unit and enables it; a failure is logged, not thrown.
   */
  public async startService(): Promise<void> {
    await this.controlService(['start', 'enable'], 'Docker service started and enabled', 'start');
  }

  /**
   * Stop Docker service
   *
   * A failure is logged, not thrown.
   */
  public async stopService(): Promise<void> {
    await this.controlService(['stop'], 'Docker service stopped', 'stop');
  }

  /**
   * Restart Docker service
   *
   * A failure is logged, not thrown.
   */
  public async restartService(): Promise<void> {
    await this.controlService(['restart'], 'Docker service restarted', 'restart');
  }

  /**
   * Check if ModelGrid network exists
   *
   * @returns True when `docker network inspect` succeeds for the configured
   *   network name.
   */
  public async networkExists(): Promise<boolean> {
    try {
      await DockerManager.run(
        'docker',
        ['network', 'inspect', this.networkName],
        DockerManager.QUICK_TIMEOUT_MS,
      );
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Create the ModelGrid Docker network
   *
   * @returns True when the network already existed or was created; false
   *   (after logging the error) when creation failed.
   */
  public async createNetwork(): Promise<boolean> {
    try {
      if (await this.networkExists()) {
        logger.dim(`Network '${this.networkName}' already exists`);
        return true;
      }

      await DockerManager.run(
        'docker',
        ['network', 'create', this.networkName],
        TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      );
      logger.success(`Created Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to create network: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Remove the ModelGrid Docker network
   *
   * @returns True when the network is absent or was removed; false (after
   *   logging the error) when removal failed.
   */
  public async removeNetwork(): Promise<boolean> {
    try {
      if (!(await this.networkExists())) {
        return true;
      }

      await DockerManager.run(
        'docker',
        ['network', 'rm', this.networkName],
        TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      );
      logger.success(`Removed Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to remove network: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Pull a Docker image
   *
   * @param image Image reference passed to `docker pull`.
   * @returns True on success; false (after logging the error) on failure.
   */
  public async pullImage(image: string): Promise<boolean> {
    try {
      logger.info(`Pulling image: ${image}`);
      // 10 minutes for large images
      await DockerManager.run('docker', ['pull', image], 600000);
      logger.success(`Pulled image: ${image}`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull image: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Check if an image exists locally
   *
   * @param image Image reference passed to `docker image inspect`.
   */
  public async imageExists(image: string): Promise<boolean> {
    try {
      await DockerManager.run(
        'docker',
        ['image', 'inspect', image],
        DockerManager.QUICK_TIMEOUT_MS,
      );
      return true;
    } catch {
      return false;
    }
  }

  /**
   * List running containers
   *
   * @param all When true, include containers that are not running (`-a`).
   * @returns Container IDs, or an empty array when the command fails.
   */
  public async listContainers(all: boolean = false): Promise<string[]> {
    try {
      const args = all
        ? ['ps', '-a', '--format', '{{.ID}}']
        : ['ps', '--format', '{{.ID}}'];
      const stdout = await DockerManager.run('docker', args, DockerManager.QUICK_TIMEOUT_MS);
      return stdout
        .split('\n')
        .map((line) => line.trim())
        .filter((id) => id.length > 0);
    } catch {
      return [];
    }
  }

  /**
   * Get container by name
   *
   * Docker's `name` filter matches substrings, so several containers can be
   * listed. A container whose name equals `name` exactly is preferred; when
   * there is none, the first listed match is returned.
   *
   * @param name Container name to look up.
   * @returns A single container ID, or null when nothing matched or the
   *   command failed.
   */
  public async getContainerByName(name: string): Promise<string | null> {
    try {
      const stdout = await DockerManager.run(
        'docker',
        ['ps', '-a', '--filter', `name=${name}`, '--format', '{{.ID}}\t{{.Names}}'],
        DockerManager.QUICK_TIMEOUT_MS,
      );
      return DockerManager.pickContainerId(stdout, name);
    } catch {
      return null;
    }
  }

  /**
   * Choose one container ID from tab-separated `ID<TAB>Names` lines.
   *
   * @param psOutput Raw stdout of `docker ps` with that two-column format.
   * @param name Name that should match exactly if possible.
   */
  private static pickContainerId(psOutput: string, name: string): string | null {
    let firstId: string | null = null;

    for (const rawLine of psOutput.split('\n')) {
      const [id = '', names = ''] = rawLine.trim().split('\t');
      if (!id) {
        continue;
      }
      // A container can be listed with several comma-separated names.
      if (names.split(',').includes(name)) {
        return id;
      }
      if (firstId === null) {
        firstId = id;
      }
    }

    return firstId;
  }

  /**
   * Add user to docker group
   *
   * @param username User to add; defaults to `$SUDO_USER`, then `$USER`.
   * @returns True on success; false when no user name could be determined or
   *   `usermod` failed (both are logged).
   */
  public async addUserToDockerGroup(username?: string): Promise<boolean> {
    try {
      // `||` on purpose: an empty string counts as "not provided".
      const user = username || process.env.SUDO_USER || process.env.USER || '';
      if (!user) {
        logger.warn('Could not determine username for docker group');
        return false;
      }

      await DockerManager.run('usermod', ['-aG', 'docker', user]);
      logger.success(`Added user '${user}' to docker group`);
      logger.info('Log out and log back in for the change to take effect');
      return true;
    } catch (error) {
      logger.error(`Failed to add user to docker group: ${DockerManager.describeError(error)}`);
      return false;
    }
  }

  /**
   * Get Linux distribution info
   *
   * @returns Values parsed from `/etc/os-release`; `id` is 'unknown' and the
   *   other fields are empty when the file is unreadable or a key is missing.
   */
  private async getLinuxDistro(): Promise<{ id: string; version: string; codename: string }> {
    try {
      const content = await fs.promises.readFile('/etc/os-release', 'utf8');
      return DockerManager.parseOsRelease(content);
    } catch {
      return { id: 'unknown', version: '', codename: '' };
    }
  }

  /**
   * Extract `ID`, `VERSION_ID` and `VERSION_CODENAME` from os-release text.
   *
   * Values may be bare or quoted. IDs may contain dots and hyphens
   * (e.g. `opensuse-leap`).
   */
  private static parseOsRelease(
    content: string,
  ): { id: string; version: string; codename: string } {
    const idMatch = content.match(/^ID=["']?([\w.-]+)["']?\s*$/m);
    const versionMatch = content.match(/^VERSION_ID=["']?([\d.]+)["']?\s*$/m);
    const codenameMatch = content.match(/^VERSION_CODENAME=["']?([\w.-]+)["']?\s*$/m);

    return {
      id: (idMatch?.[1] ?? 'unknown').toLowerCase(),
      version: versionMatch?.[1] ?? '',
      codename: codenameMatch?.[1] ?? '',
    };
  }

  /**
   * Print Docker status
   *
   * Renders the result of `getStatus()` as a boxed summary through the
   * logger. The box style is 'success' when Docker is installed and running,
   * 'warning' when only installed, and 'error' otherwise.
   */
  public async printStatus(): Promise<void> {
    const status = await this.getStatus();
    const yesNo = (flag: boolean): string => (flag ? 'Yes' : 'No');

    const lines: string[] = [];
    lines.push(`Installed: ${yesNo(status.installed)}`);

    if (status.installed) {
      lines.push(`Version: ${status.version || 'Unknown'}`);
      lines.push(`Running: ${yesNo(status.running)}`);

      if (status.running) {
        lines.push(`NVIDIA Runtime: ${yesNo(status.hasNvidiaRuntime)}`);
        lines.push(`ModelGrid Network: ${yesNo(status.networkExists)}`);
        if (status.storageDriver) {
          lines.push(`Storage Driver: ${status.storageDriver}`);
        }
      }
    }

    let tone: 'success' | 'warning' | 'error' = 'error';
    if (status.installed) {
      tone = status.running ? 'success' : 'warning';
    }

    logger.logBox('Docker Status', lines, 50, tone);
  }
}
|