218 lines
6.0 KiB
TypeScript
218 lines
6.0 KiB
TypeScript
|
|
/**
 * Base Driver Class
 *
 * Abstract base class for GPU driver management.
 */
|
||
|
|
|
||
|
|
import { exec } from 'node:child_process';
|
||
|
|
import { promisify } from 'node:util';
|
||
|
|
import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
|
||
|
|
import { logger } from '../logger.ts';
|
||
|
|
|
||
|
|
// Promise-returning wrapper around child_process.exec; resolves with the command's stdout and stderr.
const execAsync = promisify(exec);
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Options for driver installation
|
||
|
|
*/
|
||
|
|
export interface IDriverInstallOptions {
  // --- Required switches ---------------------------------------------------

  /** When true, the GPU toolkit (CUDA, ROCm, oneAPI) is installed as well. */
  installToolkit: boolean;

  /** When true, container support (nvidia-docker, etc.) is installed as well. */
  installContainerSupport: boolean;

  /** When true, the installation runs non-interactively. */
  nonInteractive: boolean;

  // --- Optional version pins -----------------------------------------------

  /** Specific driver version to install; may be omitted. */
  driverVersion?: string;

  /** Specific toolkit version to install; may be omitted. */
  toolkitVersion?: string;
}
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Abstract base class for GPU drivers
|
||
|
|
*/
|
||
|
|
export abstract class BaseDriver {
  /** GPU vendor this driver supports */
  public abstract readonly vendor: TGpuVendor;

  /** Display name for this driver; used as the title of the box printed by logStatus() */
  public abstract readonly displayName: string;

  /**
   * Check if the driver is installed
   */
  public abstract isInstalled(): Promise<boolean>;

  /**
   * Get the current driver status
   */
  public abstract getStatus(): Promise<IDriverStatus>;

  /**
   * Install the driver
   *
   * @param options - Installation options (toolkit, container support, version pins, interactivity)
   */
  public abstract install(options: IDriverInstallOptions): Promise<boolean>;

  /**
   * Install container runtime support (e.g., nvidia-docker)
   */
  public abstract installContainerSupport(): Promise<boolean>;

  /**
   * Get available driver versions
   */
  public abstract getAvailableVersions(): Promise<string[]>;

  /**
   * Execute a shell command with error handling
   *
   * @param command - Command line handed to the shell as-is (no escaping is applied here)
   * @param options - `timeout` in milliseconds (default 30000); `ignoreErrors` to resolve
   *   instead of throwing when the command fails (default false); `maxBuffer` in bytes for
   *   captured stdout/stderr (default 10 MiB)
   * @returns The captured stdout and stderr. When the command fails and `ignoreErrors` is set,
   *   stdout is empty and stderr holds the stringified error.
   * @throws The error raised by the underlying exec call when `ignoreErrors` is false
   */
  protected async execCommand(
    command: string,
    options: { timeout?: number; ignoreErrors?: boolean; maxBuffer?: number } = {},
  ): Promise<{ stdout: string; stderr: string }> {
    // exec() terminates the child once captured output exceeds maxBuffer. The Node default
    // (1 MiB) is small for chatty package-manager runs, so use a larger default here.
    const { timeout = 30000, ignoreErrors = false, maxBuffer = 10 * 1024 * 1024 } = options;

    try {
      const result = await execAsync(command, { timeout, maxBuffer });
      return { stdout: result.stdout, stderr: result.stderr };
    } catch (error) {
      if (ignoreErrors) {
        return { stdout: '', stderr: String(error) };
      }
      throw error;
    }
  }

  /**
   * Check if running as root
   *
   * @returns true when `id -u` prints `0`; false otherwise, including when the command fails
   */
  protected async isRoot(): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand('id -u');
      return stdout.trim() === '0';
    } catch {
      return false;
    }
  }

  /**
   * Get the Linux distribution
   *
   * Reads `/etc/os-release` and extracts the `ID` and `VERSION_ID` fields.
   *
   * @returns The lower-cased distribution id (`'unknown'` when it cannot be determined) and
   *   the version (`''` when it cannot be determined)
   */
  protected async getLinuxDistro(): Promise<{ id: string; version: string }> {
    try {
      const { stdout } = await this.execCommand('cat /etc/os-release', { ignoreErrors: true });

      // ID values may contain '.', '_' and '-' (e.g. "opensuse-leap"), not only word characters.
      const idMatch = stdout.match(/^ID=["']?([\w.-]+)["']?$/m);
      const versionMatch = stdout.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);

      return {
        id: idMatch?.[1]?.toLowerCase() ?? 'unknown',
        version: versionMatch?.[1] ?? '',
      };
    } catch {
      return { id: 'unknown', version: '' };
    }
  }

  /**
   * Check if a package is installed (apt-based)
   *
   * @param packageName - Package name passed to `dpkg -l`
   * @returns true when an `ii` line of the dpkg listing mentions the package name
   */
  protected async isAptPackageInstalled(packageName: string): Promise<boolean> {
    try {
      // grep exits non-zero when nothing matches; ignoreErrors turns that into empty stdout.
      const { stdout } = await this.execCommand(`dpkg -l ${packageName} 2>/dev/null | grep "^ii"`, {
        ignoreErrors: true,
      });
      return stdout.includes(packageName);
    } catch {
      return false;
    }
  }

  /**
   * Check if a package is installed (dnf/yum-based)
   *
   * @param packageName - Package name passed to `rpm -q`
   * @returns true only when `rpm -q` succeeds and does not report "not installed"
   */
  protected async isDnfPackageInstalled(packageName: string): Promise<boolean> {
    try {
      // Errors must NOT be ignored here: with ignoreErrors a failing `rpm -q` yields an empty
      // stdout, which would make the "not installed" check below report the package as present.
      const { stdout } = await this.execCommand(`rpm -q ${packageName} 2>/dev/null`);
      return !stdout.includes('not installed');
    } catch {
      // Non-zero exit (package missing, rpm unavailable, ...) means "not installed".
      return false;
    }
  }

  /**
   * Run apt-get update
   *
   * @throws When `apt-get update` fails or exceeds the 2 minute timeout
   */
  protected async aptUpdate(): Promise<void> {
    logger.info('Updating package lists...');
    await this.execCommand('apt-get update', { timeout: 120000 });
  }

  /**
   * Install a package using apt
   *
   * @param packages - A single package string or a list of packages (joined with spaces)
   * @throws When `apt-get install` fails or exceeds the timeout
   */
  protected async aptInstall(packages: string | string[]): Promise<void> {
    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
    logger.info(`Installing packages: ${pkgList}`);
    await this.execCommand(`DEBIAN_FRONTEND=noninteractive apt-get install -y ${pkgList}`, {
      timeout: 600000, // 10 minutes for large packages
    });
  }

  /**
   * Install a package using dnf
   *
   * @param packages - A single package string or a list of packages (joined with spaces)
   * @throws When `dnf install` fails or exceeds the timeout
   */
  protected async dnfInstall(packages: string | string[]): Promise<void> {
    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
    logger.info(`Installing packages: ${pkgList}`);
    await this.execCommand(`dnf install -y ${pkgList}`, {
      timeout: 600000, // 10 minutes, same budget as aptInstall
    });
  }

  /**
   * Add an apt repository
   *
   * @param repo - Repository specification passed to `add-apt-repository`
   * @param keyUrl - Optional URL of a GPG key; when given, the key is downloaded and dearmored
   *   into `/usr/share/keyrings/<basename of the URL>.gpg` before the repository is added
   * @throws When downloading/converting the key or adding the repository fails
   */
  protected async addAptRepository(repo: string, keyUrl?: string): Promise<void> {
    if (keyUrl) {
      // Add GPG key. The URL is double-quoted so characters such as '&' or '?' are not
      // interpreted by the shell, and --yes lets gpg overwrite a keyring left by an earlier run.
      await this.execCommand(
        `curl -fsSL "${keyUrl}" | gpg --dearmor --yes -o "/usr/share/keyrings/$(basename "${keyUrl}").gpg"`,
      );
    }
    await this.execCommand(`add-apt-repository -y "${repo}"`);
  }

  /**
   * Log driver status summary
   *
   * Fetches the status via getStatus() and prints it as a box through the logger: the installed
   * flag, then (only when installed) versions and container support, then any reported issues.
   */
  public async logStatus(): Promise<void> {
    const status = await this.getStatus();

    logger.logBoxTitle(`${this.displayName} Driver Status`, 60, status.installed ? 'success' : 'warning');
    logger.logBoxLine(`Installed: ${status.installed ? 'Yes' : 'No'}`);

    if (status.installed) {
      if (status.version) {
        logger.logBoxLine(`Driver Version: ${status.version}`);
      }
      if (status.toolkitVersion) {
        logger.logBoxLine(`Toolkit Version: ${status.toolkitVersion}`);
      }
      logger.logBoxLine(`Container Support: ${status.containerSupport ? 'Yes' : 'No'}`);
      if (status.containerRuntimeVersion) {
        logger.logBoxLine(`Container Runtime: ${status.containerRuntimeVersion}`);
      }
    }

    if (status.issues.length > 0) {
      logger.logBoxLine('');
      logger.logBoxLine('Issues:');
      for (const issue of status.issues) {
        logger.logBoxLine(`  - ${issue}`);
      }
    }

    logger.logBoxEnd();
  }
}
|