initial
Some checks failed
CI / Type Check & Lint (push) Failing after 5s
CI / Build Test (Current Platform) (push) Failing after 5s
CI / Build All Platforms (push) Successful in 49s

This commit is contained in:
2026-01-30 03:16:57 +00:00
commit daaf6559e3
80 changed files with 14430 additions and 0 deletions

281
ts/drivers/amd.ts Normal file
View File

@@ -0,0 +1,281 @@
/**
* AMD Driver Management
*
* Handles AMD ROCm driver detection, installation, and container setup.
*/
import type { IDriverStatus } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
/**
* AMD ROCm Driver Manager
*/
export class AmdDriver extends BaseDriver {
  public readonly vendor = 'amd' as const;
  public readonly displayName = 'AMD ROCm';

  /**
   * Check whether the AMD ROCm driver is installed.
   *
   * Runs `rocm-smi --showdriverversion` and searches the complete output for
   * the word "Driver". The whole report is scanned (not just its first line)
   * because rocm-smi usually prints banner lines before the "Driver version"
   * line; this is also the same probe getStatus() uses.
   *
   * @returns true when the output mentions a driver; false otherwise,
   *   including when the command fails or times out.
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand('rocm-smi --showdriverversion 2>/dev/null', {
        timeout: 5000,
        ignoreErrors: true,
      });
      return stdout.includes('Driver');
    } catch {
      return false;
    }
  }

  /**
   * Collect the AMD ROCm driver status.
   *
   * Probes, in order: the driver version via rocm-smi, the ROCm toolkit
   * version, and the device nodes / Docker info used for container access.
   * When the driver probe does not find a driver, a single "not installed"
   * issue is recorded and the remaining probes are skipped.
   *
   * @returns the populated status object; this method does not throw.
   */
  public async getStatus(): Promise<IDriverStatus> {
    const status: IDriverStatus = {
      vendor: 'amd',
      installed: false,
      containerSupport: false,
      issues: [],
    };

    // Check if rocm-smi is available
    try {
      const { stdout: driverInfo } = await this.execCommand(
        'rocm-smi --showdriverversion 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );
      if (driverInfo.includes('Driver')) {
        status.installed = true;
        const match = driverInfo.match(/Driver version:\s*(\S+)/i);
        if (match) {
          status.version = match[1];
        }
      }
    } catch {
      // Treated the same as "no driver found" below.
    }

    // With ignoreErrors the command never throws, so a missing rocm-smi shows
    // up as empty output rather than an exception; report it explicitly.
    if (!status.installed) {
      status.issues.push('ROCm driver not installed or rocm-smi not available');
      return status;
    }

    // Check ROCm toolkit version
    try {
      const { stdout: rocmVersion } = await this.execCommand(
        'cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1',
        { timeout: 5000, ignoreErrors: true },
      );
      const match = rocmVersion.match(/(\d+\.\d+(?:\.\d+)?)/);
      if (match) {
        status.toolkitVersion = match[1];
      }
    } catch {
      // ROCm toolkit version not available
    }

    // Check Docker ROCm support
    try {
      const { stdout: dockerInfo } = await this.execCommand(
        'docker info 2>/dev/null | grep -i "rocm\\|amd"',
        { timeout: 5000, ignoreErrors: true },
      );
      // `|| true` keeps the partial listing: without it an unmatched
      // /dev/dri/render* glob makes ls exit non-zero and the /dev/kfd line
      // would be discarded together with the error.
      const { stdout: deviceCheck } = await this.execCommand(
        'ls /dev/kfd /dev/dri/render* 2>/dev/null || true',
        { timeout: 5000, ignoreErrors: true },
      );
      if (deviceCheck.includes('/dev/kfd') || dockerInfo.includes('rocm')) {
        status.containerSupport = true;
      } else {
        status.issues.push('ROCm device files not available for container access');
      }
    } catch {
      status.issues.push('Could not verify Docker ROCm support');
    }

    return status;
  }

  /**
   * Install the AMD ROCm driver for the detected Linux distribution.
   *
   * @param options - installation options (toolkit, container support, versions).
   * @returns true on success; false when not running as root, when the
   *   distribution is unsupported, or when any installation step throws.
   */
  public async install(options: IDriverInstallOptions): Promise<boolean> {
    if (!await this.isRoot()) {
      logger.error('Root privileges required to install AMD ROCm drivers');
      return false;
    }

    const distro = await this.getLinuxDistro();
    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);

    try {
      if (distro.id === 'ubuntu') {
        return await this.installOnUbuntu(options);
      } else if (distro.id === 'rhel' || distro.id === 'centos' || distro.id === 'rocky' || distro.id === 'almalinux') {
        return await this.installOnRhel(options);
      } else {
        logger.error(`Unsupported distribution: ${distro.id}`);
        logger.info('Please install ROCm drivers manually from https://rocm.docs.amd.com/');
        return false;
      }
    } catch (error) {
      logger.error(`Failed to install AMD ROCm drivers: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Pick the ROCm release to install and make sure it is safe to embed in a
   * shell command and a repository URL.
   *
   * @param options - installation options; `toolkitVersion` is used when set.
   * @returns the requested version, or '6.0' when none was requested.
   * @throws Error when the version contains characters other than letters,
   *   digits, '.', '_' or '-'.
   */
  private resolveRocmVersion(options: IDriverInstallOptions): string {
    const version = options.toolkitVersion || '6.0';
    // The value is interpolated into commands executed as root, so reject
    // anything that could be interpreted by the shell.
    if (!/^[A-Za-z0-9._-]+$/.test(version)) {
      throw new Error(`Invalid ROCm version: ${version}`);
    }
    return version;
  }

  /**
   * Translate an Ubuntu VERSION_ID into the release codename used in the
   * AMDGPU apt repository line.
   *
   * @param versionId - value such as '22.04'.
   * @returns 'focal', 'jammy' or 'noble' for known releases; 'focal' (the
   *   previous fallback) with a warning for anything else.
   */
  private resolveUbuntuCodename(versionId: string): string {
    switch (versionId.replace('.', '')) {
      case '2004':
        return 'focal';
      case '2204':
        return 'jammy';
      case '2404':
        return 'noble';
      default:
        logger.warn(`Unrecognized Ubuntu version "${versionId}", falling back to the focal AMDGPU repository`);
        return 'focal';
    }
  }

  /**
   * Install on Ubuntu: register the ROCm and AMDGPU apt repositories, install
   * the kernel driver plus either the HIP SDK or the SMI library, and add the
   * invoking sudo user to the video/render groups.
   *
   * @param options - installation options.
   * @returns true when every step completed.
   * @throws whatever a failing command or version validation throws; the
   *   caller (install) converts that into a false result.
   */
  private async installOnUbuntu(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing AMD ROCm on Ubuntu...');

    // Resolve everything that can fail validation before touching the system.
    const rocmVersion = this.resolveRocmVersion(options);
    const codename = this.resolveUbuntuCodename((await this.getLinuxDistro()).version);

    // Install prerequisites
    await this.aptUpdate();
    await this.aptInstall(['wget', 'gnupg2']);

    // Download and install ROCm repository key
    await this.execCommand(
      `wget -q https://repo.radeon.com/rocm/rocm.gpg.key -O - | apt-key add -`,
    );

    // Add ROCm repository
    await this.execCommand(
      `echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${rocmVersion} ubuntu main" > /etc/apt/sources.list.d/rocm.list`,
    );

    // Add AMDGPU repository
    await this.execCommand(
      `echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${rocmVersion}/ubuntu ${codename} main" > /etc/apt/sources.list.d/amdgpu.list`,
    );

    await this.aptUpdate();

    // Install AMDGPU driver and ROCm
    await this.aptInstall('amdgpu-dkms');
    if (options.installToolkit) {
      await this.aptInstall('rocm-hip-sdk');
    } else {
      await this.aptInstall('rocm-smi-lib');
    }

    // Add user to video and render groups (best effort: SUDO_USER may be unset)
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('AMD ROCm installation completed');
    logger.warn('A system reboot is required to load the new driver');
    logger.info('After reboot, verify with: rocm-smi');
    return true;
  }

  /**
   * Install on RHEL-family distributions: write the ROCm and AMDGPU yum
   * repository files, install the kernel driver plus either the HIP SDK or
   * the SMI library, and add the invoking sudo user to the video/render groups.
   *
   * @param options - installation options.
   * @returns true when every step completed.
   * @throws whatever a failing command or version validation throws; the
   *   caller (install) converts that into a false result.
   */
  private async installOnRhel(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing AMD ROCm on RHEL/CentOS...');

    const rocmVersion = this.resolveRocmVersion(options);
    const distro = await this.getLinuxDistro();
    // Only the major release number is part of the AMDGPU repository path.
    const rhelVersion = distro.version.split('.')[0];

    // Add EPEL repository
    await this.dnfInstall('epel-release');

    // Add ROCm repository
    const rocmRepo = [
      '[ROCm]',
      'name=ROCm',
      `baseurl=https://repo.radeon.com/rocm/yum/${rocmVersion}/main`,
      'enabled=1',
      'gpgcheck=1',
      'gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key',
    ].join('\n');
    await this.execCommand(`cat <<EOF > /etc/yum.repos.d/rocm.repo\n${rocmRepo}\nEOF`);

    // Add AMDGPU repository
    const amdgpuRepo = [
      '[amdgpu]',
      'name=amdgpu',
      `baseurl=https://repo.radeon.com/amdgpu/${rocmVersion}/rhel/${rhelVersion}/main/x86_64/`,
      'enabled=1',
      'gpgcheck=1',
      'gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key',
    ].join('\n');
    await this.execCommand(`cat <<EOF > /etc/yum.repos.d/amdgpu.repo\n${amdgpuRepo}\nEOF`);

    // Install AMDGPU driver
    await this.dnfInstall('amdgpu-dkms');
    if (options.installToolkit) {
      await this.dnfInstall('rocm-hip-sdk');
    } else {
      await this.dnfInstall('rocm-smi-lib');
    }

    // Add user to video and render groups (best effort: SUDO_USER may be unset)
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('AMD ROCm installation completed');
    logger.warn('A system reboot is required to load the new driver');
    return true;
  }

  /**
   * Install container support for AMD GPUs.
   *
   * ROCm containers only need the device nodes passed through, so this
   * verifies that /dev/kfd exists, relaxes the permissions on the device
   * nodes, and logs the Docker flags to use.
   *
   * @returns true when /dev/kfd was found and configured; false when it is
   *   missing or a command failed.
   */
  public async installContainerSupport(): Promise<boolean> {
    logger.info('Configuring Docker for AMD ROCm...');
    try {
      // Verify device files exist
      const { stdout: devices } = await this.execCommand('ls -la /dev/kfd /dev/dri/render* 2>/dev/null || true');
      if (!devices.includes('/dev/kfd')) {
        logger.warn('/dev/kfd not found. ROCm driver may not be properly loaded.');
        logger.info('Try rebooting the system after driver installation.');
        return false;
      }

      // Set permissions
      // NOTE(review): mode 666 makes the GPU nodes world-writable and is
      // presumably reset when the nodes are recreated - confirm whether a
      // udev rule or group membership would be preferable.
      await this.execCommand('chmod 666 /dev/kfd /dev/dri/render* || true');

      logger.success('AMD ROCm container support configured');
      logger.info('Use the following Docker flags for ROCm containers:');
      logger.info(' --device=/dev/kfd --device=/dev/dri --group-add video');
      return true;
    } catch (error) {
      logger.error(`Failed to configure ROCm container support: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Get available ROCm versions.
   *
   * @returns a fixed, hard-coded list of ROCm release numbers, newest first.
   */
  public async getAvailableVersions(): Promise<string[]> {
    return ['6.0', '5.7', '5.6', '5.5', '5.4'];
  }
}

217
ts/drivers/base-driver.ts Normal file
View File

@@ -0,0 +1,217 @@
/**
* Base Driver Class
*
* Abstract base class for GPU driver management.
*/
import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
// Promise-returning wrapper around child_process.exec, used by BaseDriver.execCommand.
const execAsync = promisify(exec);
/**
* Options for driver installation
*/
export interface IDriverInstallOptions {
  // --- required switches ---

  /** Install the vendor GPU toolkit (CUDA, ROCm, oneAPI) in addition to the driver. */
  installToolkit: boolean;

  /** Also set up container support (nvidia-docker, etc.). */
  installContainerSupport: boolean;

  /** Run without interactive prompts. */
  nonInteractive: boolean;

  // --- optional version pins ---

  /** Driver version to install; omit to let the driver class choose. */
  driverVersion?: string;

  /** Toolkit version to install; omit to let the driver class choose. */
  toolkitVersion?: string;
}
/**
* Abstract base class for GPU drivers
*/
export abstract class BaseDriver {
  /** GPU vendor this driver supports */
  public abstract readonly vendor: TGpuVendor;

  /** Display name for this driver */
  public abstract readonly displayName: string;

  /**
   * Check if the driver is installed
   */
  public abstract isInstalled(): Promise<boolean>;

  /**
   * Get the current driver status
   */
  public abstract getStatus(): Promise<IDriverStatus>;

  /**
   * Install the driver
   */
  public abstract install(options: IDriverInstallOptions): Promise<boolean>;

  /**
   * Install container runtime support (e.g., nvidia-docker)
   */
  public abstract installContainerSupport(): Promise<boolean>;

  /**
   * Get available driver versions
   */
  public abstract getAvailableVersions(): Promise<string[]>;

  /**
   * Execute a shell command with error handling.
   *
   * @param command - command line handed to the shell.
   * @param options - `timeout` in milliseconds (default 30000) and
   *   `ignoreErrors` (default false).
   * @returns the captured stdout/stderr. With `ignoreErrors`, a failed command
   *   resolves to an empty stdout and the stringified error as stderr; any
   *   output the command produced before failing is NOT returned.
   * @throws the underlying exec error when the command fails and
   *   `ignoreErrors` is not set.
   */
  protected async execCommand(
    command: string,
    options: { timeout?: number; ignoreErrors?: boolean } = {},
  ): Promise<{ stdout: string; stderr: string }> {
    const { timeout = 30000, ignoreErrors = false } = options;
    try {
      const result = await execAsync(command, { timeout });
      return { stdout: result.stdout, stderr: result.stderr };
    } catch (error) {
      if (ignoreErrors) {
        return { stdout: '', stderr: String(error) };
      }
      throw error;
    }
  }

  /**
   * Check if running as root.
   *
   * @returns true when `id -u` prints 0; false otherwise or when it fails.
   */
  protected async isRoot(): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand('id -u');
      return stdout.trim() === '0';
    } catch {
      return false;
    }
  }

  /**
   * Get the Linux distribution from /etc/os-release.
   *
   * @returns the lower-cased ID (or 'unknown') and the VERSION_ID (or '').
   *   IDs containing '-' or '.' (for example "opensuse-leap") are returned
   *   as-is instead of being reported as 'unknown'.
   */
  protected async getLinuxDistro(): Promise<{ id: string; version: string }> {
    try {
      const { stdout } = await this.execCommand('cat /etc/os-release', { ignoreErrors: true });
      const idMatch = stdout.match(/^ID=["']?([\w.-]+)["']?$/m);
      const versionMatch = stdout.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);
      return {
        id: idMatch ? idMatch[1].toLowerCase() : 'unknown',
        version: versionMatch ? versionMatch[1] : '',
      };
    } catch {
      return { id: 'unknown', version: '' };
    }
  }

  /**
   * Check if a package is installed (apt-based).
   *
   * @param packageName - package to look up with `dpkg -l`.
   * @returns true when dpkg lists the package in the "ii" state.
   */
  protected async isAptPackageInstalled(packageName: string): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand(`dpkg -l ${packageName} 2>/dev/null | grep "^ii"`, {
        ignoreErrors: true,
      });
      return stdout.includes(packageName);
    } catch {
      return false;
    }
  }

  /**
   * Check if a package is installed (dnf/yum-based).
   *
   * @param packageName - package to look up with `rpm -q`.
   * @returns true only when rpm printed something other than a
   *   "not installed" notice. A failed query (which execCommand reports as
   *   empty output because of `ignoreErrors`) counts as not installed.
   */
  protected async isDnfPackageInstalled(packageName: string): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand(`rpm -q ${packageName} 2>/dev/null`, {
        ignoreErrors: true,
      });
      const report = stdout.trim();
      // Empty output means rpm failed or is missing; that must not be
      // mistaken for "installed".
      return report.length > 0 && !report.includes('not installed');
    } catch {
      return false;
    }
  }

  /**
   * Run apt-get update
   */
  protected async aptUpdate(): Promise<void> {
    logger.info('Updating package lists...');
    await this.execCommand('apt-get update', { timeout: 120000 });
  }

  /**
   * Install one or more packages using apt.
   *
   * @param packages - a package name or a list of names.
   * @throws when apt-get exits with an error or exceeds the 10 minute timeout.
   */
  protected async aptInstall(packages: string | string[]): Promise<void> {
    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
    logger.info(`Installing packages: ${pkgList}`);
    await this.execCommand(`DEBIAN_FRONTEND=noninteractive apt-get install -y ${pkgList}`, {
      timeout: 600000, // 10 minutes for large packages
    });
  }

  /**
   * Install one or more packages using dnf.
   *
   * @param packages - a package name or a list of names.
   * @throws when dnf exits with an error or exceeds the 10 minute timeout.
   */
  protected async dnfInstall(packages: string | string[]): Promise<void> {
    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
    logger.info(`Installing packages: ${pkgList}`);
    await this.execCommand(`dnf install -y ${pkgList}`, {
      timeout: 600000,
    });
  }

  /**
   * Add an apt repository, optionally importing its signing key first.
   *
   * @param repo - repository specification passed to add-apt-repository.
   * @param keyUrl - optional URL of an ASCII-armored GPG key; it is stored
   *   under /usr/share/keyrings named after the URL's basename.
   * @throws when either command fails.
   */
  protected async addAptRepository(repo: string, keyUrl?: string): Promise<void> {
    if (keyUrl) {
      // --yes lets gpg overwrite a keyring left by an earlier run instead of
      // stopping at an overwrite prompt that cannot be answered here.
      await this.execCommand(`curl -fsSL ${keyUrl} | gpg --yes --dearmor -o /usr/share/keyrings/$(basename ${keyUrl}).gpg`);
    }
    await this.execCommand(`add-apt-repository -y "${repo}"`);
  }

  /**
   * Log driver status summary as a boxed report: installation state,
   * versions and container support (when installed), then any issues.
   */
  public async logStatus(): Promise<void> {
    const status = await this.getStatus();

    logger.logBoxTitle(`${this.displayName} Driver Status`, 60, status.installed ? 'success' : 'warning');
    logger.logBoxLine(`Installed: ${status.installed ? 'Yes' : 'No'}`);

    if (status.installed) {
      if (status.version) {
        logger.logBoxLine(`Driver Version: ${status.version}`);
      }
      if (status.toolkitVersion) {
        logger.logBoxLine(`Toolkit Version: ${status.toolkitVersion}`);
      }
      logger.logBoxLine(`Container Support: ${status.containerSupport ? 'Yes' : 'No'}`);
      if (status.containerRuntimeVersion) {
        logger.logBoxLine(`Container Runtime: ${status.containerRuntimeVersion}`);
      }
    }

    if (status.issues.length > 0) {
      logger.logBoxLine('');
      logger.logBoxLine('Issues:');
      for (const issue of status.issues) {
        logger.logBoxLine(` - ${issue}`);
      }
    }

    logger.logBoxEnd();
  }
}

View File

@@ -0,0 +1,267 @@
/**
* Driver Manager
*
* Coordinates detection and installation of GPU drivers across all vendors.
*/
import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
import { NvidiaDriver } from './nvidia.ts';
import { AmdDriver } from './amd.ts';
import { IntelDriver } from './intel.ts';
/**
* Driver Manager - coordinates GPU driver management
*/
export class DriverManager {
  private gpuDetector: GpuDetector;
  private drivers: Map<TGpuVendor, BaseDriver>;

  constructor() {
    this.gpuDetector = new GpuDetector();
    this.drivers = new Map<TGpuVendor, BaseDriver>([
      ['nvidia', new NvidiaDriver()],
      ['amd', new AmdDriver()],
      ['intel', new IntelDriver()],
    ]);
  }

  /**
   * Get driver manager for a specific vendor.
   *
   * @param vendor - GPU vendor to look up.
   * @returns the registered driver, or undefined when none is registered.
   */
  public getDriver(vendor: TGpuVendor): BaseDriver | undefined {
    return this.drivers.get(vendor);
  }

  /**
   * Get status of the drivers for every vendor that has a detected GPU.
   *
   * @returns a map from vendor to driver status; GPUs with an unknown vendor
   *   and vendors without a registered driver are omitted.
   */
  public async getAllDriverStatus(): Promise<Map<TGpuVendor, IDriverStatus>> {
    const statuses = new Map<TGpuVendor, IDriverStatus>();

    // Only check drivers for detected GPUs
    const gpus = await this.gpuDetector.detectGpus();
    const detectedVendors = new Set(gpus.map((g) => g.vendor));

    for (const vendor of detectedVendors) {
      if (vendor === 'unknown') continue;
      const driver = this.drivers.get(vendor);
      if (driver) {
        statuses.set(vendor, await driver.getStatus());
      }
    }

    return statuses;
  }

  /**
   * Check drivers for all detected GPUs.
   *
   * @returns whether every checked vendor has its driver installed, whether
   *   every checked vendor has container support, and the collected issue
   *   messages. With no GPUs detected both flags are false.
   */
  public async checkAllDrivers(): Promise<{
    allInstalled: boolean;
    allContainerReady: boolean;
    issues: string[];
  }> {
    const gpus = await this.gpuDetector.detectGpus();
    const issues: string[] = [];
    let allInstalled = true;
    let allContainerReady = true;

    if (gpus.length === 0) {
      issues.push('No GPUs detected');
      return { allInstalled: false, allContainerReady: false, issues };
    }

    // Group GPUs by vendor
    const vendorCounts = new Map<TGpuVendor, number>();
    for (const gpu of gpus) {
      vendorCounts.set(gpu.vendor, (vendorCounts.get(gpu.vendor) || 0) + 1);
    }

    // Check each vendor
    for (const [vendor, count] of vendorCounts) {
      if (vendor === 'unknown') {
        issues.push(`${count} GPU(s) with unknown vendor - cannot manage drivers`);
        continue;
      }

      const driver = this.drivers.get(vendor);
      if (!driver) {
        issues.push(`No driver manager for ${vendor}`);
        continue;
      }

      const status = await driver.getStatus();
      if (!status.installed) {
        allInstalled = false;
        issues.push(`${driver.displayName} driver not installed for ${count} GPU(s)`);
      }
      if (!status.containerSupport) {
        allContainerReady = false;
        issues.push(`${driver.displayName} container support not configured`);
      }

      // Add specific issues
      issues.push(...status.issues);
    }

    return { allInstalled, allContainerReady, issues };
  }

  /**
   * Install drivers for all detected GPUs.
   *
   * @param options - partial install options; toolkit and container support
   *   default to true, non-interactive mode defaults to false.
   * @returns true when every vendor's installation succeeded; false when no
   *   supported GPU was detected or any installation failed.
   */
  public async installAllDrivers(options: Partial<IDriverInstallOptions> = {}): Promise<boolean> {
    const fullOptions: IDriverInstallOptions = {
      installToolkit: options.installToolkit ?? true,
      installContainerSupport: options.installContainerSupport ?? true,
      nonInteractive: options.nonInteractive ?? false,
      driverVersion: options.driverVersion,
      toolkitVersion: options.toolkitVersion,
    };

    const gpus = await this.gpuDetector.detectGpus();
    const vendors = new Set(gpus.map((g) => g.vendor).filter((v) => v !== 'unknown'));

    if (vendors.size === 0) {
      logger.error('No supported GPUs detected');
      return false;
    }

    let allSuccess = true;
    for (const vendor of vendors) {
      const driver = this.drivers.get(vendor);
      if (!driver) continue;

      logger.info(`Installing ${driver.displayName} drivers...`);
      const success = await driver.install(fullOptions);
      if (!success) {
        allSuccess = false;
        logger.error(`Failed to install ${driver.displayName} drivers`);
      }
    }

    return allSuccess;
  }

  /**
   * Install container support for all GPUs.
   *
   * @returns true when every vendor's driver reported success (also true when
   *   there was nothing to configure).
   */
  public async installContainerSupport(): Promise<boolean> {
    const gpus = await this.gpuDetector.detectGpus();
    const vendors = new Set(gpus.map((g) => g.vendor).filter((v) => v !== 'unknown'));

    let allSuccess = true;
    for (const vendor of vendors) {
      const driver = this.drivers.get(vendor);
      if (!driver) continue;

      const success = await driver.installContainerSupport();
      if (!success) {
        allSuccess = false;
      }
    }

    return allSuccess;
  }

  /**
   * Print driver status summary: one box per vendor with detected GPUs, plus
   * a warning box for GPUs whose vendor is unknown.
   */
  public async printDriverStatus(): Promise<void> {
    const gpus = await this.gpuDetector.detectGpus();

    if (gpus.length === 0) {
      logger.logBox('Driver Status', ['No GPUs detected'], 50, 'warning');
      return;
    }

    // Group by vendor
    const vendorGpus = new Map<TGpuVendor, typeof gpus>();
    for (const gpu of gpus) {
      const bucket = vendorGpus.get(gpu.vendor);
      if (bucket) {
        bucket.push(gpu);
      } else {
        vendorGpus.set(gpu.vendor, [gpu]);
      }
    }

    // Print status for each vendor
    for (const [vendor, gpuList] of vendorGpus) {
      if (vendor === 'unknown') {
        logger.logBox('Unknown GPUs', [
          `${gpuList.length} GPU(s) with unknown vendor`,
          'Manual driver installation may be required',
        ], 50, 'warning');
        continue;
      }

      const driver = this.drivers.get(vendor);
      if (driver) {
        await driver.logStatus();
      }
    }
  }

  /**
   * Get Docker run arguments for GPU support.
   *
   * Arguments are generated for a single vendor: the vendor of the first
   * selected GPU whose vendor is known. Only GPUs of that vendor contribute
   * device indices or device paths, so GPUs of other vendors in a mixed
   * system are never passed through with the wrong mechanism.
   *
   * @param gpuIds - optional GPU ids to restrict the selection to; all
   *   detected GPUs are used when omitted.
   * @returns the Docker CLI arguments, or an empty array when no selected GPU
   *   has a known vendor.
   */
  public async getDockerGpuArgs(gpuIds?: string[]): Promise<string[]> {
    const gpus = await this.gpuDetector.detectGpus();
    const args: string[] = [];

    // Filter to specific GPUs if provided
    const selected = gpuIds ? gpus.filter((g) => gpuIds.includes(g.id)) : gpus;

    // Skip unknown-vendor entries so they cannot suppress the arguments for
    // a usable GPU that happens to be listed after them.
    const primary = selected.find((g) => g.vendor !== 'unknown');
    if (!primary) {
      return args;
    }

    const vendor = primary.vendor;
    const targetGpus = selected.filter((g) => g.vendor === vendor);

    switch (vendor) {
      case 'nvidia':
        // NVIDIA uses nvidia-docker runtime
        args.push('--runtime=nvidia');
        if (gpuIds && gpuIds.length > 0) {
          // Use specific GPU indices
          const indices = targetGpus.map((g) => g.index).join(',');
          args.push(`--gpus="device=${indices}"`);
        } else {
          args.push('--gpus=all');
        }
        break;

      case 'amd':
        // AMD uses device passthrough
        args.push('--device=/dev/kfd');
        for (const gpu of targetGpus) {
          // NOTE(review): assumes render nodes are numbered 128 + gpu.index - confirm against GpuDetector.
          args.push(`--device=/dev/dri/renderD${128 + gpu.index}`);
        }
        args.push('--group-add=video');
        args.push('--security-opt=seccomp=unconfined');
        break;

      case 'intel':
        // Intel uses device passthrough
        for (const gpu of targetGpus) {
          // NOTE(review): assumes render nodes are numbered 128 + gpu.index - confirm against GpuDetector.
          args.push(`--device=/dev/dri/renderD${128 + gpu.index}`);
        }
        args.push('--group-add=render');
        break;
    }

    return args;
  }
}

11
ts/drivers/index.ts Normal file
View File

@@ -0,0 +1,11 @@
/**
* Driver Management Module
*
* Exports all driver detection and installation functionality.
*/
export { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
export { NvidiaDriver } from './nvidia.ts';
export { AmdDriver } from './amd.ts';
export { IntelDriver } from './intel.ts';
export { DriverManager } from './driver-manager.ts';

339
ts/drivers/intel.ts Normal file
View File

@@ -0,0 +1,339 @@
/**
* Intel Driver Management
*
* Handles Intel Arc GPU driver detection, installation, and oneAPI setup.
*/
import type { IDriverStatus } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
/**
* Intel Arc/oneAPI Driver Manager
*/
export class IntelDriver extends BaseDriver {
  public readonly vendor = 'intel' as const;
  public readonly displayName = 'Intel Arc';

  /**
   * Check if Intel GPU driver is installed.
   *
   * Tries xpu-smi, then intel_gpu_top, then counts /dev/dri/renderD* nodes,
   * and treats any non-empty output that lacks "not found" as installed.
   *
   * @returns true when one of the probes produced output; false otherwise,
   *   including when the command chain fails or times out.
   */
  public async isInstalled(): Promise<boolean> {
    try {
      // Check for xpu-smi or intel_gpu_top
      const { stdout } = await this.execCommand(
        'xpu-smi discovery 2>/dev/null || intel_gpu_top -l 2>/dev/null || ls /dev/dri/renderD* 2>/dev/null | grep -c renderD',
        { timeout: 5000, ignoreErrors: true },
      );
      return stdout.trim().length > 0 && !stdout.includes('not found');
    } catch {
      return false;
    }
  }

  /**
   * Get Intel GPU driver status.
   *
   * Probes the i915 module version, xpu-smi (whose version overrides the
   * module version when both are found), the oneAPI compiler directory and
   * the render device nodes used for container access.
   *
   * @returns the populated status object; this method does not throw.
   */
  public async getStatus(): Promise<IDriverStatus> {
    const status: IDriverStatus = {
      vendor: 'intel',
      installed: false,
      containerSupport: false,
      issues: [],
    };

    // Check for i915 driver (Intel integrated/Arc)
    try {
      const { stdout: driverInfo } = await this.execCommand(
        'modinfo i915 2>/dev/null | grep "^version:"',
        { timeout: 5000, ignoreErrors: true },
      );
      if (driverInfo.includes('version')) {
        status.installed = true;
        const match = driverInfo.match(/version:\s*(\S+)/i);
        if (match) {
          status.version = match[1];
        }
      }
    } catch {
      // i915 module info not available
    }

    // Check for xpu-smi (Intel Arc specific)
    try {
      const { stdout: xpuVersion } = await this.execCommand(
        'xpu-smi --version 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );
      if (xpuVersion.includes('xpu-smi')) {
        status.installed = true;
        const match = xpuVersion.match(/(\d+\.\d+(?:\.\d+)?)/);
        if (match) {
          status.version = match[1];
        }
      }
    } catch {
      // xpu-smi not available
    }

    // Check oneAPI toolkit
    try {
      const { stdout: oneApiVersion } = await this.execCommand(
        'ls /opt/intel/oneapi/compiler/*/env/vars.sh 2>/dev/null | head -1 | xargs dirname | xargs dirname | xargs basename',
        { timeout: 5000, ignoreErrors: true },
      );
      if (oneApiVersion.trim()) {
        status.toolkitVersion = oneApiVersion.trim();
      }
    } catch {
      // oneAPI not installed
    }

    // Check container support
    try {
      const { stdout: renderDevices } = await this.execCommand(
        'ls /dev/dri/renderD* 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );
      if (renderDevices.includes('renderD')) {
        status.containerSupport = true;
      } else {
        status.issues.push('Intel GPU render devices not available');
      }
    } catch {
      status.issues.push('Could not check Intel GPU device availability');
    }

    if (!status.installed) {
      status.issues.push('Intel GPU driver not detected');
    }

    return status;
  }

  /**
   * Install Intel GPU drivers and optionally oneAPI.
   *
   * @param options - installation options (toolkit, container support).
   * @returns true on success; false when not running as root, when the
   *   distribution is unsupported, or when any installation step throws.
   */
  public async install(options: IDriverInstallOptions): Promise<boolean> {
    if (!await this.isRoot()) {
      logger.error('Root privileges required to install Intel GPU drivers');
      return false;
    }

    const distro = await this.getLinuxDistro();
    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);

    try {
      if (distro.id === 'ubuntu') {
        return await this.installOnUbuntu(options);
      } else if (distro.id === 'fedora') {
        return await this.installOnFedora(options);
      } else {
        logger.error(`Unsupported distribution for Intel Arc: ${distro.id}`);
        logger.info('Please install Intel drivers manually from https://dgpu-docs.intel.com/');
        return false;
      }
    } catch (error) {
      logger.error(`Failed to install Intel drivers: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Install on Ubuntu: register the Intel graphics apt repository, install
   * the user-space GPU packages and xpu-smi, then the optional extras.
   *
   * @param options - installation options.
   * @returns true when every step completed.
   * @throws whatever a failing command throws; the caller (install) converts
   *   that into a false result.
   */
  private async installOnUbuntu(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing Intel GPU drivers on Ubuntu...');

    // Install prerequisites
    await this.aptUpdate();
    await this.aptInstall(['wget', 'gpg']);

    // Add Intel graphics repository key.
    // --yes lets gpg overwrite a keyring left by an earlier run; without it
    // a second installation attempt stops at an unanswerable overwrite prompt.
    await this.execCommand(
      'wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg',
    );

    const distro = await this.getLinuxDistro();
    // 24.04 maps to noble; every other release uses the jammy repository.
    const ubuntuCodename = distro.version === '24.04' ? 'noble' : 'jammy';
    await this.execCommand(
      `echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu ${ubuntuCodename} arc" > /etc/apt/sources.list.d/intel-graphics.list`,
    );

    await this.aptUpdate();

    // Install Intel GPU packages
    await this.aptInstall([
      'intel-opencl-icd',
      'intel-level-zero-gpu',
      'level-zero',
      'intel-media-va-driver-non-free',
      'libmfx1',
      'libmfxgen1',
      'libvpl2',
      'libegl-mesa0',
      'libegl1-mesa',
      'libegl1-mesa-dev',
      'libgbm1',
      'libgl1-mesa-dev',
      'libgl1-mesa-dri',
      'libglapi-mesa',
      'libgles2-mesa-dev',
      'libglx-mesa0',
      'libigdgmm12',
      'libxatracker2',
      'mesa-va-drivers',
      'mesa-vdpau-drivers',
      'mesa-vulkan-drivers',
      'va-driver-all',
    ]);

    // Install xpu-smi for monitoring
    await this.aptInstall('xpu-smi');

    // Install oneAPI toolkit if requested
    if (options.installToolkit) {
      await this.installOneApi();
    }

    // Add user to video and render groups (best effort: SUDO_USER may be unset)
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('Intel GPU driver installation completed');
    logger.info('Verify installation with: xpu-smi discovery');
    return true;
  }

  /**
   * Install on Fedora: install the user-space GPU components, try to add
   * xpu-smi (optional), then the optional extras.
   *
   * @param options - installation options.
   * @returns true when every mandatory step completed.
   * @throws whatever a failing mandatory command throws; the caller (install)
   *   converts that into a false result.
   */
  private async installOnFedora(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing Intel GPU drivers on Fedora...');

    // Intel GPU support is included in newer Fedora kernels
    // We just need to install the user-space components
    await this.dnfInstall([
      'intel-media-driver',
      'libva-intel-driver',
      'intel-compute-runtime',
      'level-zero',
      'oneapi-level-zero',
    ]);

    // Try to install xpu-smi from Intel repo; failure only limits monitoring.
    try {
      await this.execCommand(
        'dnf copr enable -y intel/oneapi || true',
      );
      await this.dnfInstall('xpu-smi');
    } catch {
      logger.warn('Could not install xpu-smi. Intel Arc monitoring may be limited.');
    }

    // Add user to video and render groups (best effort: SUDO_USER may be unset)
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install oneAPI if requested
    if (options.installToolkit) {
      await this.installOneApi();
    }

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('Intel GPU driver installation completed');
    return true;
  }

  /**
   * Install Intel oneAPI toolkit (the `intel-basekit` package) from Intel's
   * apt or yum repository, depending on the distribution.
   *
   * On a distribution that is neither Ubuntu/Debian nor Fedora nothing is
   * installed; a warning is logged instead of a success message.
   *
   * @throws whatever a failing command throws.
   */
  private async installOneApi(): Promise<void> {
    logger.info('Installing Intel oneAPI toolkit...');

    const distro = await this.getLinuxDistro();

    if (distro.id === 'ubuntu' || distro.id === 'debian') {
      // Add Intel oneAPI repository
      await this.execCommand(
        'wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null',
      );
      await this.execCommand(
        'echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list',
      );
      await this.aptUpdate();
      await this.aptInstall('intel-basekit');
    } else if (distro.id === 'fedora') {
      // Add Intel oneAPI repository
      const oneApiRepo = [
        '[oneAPI]',
        'name=Intel oneAPI repository',
        'baseurl=https://yum.repos.intel.com/oneapi',
        'enabled=1',
        'gpgcheck=1',
        'repo_gpgcheck=1',
        'gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB',
      ].join('\n');
      await this.execCommand(`cat <<EOF > /etc/yum.repos.d/oneAPI.repo\n${oneApiRepo}\nEOF`);
      await this.dnfInstall('intel-basekit');
    } else {
      // Nothing was installed, so do not claim success below.
      logger.warn(`Intel oneAPI installation is not supported on ${distro.id}; skipping`);
      return;
    }

    logger.success('Intel oneAPI toolkit installed');
    logger.info('Source the environment with: source /opt/intel/oneapi/setvars.sh');
  }

  /**
   * Install container support for Intel GPUs.
   *
   * Intel GPUs are exposed to containers by passing the device nodes through,
   * so this verifies that /dev/dri/renderD* exists, relaxes the permissions
   * on those nodes, and logs the Docker flags to use.
   *
   * @returns true when render nodes were found and configured; false when
   *   they are missing or a command failed.
   */
  public async installContainerSupport(): Promise<boolean> {
    logger.info('Configuring Docker for Intel GPUs...');
    try {
      // Verify render devices exist
      const { stdout: devices } = await this.execCommand('ls -la /dev/dri/renderD* 2>/dev/null || true');
      if (!devices.includes('renderD')) {
        logger.warn('/dev/dri/renderD* not found. Intel GPU driver may not be properly loaded.');
        return false;
      }

      // Set permissions
      await this.execCommand('chmod 666 /dev/dri/renderD* || true');

      logger.success('Intel GPU container support configured');
      logger.info('Use the following Docker flags for Intel GPU containers:');
      logger.info(' --device=/dev/dri --group-add render');
      return true;
    } catch (error) {
      logger.error(`Failed to configure Intel container support: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Get available driver versions.
   *
   * @returns a fixed, hard-coded list of oneAPI release numbers, newest
   *   first, offered as a reference.
   */
  public async getAvailableVersions(): Promise<string[]> {
    return ['2024.0', '2023.2', '2023.1', '2023.0'];
  }
}

318
ts/drivers/nvidia.ts Normal file
View File

@@ -0,0 +1,318 @@
/**
* NVIDIA Driver Management
*
* Handles NVIDIA driver detection, installation, and container toolkit setup.
*/
import type { IDriverStatus } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
/**
* NVIDIA Driver Manager
*/
export class NvidiaDriver extends BaseDriver {
public readonly vendor = 'nvidia' as const;
public readonly displayName = 'NVIDIA';
/**
* Check if the NVIDIA driver is installed
*/
public async isInstalled(): Promise<boolean> {
  // Ask nvidia-smi for the driver version; any non-blank answer counts as
  // "installed". Failures are reported by execCommand as empty output
  // (ignoreErrors), and anything unexpected is treated as "not installed".
  const probe = 'nvidia-smi --query-gpu=driver_version --format=csv,noheader';
  try {
    const result = await this.execCommand(probe, { timeout: 5000, ignoreErrors: true });
    const reportedVersion = result.stdout.trim();
    return reportedVersion.length > 0;
  } catch {
    return false;
  }
}
/**
* Get NVIDIA driver status
*/
public async getStatus(): Promise<IDriverStatus> {
  // Collects driver version, CUDA toolkit version, container toolkit version
  // and Docker runtime configuration. Returns early with a single issue when
  // no usable driver version can be read. Does not throw.
  const status: IDriverStatus = {
    vendor: 'nvidia',
    installed: false,
    containerSupport: false,
    issues: [],
  };

  // Check if nvidia-smi is available
  let driverVersion = '';
  try {
    const { stdout } = await this.execCommand(
      'nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1',
      { timeout: 5000 },
    );
    driverVersion = stdout.trim();
  } catch {
    // Handled by the version check below.
  }

  // The pipeline's exit status is that of `head`, so a missing or failing
  // nvidia-smi does not make the command throw; it just yields empty or
  // non-version output. Only a dotted numeric version counts as installed.
  if (!/^\d+(?:\.\d+)*$/.test(driverVersion)) {
    status.issues.push('NVIDIA driver not installed or nvidia-smi not available');
    return status;
  }
  status.installed = true;
  status.version = driverVersion;

  // Check CUDA toolkit
  try {
    const { stdout: cudaVersion } = await this.execCommand(
      'nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"',
      { timeout: 5000, ignoreErrors: true },
    );
    if (cudaVersion.trim()) {
      status.toolkitVersion = cudaVersion.trim();
    }
  } catch {
    // CUDA toolkit not installed
  }

  // Check nvidia-container-toolkit
  try {
    const { stdout: containerVersion } = await this.execCommand(
      'nvidia-container-cli --version 2>&1 | head -1',
      { timeout: 5000, ignoreErrors: true },
    );
    if (containerVersion.includes('version')) {
      status.containerSupport = true;
      const match = containerVersion.match(/version (\d+\.\d+\.\d+)/);
      if (match) {
        status.containerRuntimeVersion = match[1];
      }
    } else {
      // With ignoreErrors the command never throws, so a missing CLI has to
      // be reported here rather than in the catch block.
      status.issues.push('NVIDIA Container Toolkit not installed');
    }
  } catch {
    status.issues.push('NVIDIA Container Toolkit not installed');
  }

  // Check if Docker has nvidia runtime
  try {
    const { stdout: dockerInfo } = await this.execCommand(
      'docker info --format "{{.Runtimes}}" 2>/dev/null',
      { timeout: 5000, ignoreErrors: true },
    );
    if (!dockerInfo.includes('nvidia')) {
      status.issues.push('Docker nvidia runtime not configured');
    }
  } catch {
    // Docker check failed
  }

  return status;
}
/**
* Install NVIDIA driver and optionally CUDA toolkit
*/
public async install(options: IDriverInstallOptions): Promise<boolean> {
  // Requires root. Dispatches to the Debian-family or RHEL-family installer
  // based on the detected distribution id; any error thrown by an installer
  // is logged and turned into a false result.
  const runningAsRoot = await this.isRoot();
  if (!runningAsRoot) {
    logger.error('Root privileges required to install NVIDIA drivers');
    return false;
  }

  const distro = await this.getLinuxDistro();
  logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);

  try {
    switch (distro.id) {
      case 'ubuntu':
      case 'debian':
        return await this.installOnDebian(options);

      case 'fedora':
      case 'rhel':
      case 'centos':
      case 'rocky':
      case 'almalinux':
        return await this.installOnRhel(options);

      default:
        logger.error(`Unsupported distribution: ${distro.id}`);
        logger.info('Please install NVIDIA drivers manually');
        return false;
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Failed to install NVIDIA drivers: ${reason}`);
    return false;
  }
}
/**
 * Install on Debian/Ubuntu.
 *
 * Steps: refresh the package index, install build prerequisites, try to add
 * the graphics-drivers PPA, install the driver package, then optionally the
 * CUDA toolkit and the NVIDIA Container Toolkit.
 *
 * @param options `driverVersion` selects `nvidia-driver-<version>` (defaults
 * to 535); `installToolkit` and `installContainerSupport` enable the
 * optional components.
 * @returns `true` when every requested component was installed; `false` when
 * the requested container toolkit installation reported failure. Errors from
 * the apt/exec helpers are not caught here and propagate to the caller.
 */
private async installOnDebian(options: IDriverInstallOptions): Promise<boolean> {
  logger.info('Installing NVIDIA drivers on Debian/Ubuntu...');

  // Refresh the package index before installing anything
  await this.aptUpdate();

  // Install prerequisites
  await this.aptInstall(['software-properties-common', 'build-essential', 'dkms']);

  // Add NVIDIA PPA (for Ubuntu)
  try {
    await this.execCommand('add-apt-repository -y ppa:graphics-drivers/ppa 2>/dev/null || true');
    await this.aptUpdate();
  } catch {
    // PPA might not be available on all systems
  }

  // Install NVIDIA driver
  const driverPackage = options.driverVersion
    ? `nvidia-driver-${options.driverVersion}`
    : 'nvidia-driver-535'; // Default to stable version
  await this.aptInstall(driverPackage);

  // Install CUDA toolkit if requested
  if (options.installToolkit) {
    await this.installCudaToolkit(options);
  }

  // Install container support if requested. installContainerSupport reports
  // failure through its return value, so it must be checked rather than
  // discarded.
  let containerSupportOk = true;
  if (options.installContainerSupport) {
    containerSupportOk = await this.installContainerSupport();
  }

  logger.success('NVIDIA driver installation completed');
  logger.warn('A system reboot is required to load the new driver');

  if (!containerSupportOk) {
    logger.warn('NVIDIA Container Toolkit was requested but could not be installed');
    return false;
  }
  return true;
}
/**
 * Install on RHEL/Fedora.
 *
 * Steps: install build prerequisites, register NVIDIA's CUDA repository for
 * the detected distribution, install the DKMS driver package, then
 * optionally the CUDA toolkit and the NVIDIA Container Toolkit.
 *
 * NOTE(review): the repository architecture is fixed to x86_64, and the
 * `dnf config-manager --add-repo` form is the dnf4 syntax - confirm both
 * against the systems this is expected to run on.
 *
 * @param options `installToolkit` and `installContainerSupport` enable the
 * optional components.
 * @returns `true` when every requested component was installed; `false` when
 * the requested container toolkit installation reported failure. Errors from
 * the dnf/exec helpers are not caught here and propagate to the caller.
 */
private async installOnRhel(options: IDriverInstallOptions): Promise<boolean> {
  logger.info('Installing NVIDIA drivers on RHEL/Fedora...');

  // Install prerequisites
  await this.dnfInstall(['kernel-devel', 'kernel-headers', 'gcc', 'make', 'dkms', 'acpid']);

  // Add NVIDIA CUDA repository. Fedora has its own repository tree
  // (fedoraNN); the RHEL rebuilds share the rhelN tree keyed by major version.
  const distro = await this.getLinuxDistro();
  const majorVersion = distro.version.split('.')[0];
  const repoName = distro.id === 'fedora' ? `fedora${majorVersion}` : `rhel${majorVersion}`;
  const repoUrl = `https://developer.download.nvidia.com/compute/cuda/repos/${repoName}/x86_64/cuda-${repoName}.repo`;
  await this.execCommand(`dnf config-manager --add-repo ${repoUrl}`);

  // Install NVIDIA driver
  await this.dnfInstall('nvidia-driver-latest-dkms');

  // Install CUDA toolkit if requested
  if (options.installToolkit) {
    await this.dnfInstall('cuda');
  }

  // Install container support if requested. installContainerSupport reports
  // failure through its return value, so it must be checked rather than
  // discarded.
  let containerSupportOk = true;
  if (options.installContainerSupport) {
    containerSupportOk = await this.installContainerSupport();
  }

  logger.success('NVIDIA driver installation completed');
  logger.warn('A system reboot is required to load the new driver');

  if (!containerSupportOk) {
    logger.warn('NVIDIA Container Toolkit was requested but could not be installed');
    return false;
  }
  return true;
}
/**
 * Install CUDA toolkit (Debian/Ubuntu only).
 *
 * Installs NVIDIA's cuda-keyring package for the detected release, refreshes
 * the package index and installs the toolkit package.
 *
 * @param options `toolkitVersion` (for example "12.4" or "12.4.1") pins the
 * package to `cuda-toolkit-<major>-<minor>`; when omitted the unversioned
 * `cuda-toolkit` package is installed.
 */
private async installCudaToolkit(options: IDriverInstallOptions): Promise<void> {
  logger.info('Installing CUDA toolkit...');

  const distro = await this.getLinuxDistro();
  if (distro.id !== 'ubuntu' && distro.id !== 'debian') {
    // Previously this case fell through without any feedback.
    logger.warn(`CUDA toolkit installation is not supported on ${distro.id}; skipping`);
    return;
  }

  // NVIDIA names its repository directories after the release: Ubuntu uses
  // year+month without the dot (22.04 -> ubuntu2204), Debian uses the major
  // version (12 -> debian12). Fall back to the former hard-coded ubuntu2204
  // when the version string cannot be parsed.
  const [major = '', minor = ''] = distro.version.split('.');
  const isNumeric = (part: string): boolean => /^\d+$/.test(part);
  let repoName = 'ubuntu2204';
  if (distro.id === 'ubuntu' && isNumeric(major) && isNumeric(minor)) {
    repoName = `ubuntu${major}${minor}`;
  } else if (distro.id === 'debian' && isNumeric(major)) {
    repoName = `debian${major}`;
  }

  // Add CUDA repository
  const cudaKeyUrl = `https://developer.download.nvidia.com/compute/cuda/repos/${repoName}/x86_64/cuda-keyring_1.1-1_all.deb`;
  await this.execCommand(`wget -q ${cudaKeyUrl} -O /tmp/cuda-keyring.deb && dpkg -i /tmp/cuda-keyring.deb`);
  await this.aptUpdate();

  // Versioned packages are named cuda-toolkit-<major>-<minor>; keep only the
  // first two components so "12.4.1" maps to "12-4" instead of "12-4.1".
  const cudaPackage = options.toolkitVersion
    ? `cuda-toolkit-${options.toolkitVersion.split('.').slice(0, 2).join('-')}`
    : 'cuda-toolkit';
  await this.aptInstall(cudaPackage);
}
/**
 * Install NVIDIA Container Toolkit.
 *
 * Registers NVIDIA's libnvidia-container package repository (apt on
 * Ubuntu/Debian, the rpm repo file otherwise), installs the
 * `nvidia-container-toolkit` package and then configures Docker.
 *
 * @returns `true` on success; `false` when not running as root or when any
 * step throws (the error is logged, not rethrown).
 */
public async installContainerSupport(): Promise<boolean> {
  if (!await this.isRoot()) {
    logger.error('Root privileges required to install NVIDIA Container Toolkit');
    return false;
  }

  const distro = await this.getLinuxDistro();
  logger.info('Installing NVIDIA Container Toolkit...');

  try {
    if (distro.id === 'ubuntu' || distro.id === 'debian') {
      // Add repository signing key. --batch --yes lets gpg overwrite an
      // existing keyring non-interactively, so re-running the installer
      // does not fail on the "file exists" prompt.
      await this.execCommand(
        'curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg',
      );

      // Use the distribution-independent "stable/deb" list instead of a
      // per-release path: the per-release lists do not exist for newer
      // releases. -f makes curl emit nothing on an HTTP error rather than
      // writing an error page into the apt sources file.
      await this.execCommand(
        'curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | ' +
          'sed "s#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g" | ' +
          'tee /etc/apt/sources.list.d/nvidia-container-toolkit.list',
      );

      await this.aptUpdate();
      await this.aptInstall('nvidia-container-toolkit');
    } else {
      // RHEL/Fedora (any non-Debian distribution is treated as dnf-based)
      await this.execCommand(
        'curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | ' +
          'tee /etc/yum.repos.d/nvidia-container-toolkit.repo',
      );
      await this.dnfInstall('nvidia-container-toolkit');
    }

    // Configure Docker runtime
    await this.configureDockerRuntime();

    logger.success('NVIDIA Container Toolkit installed successfully');
    return true;
  } catch (error) {
    logger.error(`Failed to install NVIDIA Container Toolkit: ${error instanceof Error ? error.message : String(error)}`);
    return false;
  }
}
/**
 * Configure Docker to use the NVIDIA runtime.
 *
 * Runs `nvidia-ctk` to configure Docker and then restarts the Docker
 * service. Failures are not propagated: they are logged as a warning
 * together with the command to run manually.
 */
private async configureDockerRuntime(): Promise<void> {
  logger.info('Configuring Docker to use NVIDIA runtime...');

  // Executed strictly in order: configure via nvidia-ctk, then restart Docker.
  const steps = [
    'nvidia-ctk runtime configure --runtime=docker',
    'systemctl restart docker',
  ];

  try {
    for (const step of steps) {
      await this.execCommand(step);
    }
    logger.success('Docker configured to use NVIDIA runtime');
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Could not configure Docker runtime automatically: ${reason}`);
    logger.info('Please run: nvidia-ctk runtime configure --runtime=docker');
  }
}
/**
 * Get available driver versions.
 *
 * On Ubuntu/Debian the apt cache is searched for `nvidia-driver-<number>`
 * packages and the numeric parts are returned, de-duplicated and sorted in
 * descending numeric order by the shell pipeline. On any other distribution,
 * or when the lookup throws, an empty list is returned.
 *
 * @returns Driver version numbers as strings, possibly empty.
 */
public async getAvailableVersions(): Promise<string[]> {
  try {
    const distro = await this.getLinuxDistro();
    const usesApt = distro.id === 'ubuntu' || distro.id === 'debian';
    if (!usesApt) {
      return [];
    }

    const { stdout } = await this.execCommand(
      'apt-cache search nvidia-driver | grep "^nvidia-driver-[0-9]" | sed "s/nvidia-driver-\\([0-9]*\\).*/\\1/" | sort -rn | uniq',
      { ignoreErrors: true },
    );

    // One version per output line; whitespace-only lines are dropped.
    const found: string[] = [];
    for (const line of stdout.trim().split('\n')) {
      if (line.trim()) {
        found.push(line);
      }
    }
    return found;
  } catch {
    // Failed to get versions
    return [];
  }
}
}