284 lines
8.0 KiB
TypeScript
284 lines
8.0 KiB
TypeScript
/**
 * ModelGrid Systemd Service Manager
 *
 * Handles installation, removal, and control of the ModelGrid systemd service.
 */
|
|
|
|
import process from 'node:process';
|
|
import { promises as fs } from 'node:fs';
|
|
import { execSync } from 'node:child_process';
|
|
import { logger } from './logger.ts';
|
|
import { theme, symbols } from './colors.ts';
|
|
import { PATHS, VERSION } from './constants.ts';
|
|
|
|
/**
 * Systemd service manager for ModelGrid.
 *
 * Installs, starts, stops, inspects and removes the `modelgrid.service`
 * unit by writing the unit file and shelling out to `systemctl`. Status
 * reporting additionally queries `docker ps` and `nvidia-smi` on a
 * best-effort basis (failures of those two commands are ignored).
 */
export class Systemd {
  /**
   * Message of the error thrown by `checkConfigExists`. `install` and
   * `start` match on it, so it is defined once to keep them in sync.
   */
  private static readonly CONFIG_NOT_FOUND = 'Configuration not found';

  /** Name of the systemd unit managed by this class. */
  private readonly serviceName = 'modelgrid.service';

  /** Location the unit file is written to and removed from. */
  private readonly serviceFilePath = '/etc/systemd/system/modelgrid.service';

  /** Contents of the unit file written by `install`. */
  private readonly serviceTemplate = `[Unit]
Description=ModelGrid - AI Infrastructure Management
After=network.target docker.service
Requires=docker.service

[Service]
ExecStart=/usr/local/bin/modelgrid service start-daemon
Restart=always
RestartSec=10
User=root
Group=root
Environment=PATH=/usr/bin:/usr/local/bin
WorkingDirectory=/opt/modelgrid

[Install]
WantedBy=multi-user.target
`;

  /**
   * Install the systemd service: write the unit file, reload systemd and
   * enable the unit.
   *
   * @throws Error with message "Configuration not found" when the config
   *   file is missing (already reported to the user, so not logged again).
   * @throws Any error raised while writing the unit file or running
   *   `systemctl`; it is logged before being rethrown.
   */
  public async install(): Promise<void> {
    try {
      // Check if configuration exists
      await this.checkConfigExists();

      // Write service file
      await fs.writeFile(this.serviceFilePath, this.serviceTemplate);
      logger.logBoxTitle('Service Installation', 50);
      logger.logBoxLine(`Service file created at ${this.serviceFilePath}`);

      // Reload systemd
      execSync('systemctl daemon-reload');
      logger.logBoxLine('Systemd daemon reloaded');

      // Enable service
      execSync(`systemctl enable ${this.serviceName}`);
      logger.logBoxLine('Service enabled to start on boot');
      logger.logBoxEnd();
    } catch (error) {
      if (this.isConfigNotFound(error)) {
        // checkConfigExists already printed guidance; avoid a duplicate log.
        throw error;
      }
      logger.error(`Failed to install systemd service: ${error}`);
      throw error;
    }
  }

  /**
   * Start the systemd service.
   *
   * Exits the process with code 1 when the configuration file is missing.
   *
   * @throws Any error raised by `systemctl start`; it is logged before
   *   being rethrown.
   */
  public async start(): Promise<void> {
    try {
      await this.checkConfigExists();
      execSync(`systemctl start ${this.serviceName}`);
      logger.logBoxTitle('Service Status', 45);
      logger.logBoxLine('ModelGrid service started successfully');
      logger.logBoxEnd();
    } catch (error) {
      if (this.isConfigNotFound(error)) {
        process.exit(1);
      }
      logger.error(`Failed to start service: ${error}`);
      throw error;
    }
  }

  /**
   * Stop the systemd service.
   *
   * @throws Any error raised by `systemctl stop`; it is logged before
   *   being rethrown.
   */
  public stop(): void {
    try {
      execSync(`systemctl stop ${this.serviceName}`);
      logger.success('ModelGrid service stopped');
    } catch (error) {
      logger.error(`Failed to stop service: ${error}`);
      throw error;
    }
  }

  /**
   * Print the version banner followed by service, container and GPU status.
   *
   * Returns right after the banner when the configuration file is missing.
   * Errors are logged and never rethrown.
   */
  public async getStatus(): Promise<void> {
    try {
      // Display version
      logger.log('');
      logger.log(`${theme.dim('ModelGrid')} ${theme.dim('v' + VERSION)}`);

      // Check if config exists
      try {
        await this.checkConfigExists();
      } catch (_error) {
        // checkConfigExists has already told the user what to do.
        return;
      }

      // Display service status
      await this.displayServiceStatus();

      // Display container status
      await this.displayContainerStatus();

      // Display GPU status
      await this.displayGpuStatus();
    } catch (error) {
      logger.error(`Failed to get status: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Display systemd service status.
   *
   * `systemctl status` exits non-zero for every unit that is not active
   * (3 = not running, 4 = no such unit), which makes `execSync` throw. The
   * captured stdout of the failed call is therefore still parsed, so an
   * installed-but-stopped service is shown as "inactive" and only a missing
   * unit is shown as "not installed".
   */
  private async displayServiceStatus(): Promise<void> {
    let statusText = '';
    try {
      statusText = execSync(`systemctl status ${this.serviceName}`).toString();
    } catch (error) {
      const failure = (typeof error === 'object' && error !== null ? error : {}) as {
        status?: unknown;
        stdout?: unknown;
      };
      // Exit status 4 means systemd does not know the unit at all.
      if (failure.status !== 4 && failure.stdout != null) {
        statusText = String(failure.stdout);
      }
    }

    const summary = this.parseServiceStatus(statusText);

    logger.log('');
    if (!summary.installed) {
      logger.log(`${symbols.stopped} ${theme.dim('Service:')} ${theme.statusInactive('not installed')}`);
      logger.log('');
      return;
    }

    if (summary.isActive) {
      logger.log(`${symbols.running} ${theme.success('Service:')} ${theme.statusActive('active (running)')}`);
    } else {
      logger.log(`${symbols.stopped} ${theme.dim('Service:')} ${theme.statusInactive('inactive')}`);
    }

    if (summary.pid || summary.memory) {
      const details: string[] = [];
      if (summary.pid) details.push(`PID: ${theme.dim(summary.pid)}`);
      if (summary.memory) details.push(`Memory: ${theme.dim(summary.memory)}`);
      logger.log(` ${details.join(' ')}`);
    }
    logger.log('');
  }

  /**
   * Extract the fields of interest from `systemctl status` output.
   *
   * Labels are matched only at the start of a (trimmed) line so that journal
   * excerpts appended to the status output cannot override the header
   * values.
   *
   * @param statusText Raw stdout of `systemctl status`; may be empty.
   * @returns `installed` is true when a `Loaded:` line exists and does not
   *   say `not-found`; `pid` and `memory` are empty strings when absent.
   */
  private parseServiceStatus(
    statusText: string,
  ): { installed: boolean; isActive: boolean; pid: string; memory: string } {
    let sawLoadedLine = false;
    let unitNotFound = false;
    let isActive = false;
    let pid = '';
    let memory = '';

    for (const rawLine of statusText.split('\n')) {
      const line = rawLine.trim();
      if (line.startsWith('Loaded:')) {
        sawLoadedLine = true;
        unitNotFound = /^Loaded:\s+not-found/.test(line);
      } else if (line.startsWith('Active:')) {
        isActive = line.includes('active (running)');
      } else if (line.startsWith('Main PID:')) {
        const match = line.match(/Main PID:\s+(\d+)/);
        if (match) pid = match[1];
      } else if (line.startsWith('Memory:')) {
        const match = line.match(/Memory:\s+([\d.]+[A-Z])/);
        if (match) memory = match[1];
      }
    }

    return { installed: sawLoadedLine && !unitNotFound, isActive, pid, memory };
  }

  /**
   * Display the containers reported by `docker ps` whose name matches
   * "modelgrid". Prints nothing when the docker command fails.
   */
  private async displayContainerStatus(): Promise<void> {
    try {
      // Try to get container info from docker
      const output = execSync('docker ps --filter "name=modelgrid" --format "{{.Names}}\\t{{.Status}}"', { encoding: 'utf-8' });
      const lines = output.trim().split('\n').filter((l) => l.trim());

      if (lines.length === 0) {
        logger.info('Containers: None running');
        return;
      }

      logger.info(`Containers (${lines.length}):`);

      for (const line of lines) {
        // Default to '' so a line without a tab never passes undefined on.
        const [name = '', status = ''] = line.split('\t');
        const isUp = status.toLowerCase().includes('up');

        logger.log(` ${isUp ? symbols.running : symbols.stopped} ${theme.highlight(name)} - ${isUp ? theme.success(status) : theme.dim(status)}`);
      }
      logger.log('');
    } catch (_error) {
      // Docker might not be running
    }
  }

  /**
   * Display per-GPU utilization and memory usage as reported by
   * `nvidia-smi`. Prints nothing when the command fails or reports no GPUs.
   */
  private async displayGpuStatus(): Promise<void> {
    try {
      // Try nvidia-smi
      const output = execSync('nvidia-smi --query-gpu=name,utilization.gpu,memory.used,memory.total --format=csv,noheader,nounits', { encoding: 'utf-8' });
      // Drop blank lines: ''.split('\n') yields [''] and would otherwise be
      // counted as one GPU.
      const lines = output.split('\n').map((l) => l.trim()).filter((l) => l.length > 0);

      if (lines.length === 0) {
        return;
      }

      logger.info(`GPUs (${lines.length}):`);

      for (const line of lines) {
        const [name = '', util = '', memUsed = '', memTotal = ''] = line.split(',').map((s) => s.trim());
        const usedMb = Number.parseInt(memUsed, 10);
        const totalMb = Number.parseInt(memTotal, 10);
        // Fields can be non-numeric (e.g. "[N/A]"); avoid printing "NaN%".
        const memPercent = Number.isFinite(usedMb) && Number.isFinite(totalMb) && totalMb > 0
          ? `${Math.round((usedMb / totalMb) * 100)}%`
          : 'n/a';

        logger.log(` ${symbols.info} ${theme.gpuNvidia(name)}`);
        logger.log(` Utilization: ${theme.highlight(util + '%')} Memory: ${theme.info(memUsed)}/${memTotal} MB (${memPercent})`);
      }
      logger.log('');
    } catch (_error) {
      // nvidia-smi might not be available
    }
  }

  /**
   * Disable and uninstall the service.
   *
   * Stopping and disabling are best-effort. Removing the unit file ignores
   * only a missing file; any other failure is reported.
   *
   * @throws Any error from removing the unit file (other than it not
   *   existing) or from `systemctl daemon-reload`; it is logged before being
   *   rethrown.
   */
  public async disable(): Promise<void> {
    try {
      // Stop if running
      try {
        execSync(`systemctl stop ${this.serviceName}`);
        logger.log('Service stopped');
      } catch (_error) {
        // Might not be running
      }

      // Disable
      try {
        execSync(`systemctl disable ${this.serviceName}`);
        logger.log('Service disabled');
      } catch (_error) {
        // Might not be enabled
      }

      // Remove service file
      try {
        await fs.unlink(this.serviceFilePath);
        logger.log('Service file removed');
      } catch (error) {
        const code = (error as { code?: unknown } | null)?.code;
        if (code !== 'ENOENT') {
          // e.g. EACCES: the file is still there, so do not claim success.
          throw error;
        }
        // Might not exist
      }

      // Reload systemd
      execSync('systemctl daemon-reload');
      logger.success('ModelGrid service uninstalled');
    } catch (error) {
      logger.error(`Failed to disable service: ${error}`);
      throw error;
    }
  }

  /**
   * Check if configuration file exists.
   *
   * Prints guidance for creating one when it is not accessible.
   *
   * @throws Error with message "Configuration not found" when
   *   `PATHS.CONFIG_FILE` cannot be accessed.
   */
  private async checkConfigExists(): Promise<void> {
    try {
      await fs.access(PATHS.CONFIG_FILE);
    } catch (_error) {
      logger.log('');
      logger.error('No configuration found');
      logger.log(` ${theme.dim('Config file:')} ${PATHS.CONFIG_FILE}`);
      logger.log(` ${theme.dim('Run')} ${theme.command('modelgrid config init')} ${theme.dim('to create one')}`);
      logger.log('');
      throw new Error(Systemd.CONFIG_NOT_FOUND);
    }
  }

  /** True when `error` is the error thrown by `checkConfigExists`. */
  private isConfigNotFound(error: unknown): boolean {
    return error instanceof Error && error.message === Systemd.CONFIG_NOT_FOUND;
  }
}
|