Commit: initial
ts/00_commitinfo_data.ts (new file, 8 lines)
@@ -0,0 +1,8 @@
/**
 * autocreated commitinfo by @push.rocks/commitinfo
 */
export const commitinfo = {
  name: '@modelgrid.com/modelgrid',
  version: '1.0.0',
  description: 'GPU infrastructure management daemon with OpenAI-compatible API for AI model containers'
}

ts/api/handlers/chat.ts (new file, 150 lines)
@@ -0,0 +1,150 @@
/**
 * Chat Completions Handler
 *
 * Handles /v1/chat/completions and /v1/completions endpoints.
 */

import * as http from 'node:http';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelLoader } from '../../models/loader.ts';

/**
 * Handler for chat completion requests
 */
export class ChatHandler {
  private containerManager: ContainerManager;
  private modelLoader: ModelLoader;

  constructor(containerManager: ContainerManager, modelLoader: ModelLoader) {
    this.containerManager = containerManager;
    this.modelLoader = modelLoader;
  }

  /**
   * Handle POST /v1/chat/completions
   */
  public async handleChatCompletion(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const modelName = body.model;
    const isStream = body.stream === true;

    logger.dim(`Chat completion request for model: ${modelName}`);

    try {
      // Find or load the model
      const container = await this.findOrLoadModel(modelName);
      if (!container) {
        this.sendError(res, 404, `Model "${modelName}" not found or could not be loaded`, 'model_not_found');
        return;
      }

      // Route to streaming or non-streaming handler
      if (isStream) {
        await this.handleStreamingCompletion(res, container, body);
      } else {
        await this.handleNonStreamingCompletion(res, container, body);
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Chat completion error: ${message}`);
      this.sendError(res, 500, `Chat completion failed: ${message}`, 'server_error');
    }
  }

  /**
   * Find container with model or attempt to load it
   */
  private async findOrLoadModel(
    modelName: string,
  ): Promise<import('../../containers/base-container.ts').BaseContainer | null> {
    // First, check if model is already loaded
    const container = await this.containerManager.findContainerForModel(modelName);
    if (container) {
      return container;
    }

    // Try to load the model
    logger.info(`Model ${modelName} not loaded, attempting to load...`);
    const loadResult = await this.modelLoader.loadModel(modelName);

    if (!loadResult.success) {
      logger.error(`Failed to load model: ${loadResult.error}`);
      return null;
    }

    // Find the container again after loading
    return this.containerManager.findContainerForModel(modelName);
  }

  /**
   * Handle non-streaming chat completion
   */
  private async handleNonStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    const response = await container.chatCompletion(body);

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Handle streaming chat completion
   */
  private async handleStreamingCompletion(
    res: http.ServerResponse,
    container: import('../../containers/base-container.ts').BaseContainer,
    body: IChatCompletionRequest,
  ): Promise<void> {
    // Set SSE headers
    res.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'X-Accel-Buffering': 'no',
    });

    // Stream chunks to client
    await container.chatCompletionStream(body, (chunk) => {
      res.write(`data: ${chunk}\n\n`);
    });

    // Send final done message
    res.write('data: [DONE]\n\n');
    res.end();
  }

  /**
   * Send error response
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}
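
A consumer-side sketch of the streaming branch above. Everything here is illustrative: the host and port are placeholders for whatever the API config uses, "llama3" stands in for any greenlit model, and SSE frames are assumed not to split across reads (a production client would buffer partial lines).

// Illustrative SSE client for handleStreamingCompletion (host, key, and model
// name are assumptions, not part of this commit).
const res = await fetch('http://localhost:8080/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'llama3',
    messages: [{ role: 'user', content: 'Hello' }],
    stream: true,
  }),
});

// The handler writes "data: <chunk>\n\n" frames and closes with "data: [DONE]".
const reader = res.body!.getReader();
const decoder = new TextDecoder();
outer: while (true) {
  const { value, done } = await reader.read();
  if (done) break;
  for (const line of decoder.decode(value).split('\n')) {
    if (!line.startsWith('data: ')) continue;
    const payload = line.slice('data: '.length);
    if (payload === '[DONE]') break outer;
    console.log(JSON.parse(payload)); // assumes each chunk is a JSON object
  }
}
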
ts/api/handlers/embeddings.ts (new file, 235 lines)
@@ -0,0 +1,235 @@
/**
 * Embeddings Handler
 *
 * Handles /v1/embeddings endpoint.
 */

import * as http from 'node:http';
import type {
  IEmbeddingsRequest,
  IEmbeddingsResponse,
  IEmbeddingData,
  IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';

/**
 * Handler for embeddings requests
 */
export class EmbeddingsHandler {
  private containerManager: ContainerManager;

  constructor(containerManager: ContainerManager) {
    this.containerManager = containerManager;
  }

  /**
   * Handle POST /v1/embeddings
   */
  public async handleEmbeddings(
    res: http.ServerResponse,
    body: IEmbeddingsRequest,
  ): Promise<void> {
    const modelName = body.model;

    logger.dim(`Embeddings request for model: ${modelName}`);

    try {
      // Find container with the embedding model
      const container = await this.containerManager.findContainerForModel(modelName);
      if (!container) {
        this.sendError(res, 404, `Embedding model "${modelName}" not found`, 'model_not_found');
        return;
      }

      // Generate embeddings
      const response = await this.generateEmbeddings(container, body);

      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Embeddings error: ${message}`);
      this.sendError(res, 500, `Embeddings generation failed: ${message}`, 'server_error');
    }
  }

  /**
   * Generate embeddings from container
   */
  private async generateEmbeddings(
    container: import('../../containers/base-container.ts').BaseContainer,
    request: IEmbeddingsRequest,
  ): Promise<IEmbeddingsResponse> {
    const inputs = Array.isArray(request.input) ? request.input : [request.input];
    const embeddings: IEmbeddingData[] = [];
    let totalTokens = 0;

    // Generate embeddings for each input
    for (let i = 0; i < inputs.length; i++) {
      const input = inputs[i];
      const embedding = await this.getEmbeddingFromContainer(container, request.model, input);

      embeddings.push({
        object: 'embedding',
        embedding: embedding.vector,
        index: i,
      });

      totalTokens += embedding.tokenCount;
    }

    return {
      object: 'list',
      data: embeddings,
      model: request.model,
      usage: {
        prompt_tokens: totalTokens,
        total_tokens: totalTokens,
      },
    };
  }

  /**
   * Get embedding from container (container-specific implementation)
   */
  private async getEmbeddingFromContainer(
    container: import('../../containers/base-container.ts').BaseContainer,
    model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    const endpoint = container.getEndpoint();
    const containerType = container.type;

    // Route to container-specific embedding endpoint
    if (containerType === 'ollama') {
      return this.getOllamaEmbedding(endpoint, model, input);
    } else if (containerType === 'vllm') {
      return this.getVllmEmbedding(endpoint, model, input);
    } else if (containerType === 'tgi') {
      return this.getTgiEmbedding(endpoint, model, input);
    }

    throw new Error(`Container type ${containerType} does not support embeddings`);
  }

  /**
   * Get embedding from Ollama
   */
  private async getOllamaEmbedding(
    endpoint: string,
    model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    const response = await fetch(`${endpoint}/api/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        prompt: input,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Ollama embedding error: ${errorText}`);
    }

    const result = await response.json() as { embedding: number[] };

    // Estimate token count (rough approximation: ~4 chars per token)
    const tokenCount = Math.ceil(input.length / 4);

    return {
      vector: result.embedding,
      tokenCount,
    };
  }

  /**
   * Get embedding from vLLM (OpenAI-compatible)
   */
  private async getVllmEmbedding(
    endpoint: string,
    model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    const response = await fetch(`${endpoint}/v1/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        input,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`vLLM embedding error: ${errorText}`);
    }

    const result = await response.json() as IEmbeddingsResponse;

    return {
      vector: result.data[0].embedding,
      tokenCount: result.usage.total_tokens,
    };
  }

  /**
   * Get embedding from TGI
   */
  private async getTgiEmbedding(
    endpoint: string,
    _model: string,
    input: string,
  ): Promise<{ vector: number[]; tokenCount: number }> {
    // TGI uses /embed endpoint
    const response = await fetch(`${endpoint}/embed`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        inputs: input,
      }),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`TGI embedding error: ${errorText}`);
    }

    const result = await response.json() as number[][];

    // Estimate token count
    const tokenCount = Math.ceil(input.length / 4);

    return {
      vector: result[0],
      tokenCount,
    };
  }

  /**
   * Send error response
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}
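
A request sketch against the handler above; the endpoint, key, and model name are placeholders. Note that for the Ollama and TGI paths the reported token counts come from the length/4 estimate in the helpers above, not from an exact tokenizer.

// Illustrative embeddings call (host, key, and model are assumptions).
const res = await fetch('http://localhost:8080/v1/embeddings', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY',
    'Content-Type': 'application/json',
  },
  // input may be one string or an array; each array entry produces one
  // embedding object whose index matches its position.
  body: JSON.stringify({ model: 'nomic-embed-text', input: ['hello', 'world'] }),
});

const result = await res.json();
console.log(result.data.length);        // 2
console.log(result.usage.total_tokens); // summed (possibly estimated) tokens
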
ts/api/handlers/index.ts (new file, 9 lines)
@@ -0,0 +1,9 @@
/**
 * API Handlers
 *
 * OpenAI-compatible request handlers.
 */

export { ChatHandler } from './chat.ts';
export { ModelsHandler } from './models.ts';
export { EmbeddingsHandler } from './embeddings.ts';

ts/api/handlers/models.ts (new file, 136 lines)
@@ -0,0 +1,136 @@
/**
 * Models Handler
 *
 * Handles /v1/models endpoints.
 */

import * as http from 'node:http';
import type {
  IModelInfo,
  IListModelsResponse,
  IApiError,
} from '../../interfaces/api.ts';
import { logger } from '../../logger.ts';
import { ContainerManager } from '../../containers/container-manager.ts';
import { ModelRegistry } from '../../models/registry.ts';

/**
 * Handler for model-related requests
 */
export class ModelsHandler {
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;

  constructor(containerManager: ContainerManager, modelRegistry: ModelRegistry) {
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
  }

  /**
   * Handle GET /v1/models
   */
  public async handleListModels(res: http.ServerResponse): Promise<void> {
    try {
      const models = await this.getAvailableModels();

      const response: IListModelsResponse = {
        object: 'list',
        data: models,
      };

      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Failed to list models: ${message}`);
      this.sendError(res, 500, `Failed to list models: ${message}`, 'server_error');
    }
  }

  /**
   * Handle GET /v1/models/:model
   */
  public async handleGetModel(res: http.ServerResponse, modelId: string): Promise<void> {
    try {
      const models = await this.getAvailableModels();
      const model = models.find((m) => m.id === modelId);

      if (!model) {
        this.sendError(res, 404, `Model "${modelId}" not found`, 'model_not_found');
        return;
      }

      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(model));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logger.error(`Failed to get model info: ${message}`);
      this.sendError(res, 500, `Failed to get model info: ${message}`, 'server_error');
    }
  }

  /**
   * Get all available models from containers and greenlist
   */
  private async getAvailableModels(): Promise<IModelInfo[]> {
    const models: IModelInfo[] = [];
    const seen = new Set<string>();
    const timestamp = Math.floor(Date.now() / 1000);

    // Get models from running containers
    const containerModels = await this.containerManager.getAllAvailableModels();
    for (const [modelId, modelInfo] of containerModels) {
      if (!seen.has(modelId)) {
        seen.add(modelId);
        models.push({
          id: modelId,
          object: 'model',
          created: timestamp,
          owned_by: `modelgrid-${modelInfo.container}`,
        });
      }
    }

    // Add greenlit models that aren't loaded yet
    const greenlitModels = await this.modelRegistry.getAllGreenlitModels();
    for (const greenlit of greenlitModels) {
      if (!seen.has(greenlit.name)) {
        seen.add(greenlit.name);
        models.push({
          id: greenlit.name,
          object: 'model',
          created: timestamp,
          owned_by: `modelgrid-${greenlit.container}`,
        });
      }
    }

    // Sort alphabetically
    models.sort((a, b) => a.id.localeCompare(b.id));

    return models;
  }

  /**
   * Send error response
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}
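
A quick sketch of the merged catalogue this handler serves; the endpoint and key are placeholders.

// List models: running-container models plus not-yet-loaded greenlist entries,
// deduplicated and sorted by id.
const res = await fetch('http://localhost:8080/v1/models', {
  headers: { 'Authorization': 'Bearer YOUR_API_KEY' },
});
const { data } = await res.json();
for (const model of data) {
  // owned_by encodes the backing container, e.g. "modelgrid-ollama"
  console.log(model.id, model.owned_by);
}
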
ts/api/index.ts (new file, 10 lines)
@@ -0,0 +1,10 @@
/**
 * API Gateway Module
 *
 * OpenAI-compatible API server for ModelGrid.
 */

export { ApiServer } from './server.ts';
export { ApiRouter } from './router.ts';
export * from './handlers/index.ts';
export * from './middleware/index.ts';

ts/api/middleware/auth.ts (new file, 105 lines)
@@ -0,0 +1,105 @@
/**
 * Authentication Middleware
 *
 * Validates API keys for incoming requests.
 */

import * as http from 'node:http';
import { logger } from '../../logger.ts';

/**
 * Authentication middleware for API key validation
 */
export class AuthMiddleware {
  private apiKeys: Set<string>;
  private allowNoAuth: boolean;

  constructor(apiKeys: string[], allowNoAuth: boolean = false) {
    this.apiKeys = new Set(apiKeys);
    this.allowNoAuth = allowNoAuth;

    if (this.apiKeys.size === 0 && !allowNoAuth) {
      logger.warn('No API keys configured - authentication will fail for all requests');
    }
  }

  /**
   * Authenticate a request
   */
  public authenticate(req: http.IncomingMessage): boolean {
    // If no keys configured and allowNoAuth is true, allow all requests
    if (this.apiKeys.size === 0 && this.allowNoAuth) {
      return true;
    }

    const authHeader = req.headers.authorization;

    if (!authHeader) {
      logger.dim('Request rejected: No Authorization header');
      return false;
    }

    // Extract Bearer token
    const match = authHeader.match(/^Bearer\s+(.+)$/i);
    if (!match) {
      logger.dim('Request rejected: Invalid Authorization header format');
      return false;
    }

    const apiKey = match[1];

    // Check if key is valid
    if (!this.apiKeys.has(apiKey)) {
      logger.dim('Request rejected: Invalid API key');
      return false;
    }

    return true;
  }

  /**
   * Get API key from request (if authenticated)
   */
  public getApiKey(req: http.IncomingMessage): string | null {
    const authHeader = req.headers.authorization;
    if (!authHeader) {
      return null;
    }

    const match = authHeader.match(/^Bearer\s+(.+)$/i);
    return match ? match[1] : null;
  }

  /**
   * Add an API key
   */
  public addApiKey(key: string): void {
    this.apiKeys.add(key);
    logger.info('API key added');
  }

  /**
   * Remove an API key
   */
  public removeApiKey(key: string): boolean {
    const removed = this.apiKeys.delete(key);
    if (removed) {
      logger.info('API key removed');
    }
    return removed;
  }

  /**
   * Get count of configured API keys
   */
  public getKeyCount(): number {
    return this.apiKeys.size;
  }

  /**
   * Check if authentication is required
   */
  public isAuthRequired(): boolean {
    return !this.allowNoAuth || this.apiKeys.size > 0;
  }
}
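
A minimal smoke-test sketch of the middleware in isolation. The key value is made up, and the request objects are plain casts, which works only because authenticate() reads nothing but headers.authorization.

import type * as http from 'node:http';
import { AuthMiddleware } from './auth.ts';

const auth = new AuthMiddleware(['sk-example-key']); // illustrative key

// authenticate() only inspects headers.authorization, so cast objects
// are enough for a smoke test.
const good = { headers: { authorization: 'Bearer sk-example-key' } } as unknown as http.IncomingMessage;
const bad = { headers: { authorization: 'Basic c2stZXhhbXBsZQ==' } } as unknown as http.IncomingMessage;

console.log(auth.authenticate(good)); // true
console.log(auth.authenticate(bad));  // false: not a Bearer token
console.log(auth.isAuthRequired());   // true: allowNoAuth defaults to false
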
ts/api/middleware/index.ts (new file, 7 lines)
@@ -0,0 +1,7 @@
/**
 * API Middleware
 */

export { AuthMiddleware } from './auth.ts';
export { SanityMiddleware } from './sanity.ts';
export type { IValidationResult } from './sanity.ts';

ts/api/middleware/sanity.ts (new file, 254 lines)
@@ -0,0 +1,254 @@
/**
 * Sanity Middleware
 *
 * Validates request structure and parameters.
 */

import type { IChatCompletionRequest, IEmbeddingsRequest } from '../../interfaces/api.ts';
import { ModelRegistry } from '../../models/registry.ts';

/**
 * Validation result
 */
export interface IValidationResult {
  valid: boolean;
  error?: string;
  param?: string;
}

/**
 * Request validation middleware
 */
export class SanityMiddleware {
  private modelRegistry: ModelRegistry;

  constructor(modelRegistry: ModelRegistry) {
    this.modelRegistry = modelRegistry;
  }

  /**
   * Validate chat completion request
   */
  public validateChatRequest(body: unknown): IValidationResult {
    if (!body || typeof body !== 'object') {
      return { valid: false, error: 'Request body must be a JSON object' };
    }

    const request = body as Record<string, unknown>;

    // Validate model
    if (!request.model || typeof request.model !== 'string') {
      return { valid: false, error: 'Missing or invalid "model" field', param: 'model' };
    }

    // Validate messages
    if (!Array.isArray(request.messages)) {
      return { valid: false, error: 'Missing or invalid "messages" field', param: 'messages' };
    }

    if (request.messages.length === 0) {
      return { valid: false, error: '"messages" array cannot be empty', param: 'messages' };
    }

    // Validate each message
    for (let i = 0; i < request.messages.length; i++) {
      const msg = request.messages[i] as Record<string, unknown>;
      const msgValidation = this.validateMessage(msg, i);
      if (!msgValidation.valid) {
        return msgValidation;
      }
    }

    // Validate optional parameters
    if (request.temperature !== undefined) {
      const temp = request.temperature as number;
      if (typeof temp !== 'number' || temp < 0 || temp > 2) {
        return { valid: false, error: '"temperature" must be between 0 and 2', param: 'temperature' };
      }
    }

    if (request.top_p !== undefined) {
      const topP = request.top_p as number;
      if (typeof topP !== 'number' || topP < 0 || topP > 1) {
        return { valid: false, error: '"top_p" must be between 0 and 1', param: 'top_p' };
      }
    }

    if (request.max_tokens !== undefined) {
      const maxTokens = request.max_tokens as number;
      if (typeof maxTokens !== 'number' || maxTokens < 1) {
        return { valid: false, error: '"max_tokens" must be a positive integer', param: 'max_tokens' };
      }
    }

    if (request.n !== undefined) {
      const n = request.n as number;
      if (typeof n !== 'number' || n < 1 || n > 10) {
        return { valid: false, error: '"n" must be between 1 and 10', param: 'n' };
      }
    }

    if (request.stream !== undefined && typeof request.stream !== 'boolean') {
      return { valid: false, error: '"stream" must be a boolean', param: 'stream' };
    }

    if (request.presence_penalty !== undefined) {
      const pp = request.presence_penalty as number;
      if (typeof pp !== 'number' || pp < -2 || pp > 2) {
        return { valid: false, error: '"presence_penalty" must be between -2 and 2', param: 'presence_penalty' };
      }
    }

    if (request.frequency_penalty !== undefined) {
      const fp = request.frequency_penalty as number;
      if (typeof fp !== 'number' || fp < -2 || fp > 2) {
        return { valid: false, error: '"frequency_penalty" must be between -2 and 2', param: 'frequency_penalty' };
      }
    }

    return { valid: true };
  }

  /**
   * Validate a single message in the chat request
   */
  private validateMessage(msg: Record<string, unknown>, index: number): IValidationResult {
    if (!msg || typeof msg !== 'object') {
      return { valid: false, error: `Message at index ${index} must be an object`, param: `messages[${index}]` };
    }

    // Validate role
    const validRoles = ['system', 'user', 'assistant', 'tool'];
    if (!msg.role || !validRoles.includes(msg.role as string)) {
      return {
        valid: false,
        error: `Invalid role at index ${index}. Must be one of: ${validRoles.join(', ')}`,
        param: `messages[${index}].role`,
      };
    }

    // Validate content (can be null for assistant with tool_calls)
    if (msg.role === 'assistant' && msg.tool_calls) {
      // Content can be null/undefined when tool_calls present
    } else if (msg.content === undefined || msg.content === null) {
      return {
        valid: false,
        error: `Missing content at index ${index}`,
        param: `messages[${index}].content`,
      };
    } else if (typeof msg.content !== 'string') {
      return {
        valid: false,
        error: `Content at index ${index} must be a string`,
        param: `messages[${index}].content`,
      };
    }

    // Validate tool response message
    if (msg.role === 'tool' && !msg.tool_call_id) {
      return {
        valid: false,
        error: `Tool message at index ${index} requires tool_call_id`,
        param: `messages[${index}].tool_call_id`,
      };
    }

    return { valid: true };
  }

  /**
   * Validate embeddings request
   */
  public validateEmbeddingsRequest(body: unknown): IValidationResult {
    if (!body || typeof body !== 'object') {
      return { valid: false, error: 'Request body must be a JSON object' };
    }

    const request = body as Record<string, unknown>;

    // Validate model
    if (!request.model || typeof request.model !== 'string') {
      return { valid: false, error: 'Missing or invalid "model" field', param: 'model' };
    }

    // Validate input
    if (request.input === undefined || request.input === null) {
      return { valid: false, error: 'Missing "input" field', param: 'input' };
    }

    const input = request.input;
    if (typeof input !== 'string' && !Array.isArray(input)) {
      return { valid: false, error: '"input" must be a string or array of strings', param: 'input' };
    }

    if (Array.isArray(input)) {
      for (let i = 0; i < input.length; i++) {
        if (typeof input[i] !== 'string') {
          return { valid: false, error: `"input[${i}]" must be a string`, param: `input[${i}]` };
        }
      }

      if (input.length === 0) {
        return { valid: false, error: '"input" array cannot be empty', param: 'input' };
      }
    }

    // Validate encoding_format
    if (request.encoding_format !== undefined) {
      const format = request.encoding_format as string;
      if (format !== 'float' && format !== 'base64') {
        return { valid: false, error: '"encoding_format" must be "float" or "base64"', param: 'encoding_format' };
      }
    }

    return { valid: true };
  }

  /**
   * Check if model is in greenlist (async validation)
   */
  public async validateModelGreenlist(modelName: string): Promise<IValidationResult> {
    const isGreenlit = await this.modelRegistry.isModelGreenlit(modelName);
    if (!isGreenlit) {
      return {
        valid: false,
        error: `Model "${modelName}" is not greenlit. Contact administrator to add it to the greenlist.`,
        param: 'model',
      };
    }
    return { valid: true };
  }

  /**
   * Sanitize request body by removing unknown fields
   */
  public sanitizeChatRequest(body: Record<string, unknown>): IChatCompletionRequest {
    return {
      model: body.model as string,
      messages: body.messages as IChatCompletionRequest['messages'],
      max_tokens: body.max_tokens as number | undefined,
      temperature: body.temperature as number | undefined,
      top_p: body.top_p as number | undefined,
      n: body.n as number | undefined,
      stream: body.stream as boolean | undefined,
      stop: body.stop as string | string[] | undefined,
      presence_penalty: body.presence_penalty as number | undefined,
      frequency_penalty: body.frequency_penalty as number | undefined,
      user: body.user as string | undefined,
      tools: body.tools as IChatCompletionRequest['tools'],
      tool_choice: body.tool_choice as IChatCompletionRequest['tool_choice'],
    };
  }

  /**
   * Sanitize embeddings request
   */
  public sanitizeEmbeddingsRequest(body: Record<string, unknown>): IEmbeddingsRequest {
    return {
      model: body.model as string,
      input: body.input as string | string[],
      user: body.user as string | undefined,
      encoding_format: body.encoding_format as 'float' | 'base64' | undefined,
    };
  }
}
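
A sketch of the validator rejecting a malformed request. The registry instance is only declared, not constructed, since validateChatRequest never consults it.

import { SanityMiddleware } from './sanity.ts';
import { ModelRegistry } from '../../models/registry.ts';

// Registry construction is assumed elsewhere; validateChatRequest is pure.
declare const registry: ModelRegistry;
const sanity = new SanityMiddleware(registry);

console.log(sanity.validateChatRequest({
  model: 'llama3',
  messages: [{ role: 'user', content: 'hi' }],
  temperature: 3, // outside the accepted 0..2 range
}));
// => { valid: false, error: '"temperature" must be between 0 and 2', param: 'temperature' }
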
ts/api/router.ts (new file, 300 lines)
@@ -0,0 +1,300 @@
/**
 * API Router
 *
 * Routes incoming requests to appropriate handlers.
 */

import * as http from 'node:http';
import type { IApiError, IChatCompletionRequest, IEmbeddingsRequest } from '../interfaces/api.ts';
import { logger } from '../logger.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { ChatHandler } from './handlers/chat.ts';
import { ModelsHandler } from './handlers/models.ts';
import { EmbeddingsHandler } from './handlers/embeddings.ts';
import { AuthMiddleware } from './middleware/auth.ts';
import { SanityMiddleware } from './middleware/sanity.ts';

/**
 * API Router - routes requests to handlers
 */
export class ApiRouter {
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;
  private chatHandler: ChatHandler;
  private modelsHandler: ModelsHandler;
  private embeddingsHandler: EmbeddingsHandler;
  private authMiddleware: AuthMiddleware;
  private sanityMiddleware: SanityMiddleware;

  constructor(
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
    modelLoader: ModelLoader,
    apiKeys: string[],
  ) {
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.modelLoader = modelLoader;

    // Initialize handlers
    this.chatHandler = new ChatHandler(containerManager, modelLoader);
    this.modelsHandler = new ModelsHandler(containerManager, modelRegistry);
    this.embeddingsHandler = new EmbeddingsHandler(containerManager);

    // Initialize middleware
    this.authMiddleware = new AuthMiddleware(apiKeys);
    this.sanityMiddleware = new SanityMiddleware(modelRegistry);
  }

  /**
   * Route a request to the appropriate handler
   */
  public async route(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    path: string,
  ): Promise<void> {
    // OpenAI API endpoints
    if (path === '/v1/chat/completions') {
      await this.handleChatCompletions(req, res);
      return;
    }

    if (path === '/v1/completions') {
      await this.handleCompletions(req, res);
      return;
    }

    if (path === '/v1/models' || path === '/v1/models/') {
      await this.handleModels(req, res);
      return;
    }

    if (path.startsWith('/v1/models/')) {
      await this.handleModelInfo(req, res, path);
      return;
    }

    if (path === '/v1/embeddings') {
      await this.handleEmbeddings(req, res);
      return;
    }

    // Not found
    this.sendError(res, 404, `Endpoint not found: ${path}`, 'invalid_request_error');
  }

  /**
   * Handle POST /v1/chat/completions
   */
  private async handleChatCompletions(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (req.method !== 'POST') {
      this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
      return;
    }

    // Authenticate
    if (!this.authMiddleware.authenticate(req)) {
      this.sendError(res, 401, 'Invalid API key', 'authentication_error');
      return;
    }

    // Parse body
    const body = await this.parseRequestBody(req);
    if (!body) {
      this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
      return;
    }

    // Validate request
    const validation = this.sanityMiddleware.validateChatRequest(body);
    if (!validation.valid) {
      this.sendError(res, 400, validation.error || 'Invalid request', 'invalid_request_error');
      return;
    }

    // Handle request (validated above, safe to narrow from unknown)
    await this.chatHandler.handleChatCompletion(req, res, body as IChatCompletionRequest);
  }

  /**
   * Handle POST /v1/completions (legacy endpoint)
   */
  private async handleCompletions(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (req.method !== 'POST') {
      this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
      return;
    }

    // Authenticate
    if (!this.authMiddleware.authenticate(req)) {
      this.sendError(res, 401, 'Invalid API key', 'authentication_error');
      return;
    }

    // Parse body
    const body = await this.parseRequestBody(req);
    if (!body) {
      this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
      return;
    }

    // Convert to chat format and handle
    const chatBody = this.convertCompletionToChat(body as Record<string, unknown>);
    await this.chatHandler.handleChatCompletion(req, res, chatBody as IChatCompletionRequest);
  }

  /**
   * Handle GET /v1/models
   */
  private async handleModels(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (req.method !== 'GET') {
      this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
      return;
    }

    // Authenticate
    if (!this.authMiddleware.authenticate(req)) {
      this.sendError(res, 401, 'Invalid API key', 'authentication_error');
      return;
    }

    await this.modelsHandler.handleListModels(res);
  }

  /**
   * Handle GET /v1/models/:model
   */
  private async handleModelInfo(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    path: string,
  ): Promise<void> {
    if (req.method !== 'GET') {
      this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
      return;
    }

    // Authenticate
    if (!this.authMiddleware.authenticate(req)) {
      this.sendError(res, 401, 'Invalid API key', 'authentication_error');
      return;
    }

    const modelId = path.replace('/v1/models/', '');
    await this.modelsHandler.handleGetModel(res, modelId);
  }

  /**
   * Handle POST /v1/embeddings
   */
  private async handleEmbeddings(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    if (req.method !== 'POST') {
      this.sendError(res, 405, 'Method not allowed', 'invalid_request_error');
      return;
    }

    // Authenticate
    if (!this.authMiddleware.authenticate(req)) {
      this.sendError(res, 401, 'Invalid API key', 'authentication_error');
      return;
    }

    // Parse body
    const body = await this.parseRequestBody(req);
    if (!body) {
      this.sendError(res, 400, 'Invalid JSON body', 'invalid_request_error');
      return;
    }

    await this.embeddingsHandler.handleEmbeddings(res, body as IEmbeddingsRequest);
  }

  /**
   * Parse request body
   */
  private async parseRequestBody(req: http.IncomingMessage): Promise<unknown | null> {
    return new Promise((resolve) => {
      let body = '';

      req.on('data', (chunk) => {
        body += chunk.toString();
        // Limit body size
        if (body.length > 10 * 1024 * 1024) {
          resolve(null);
        }
      });

      req.on('end', () => {
        try {
          resolve(JSON.parse(body));
        } catch {
          resolve(null);
        }
      });

      req.on('error', () => {
        resolve(null);
      });
    });
  }

  /**
   * Convert legacy completion request to chat format
   */
  private convertCompletionToChat(body: Record<string, unknown>): Record<string, unknown> {
    const prompt = body.prompt as string | string[];
    const promptText = Array.isArray(prompt) ? prompt.join('\n') : prompt;

    return {
      model: body.model,
      messages: [
        { role: 'user', content: promptText },
      ],
      max_tokens: body.max_tokens,
      temperature: body.temperature,
      top_p: body.top_p,
      n: body.n,
      stream: body.stream,
      stop: body.stop,
    };
  }

  /**
   * Send error response
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
    param?: string,
  ): void {
    const error: IApiError = {
      error: {
        message,
        type,
        param,
        code: null,
      },
    };

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(error));
  }
}
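
For reference, how convertCompletionToChat above reshapes a legacy payload; the values are illustrative.

// Input to POST /v1/completions ...
const legacyBody = { model: 'llama3', prompt: ['Line one', 'Line two'], max_tokens: 64 };

// ... is forwarded to the chat handler as (temperature, top_p, n, stream,
// and stop pass through unchanged):
const chatBody = {
  model: 'llama3',
  messages: [{ role: 'user', content: 'Line one\nLine two' }], // array prompt joined with '\n'
  max_tokens: 64,
};
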
ts/api/server.ts (new file, 300 lines)
@@ -0,0 +1,300 @@
/**
 * API Server
 *
 * HTTP server for the OpenAI-compatible API gateway.
 */

import * as http from 'node:http';
import type { IApiConfig } from '../interfaces/config.ts';
import type { IHealthResponse } from '../interfaces/api.ts';
import { logger } from '../logger.ts';
import { API_SERVER } from '../constants.ts';
import { ApiRouter } from './router.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
 * API Server for ModelGrid
 */
export class ApiServer {
  private server?: http.Server;
  private config: IApiConfig;
  private router: ApiRouter;
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;
  private gpuDetector: GpuDetector;
  private startTime: number = 0;

  constructor(
    config: IApiConfig,
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
  ) {
    this.config = config;
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.gpuDetector = new GpuDetector();
    this.modelLoader = new ModelLoader(modelRegistry, containerManager, true);
    this.router = new ApiRouter(
      containerManager,
      modelRegistry,
      this.modelLoader,
      config.apiKeys,
    );
  }

  /**
   * Start the API server
   */
  public async start(): Promise<void> {
    if (this.server) {
      logger.warn('API server is already running');
      return;
    }

    this.startTime = Date.now();

    this.server = http.createServer(async (req, res) => {
      await this.handleRequest(req, res);
    });

    return new Promise((resolve, reject) => {
      this.server!.listen(this.config.port, this.config.host, () => {
        logger.success(`API server started on ${this.config.host}:${this.config.port}`);
        logger.info('OpenAI-compatible API available at:');
        logger.info(`  POST /v1/chat/completions`);
        logger.info(`  GET  /v1/models`);
        logger.info(`  POST /v1/embeddings`);
        resolve();
      });

      this.server!.on('error', (error) => {
        logger.error(`API server error: ${error.message}`);
        reject(error);
      });
    });
  }

  /**
   * Stop the API server
   */
  public async stop(): Promise<void> {
    if (!this.server) {
      return;
    }

    return new Promise((resolve) => {
      this.server!.close(() => {
        logger.log('API server stopped');
        this.server = undefined;
        resolve();
      });
    });
  }

  /**
   * Handle incoming HTTP request
   */
  private async handleRequest(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): Promise<void> {
    const startTime = Date.now();

    // Set CORS headers if enabled
    if (this.config.cors) {
      this.setCorsHeaders(req, res);
    }

    // Handle preflight requests
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // Parse URL
    const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    const path = url.pathname;

    // Health check endpoint (no auth required)
    if (path === '/health' || path === '/healthz') {
      await this.handleHealthCheck(res);
      return;
    }

    // Metrics endpoint (no auth required)
    if (path === '/metrics') {
      await this.handleMetrics(res);
      return;
    }

    // Route request
    try {
      await this.router.route(req, res, path);
    } catch (error) {
      logger.error(`Request error: ${error instanceof Error ? error.message : String(error)}`);
      this.sendError(res, 500, 'Internal server error', 'internal_error');
    }

    // Log request
    const duration = Date.now() - startTime;
    logger.dim(`${req.method} ${path} - ${res.statusCode} (${duration}ms)`);
  }

  /**
   * Set CORS headers
   */
  private setCorsHeaders(
    req: http.IncomingMessage,
    res: http.ServerResponse,
  ): void {
    const origin = req.headers.origin || '*';
    const allowedOrigins = this.config.corsOrigins || ['*'];

    if (allowedOrigins.includes('*') || allowedOrigins.includes(origin)) {
      res.setHeader('Access-Control-Allow-Origin', origin);
    }

    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
    res.setHeader('Access-Control-Max-Age', '86400');
  }

  /**
   * Handle health check
   */
  private async handleHealthCheck(res: http.ServerResponse): Promise<void> {
    try {
      const statuses = await this.containerManager.getAllStatus();
      const gpus = await this.gpuDetector.detectGpus();
      const models = await this.containerManager.getAllAvailableModels();

      let status: 'ok' | 'degraded' | 'error' = 'ok';
      const containerHealth: Record<string, 'healthy' | 'unhealthy'> = {};
      const gpuStatus: Record<string, 'available' | 'in_use' | 'error'> = {};

      // Check container health
      for (const [id, containerStatus] of statuses) {
        if (containerStatus.running && containerStatus.health === 'healthy') {
          containerHealth[id] = 'healthy';
        } else {
          containerHealth[id] = 'unhealthy';
          status = 'degraded';
        }
      }

      // Check GPU status
      for (const gpu of gpus) {
        gpuStatus[gpu.id] = 'available';
      }

      const response: IHealthResponse = {
        status,
        version: '1.0.0', // TODO: Get from config
        uptime: Math.floor((Date.now() - this.startTime) / 1000),
        containers: statuses.size,
        models: models.size,
        gpus: gpus.length,
        details: {
          containers: containerHealth,
          gpus: gpuStatus,
        },
      };

      res.writeHead(status === 'ok' ? 200 : 503, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(response, null, 2));
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({
        status: 'error',
        error: error instanceof Error ? error.message : String(error),
      }));
    }
  }

  /**
   * Handle metrics endpoint (Prometheus format)
   */
  private async handleMetrics(res: http.ServerResponse): Promise<void> {
    try {
      const metrics: string[] = [];
      const timestamp = Date.now();

      // Server uptime
      const uptime = Math.floor((timestamp - this.startTime) / 1000);
      metrics.push(`# HELP modelgrid_uptime_seconds Server uptime in seconds`);
      metrics.push(`# TYPE modelgrid_uptime_seconds gauge`);
      metrics.push(`modelgrid_uptime_seconds ${uptime}`);

      // Container count
      const statuses = await this.containerManager.getAllStatus();
      metrics.push(`# HELP modelgrid_containers_total Total number of containers`);
      metrics.push(`# TYPE modelgrid_containers_total gauge`);
      metrics.push(`modelgrid_containers_total ${statuses.size}`);

      // Running containers
      const running = Array.from(statuses.values()).filter((s) => s.running).length;
      metrics.push(`# HELP modelgrid_containers_running Number of running containers`);
      metrics.push(`# TYPE modelgrid_containers_running gauge`);
      metrics.push(`modelgrid_containers_running ${running}`);

      // Available models
      const models = await this.containerManager.getAllAvailableModels();
      metrics.push(`# HELP modelgrid_models_available Number of available models`);
      metrics.push(`# TYPE modelgrid_models_available gauge`);
      metrics.push(`modelgrid_models_available ${models.size}`);

      // GPU count
      const gpus = await this.gpuDetector.detectGpus();
      metrics.push(`# HELP modelgrid_gpus_total Total number of GPUs`);
      metrics.push(`# TYPE modelgrid_gpus_total gauge`);
      metrics.push(`modelgrid_gpus_total ${gpus.length}`);

      res.writeHead(200, { 'Content-Type': 'text/plain; charset=utf-8' });
      res.end(metrics.join('\n') + '\n');
    } catch (error) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
      res.end(`# Error: ${error instanceof Error ? error.message : String(error)}\n`);
    }
  }

  /**
   * Send error response
   */
  private sendError(
    res: http.ServerResponse,
    statusCode: number,
    message: string,
    type: string,
  ): void {
    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      error: {
        message,
        type,
        code: null,
      },
    }));
  }

  /**
   * Get server info
   */
  public getInfo(): {
    running: boolean;
    host: string;
    port: number;
    uptime: number;
  } {
    return {
      running: !!this.server,
      host: this.config.host,
      port: this.config.port,
      uptime: this.startTime ? Math.floor((Date.now() - this.startTime) / 1000) : 0,
    };
  }
}
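
Both unauthenticated endpoints above can be probed directly; the host and port below are assumptions standing in for whatever IApiConfig is in use.

// Health: 200 when everything is healthy, 503 once any container degrades.
const health = await (await fetch('http://localhost:8080/health')).json();
console.log(health.status); // 'ok' | 'degraded' | 'error'

// Metrics: Prometheus text exposition, one gauge per line, e.g.
// "modelgrid_uptime_seconds 42".
const metrics = await (await fetch('http://localhost:8080/metrics')).text();
console.log(metrics);
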
423
ts/cli.ts
Normal file
423
ts/cli.ts
Normal file
@@ -0,0 +1,423 @@
|
||||
/**
|
||||
* ModelGrid CLI
|
||||
*
|
||||
* Command line interface for ModelGrid.
|
||||
*/
|
||||
|
||||
import { ModelGrid } from './modelgrid.ts';
|
||||
import { logger } from './logger.ts';
|
||||
import { theme } from './colors.ts';
|
||||
import { VERSION } from './constants.ts';
|
||||
|
||||
/**
|
||||
* CLI handler for ModelGrid
|
||||
*/
|
||||
export class ModelGridCli {
|
||||
private readonly modelgrid: ModelGrid;
|
||||
|
||||
constructor() {
|
||||
this.modelgrid = new ModelGrid();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse command line arguments and execute the appropriate command
|
||||
*/
|
||||
public async parseAndExecute(args: string[]): Promise<void> {
|
||||
const debugOptions = this.extractDebugOptions(args);
|
||||
|
||||
// Check for version flag
|
||||
if (debugOptions.cleanedArgs.includes('--version') || debugOptions.cleanedArgs.includes('-v')) {
|
||||
this.showVersion();
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the command (default to help if none provided)
|
||||
const command = debugOptions.cleanedArgs[2] || 'help';
|
||||
const commandArgs = debugOptions.cleanedArgs.slice(3);
|
||||
|
||||
await this.executeCommand(command, commandArgs, debugOptions.debugMode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract debug options from args
|
||||
*/
|
||||
private extractDebugOptions(args: string[]): { debugMode: boolean; cleanedArgs: string[] } {
|
||||
const debugMode = args.includes('--debug') || args.includes('-d');
|
||||
const cleanedArgs = args.filter((arg) => arg !== '--debug' && arg !== '-d');
|
||||
return { debugMode, cleanedArgs };
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a command
|
||||
*/
|
||||
private async executeCommand(
|
||||
command: string,
|
||||
commandArgs: string[],
|
||||
debugMode: boolean,
|
||||
): Promise<void> {
|
||||
const serviceHandler = this.modelgrid.getServiceHandler();
|
||||
const gpuHandler = this.modelgrid.getGpuHandler();
|
||||
const containerHandler = this.modelgrid.getContainerHandler();
|
||||
const modelHandler = this.modelgrid.getModelHandler();
|
||||
const configHandler = this.modelgrid.getConfigHandler();
|
||||
|
||||
// Service commands
|
||||
if (command === 'service') {
|
||||
const subcommand = commandArgs[0] || 'status';
|
||||
|
||||
switch (subcommand) {
|
||||
case 'enable':
|
||||
await serviceHandler.enable();
|
||||
break;
|
||||
case 'disable':
|
||||
await serviceHandler.disable();
|
||||
break;
|
||||
case 'start':
|
||||
await serviceHandler.start();
|
||||
break;
|
||||
case 'stop':
|
||||
await serviceHandler.stop();
|
||||
break;
|
||||
case 'restart':
|
||||
await serviceHandler.stop();
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||
await serviceHandler.start();
|
||||
break;
|
||||
case 'status':
|
||||
await serviceHandler.status();
|
||||
break;
|
||||
case 'logs':
|
||||
await serviceHandler.logs();
|
||||
break;
|
||||
case 'start-daemon':
|
||||
await serviceHandler.daemonStart(debugMode);
|
||||
break;
|
||||
default:
|
||||
this.showServiceHelp();
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// GPU commands
|
||||
if (command === 'gpu') {
|
||||
const subcommand = commandArgs[0] || 'list';
|
||||
|
||||
switch (subcommand) {
|
||||
case 'list':
|
||||
case 'ls':
|
||||
await gpuHandler.list();
|
||||
break;
|
||||
case 'status':
|
||||
await gpuHandler.status();
|
||||
break;
|
||||
case 'drivers':
|
||||
await gpuHandler.drivers();
|
||||
break;
|
||||
case 'install':
|
||||
await gpuHandler.install();
|
||||
break;
|
||||
default:
|
||||
this.showGpuHelp();
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Container commands
|
||||
if (command === 'container') {
|
||||
const subcommand = commandArgs[0] || 'list';
|
||||
const subcommandArgs = commandArgs.slice(1);
|
||||
|
||||
switch (subcommand) {
|
||||
case 'list':
|
||||
case 'ls':
|
||||
await containerHandler.list();
|
||||
break;
|
||||
case 'add':
|
||||
await containerHandler.add();
|
||||
break;
|
||||
case 'remove':
|
||||
case 'rm':
|
||||
await containerHandler.remove(subcommandArgs[0]);
|
||||
break;
|
||||
case 'start':
|
||||
await containerHandler.start(subcommandArgs[0]);
|
||||
break;
|
||||
case 'stop':
|
||||
await containerHandler.stop(subcommandArgs[0]);
|
||||
break;
|
||||
case 'logs':
|
||||
await containerHandler.logs(subcommandArgs[0], parseInt(subcommandArgs[1] || '100', 10));
|
||||
break;
|
||||
default:
|
||||
this.showContainerHelp();
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Model commands
|
||||
if (command === 'model') {
|
||||
const subcommand = commandArgs[0] || 'list';
|
||||
const subcommandArgs = commandArgs.slice(1);
|
||||
|
||||
switch (subcommand) {
|
||||
case 'list':
|
||||
case 'ls':
|
||||
await modelHandler.list();
|
||||
break;
|
||||
case 'pull':
|
||||
await modelHandler.pull(subcommandArgs[0]);
|
||||
break;
|
||||
case 'remove':
|
||||
case 'rm':
|
||||
await modelHandler.remove(subcommandArgs[0]);
|
||||
break;
|
||||
case 'status':
|
||||
await modelHandler.status();
|
||||
break;
|
||||
case 'refresh':
|
||||
await modelHandler.refresh();
|
||||
break;
|
||||
default:
|
||||
this.showModelHelp();
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Config commands
|
||||
if (command === 'config') {
|
||||
const subcommand = commandArgs[0] || 'show';
|
||||
const subcommandArgs = commandArgs.slice(1);
|
||||
|
||||
switch (subcommand) {
|
||||
case 'show':
|
||||
case 'display':
|
||||
await configHandler.show();
|
||||
break;
|
||||
case 'init':
|
||||
await configHandler.init();
|
||||
break;
|
||||
case 'apikey':
|
||||
const keySubcommand = subcommandArgs[0] || 'list';
|
||||
switch (keySubcommand) {
|
||||
case 'add':
|
||||
await configHandler.addApiKey(subcommandArgs[1]);
|
||||
break;
|
||||
case 'remove':
|
||||
case 'rm':
|
||||
await configHandler.removeApiKey(subcommandArgs[1]);
|
||||
break;
|
||||
case 'list':
|
||||
case 'ls':
|
||||
default:
|
||||
await configHandler.listApiKeys();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
this.showConfigHelp();
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Top-level commands
|
||||
switch (command) {
|
||||
case 'update':
|
||||
await serviceHandler.update();
|
||||
break;
|
||||
case 'uninstall':
|
||||
await serviceHandler.uninstall();
|
||||
break;
|
||||
case 'help':
|
||||
case '--help':
|
||||
case '-h':
|
||||
this.showHelp();
|
||||
break;
|
||||
default:
|
||||
logger.error(`Unknown command: ${command}`);
|
||||
logger.log('');
|
||||
this.showHelp();
|
||||
break;
|
||||
}
|
||||
}

  /**
   * Display version information
   */
  private showVersion(): void {
    logger.log(`ModelGrid version ${VERSION}`);
    logger.log('GPU Infrastructure & AI Model Management (https://modelgrid.com)');
  }

  /**
   * Display help message
   */
  private showHelp(): void {
    console.log('');
    logger.highlight('ModelGrid - AI Infrastructure Management');
    logger.dim('GPU detection, container orchestration, and OpenAI-compatible API');
    console.log('');

    logger.log(theme.info('Usage:'));
    logger.log(`  ${theme.command('modelgrid')} ${theme.dim('<command> [options]')}`);
    console.log('');

    logger.log(theme.info('Commands:'));
    this.printCommand('service <subcommand>', 'Manage systemd service');
    this.printCommand('gpu <subcommand>', 'Manage GPU hardware');
    this.printCommand('container <subcommand>', 'Manage AI containers');
    this.printCommand('model <subcommand>', 'Manage AI models');
    this.printCommand('config <subcommand>', 'Manage configuration');
    this.printCommand('update', 'Update ModelGrid', theme.dim('(requires root)'));
    this.printCommand('uninstall', 'Remove ModelGrid', theme.dim('(requires root)'));
    this.printCommand('help, --help, -h', 'Show this help message');
    this.printCommand('--version, -v', 'Show version information');
    console.log('');

    logger.log(theme.info('Quick Start:'));
    logger.dim('  modelgrid gpu list              # Detect GPUs');
    logger.dim('  modelgrid container add         # Add an Ollama/vLLM container');
    logger.dim('  modelgrid container start       # Start containers');
    logger.dim('  modelgrid model pull llama3     # Pull a model');
    logger.dim('  modelgrid service enable        # Install as service');
    console.log('');

    logger.log(theme.info('API Usage:'));
    logger.dim('  curl -X POST http://localhost:8080/v1/chat/completions \\');
    logger.dim('    -H "Authorization: Bearer YOUR_API_KEY" \\');
    logger.dim('    -H "Content-Type: application/json" \\');
    logger.dim('    -d \'{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}\'');
    console.log('');
  }

  /**
   * Helper to print a command
   */
  private printCommand(command: string, description: string, extra?: string): void {
    const paddedCommand = command.padEnd(28);
    logger.log(`  ${theme.command(paddedCommand)} ${description}${extra ? ' ' + extra : ''}`);
  }

  /**
   * Display service help
   */
  private showServiceHelp(): void {
    logger.log(`
ModelGrid - Service Management Commands

Usage:
  modelgrid service <subcommand>

Subcommands:
  enable         Install and enable the systemd service (requires root)
  disable        Stop and disable the systemd service (requires root)
  start          Start the systemd service
  stop           Stop the systemd service
  restart        Restart the systemd service
  status         Show service status
  logs           Show service logs in real-time
  start-daemon   Start the daemon process directly

Options:
  --debug, -d    Enable debug mode
`);
  }

  /**
   * Display GPU help
   */
  private showGpuHelp(): void {
    logger.log(`
ModelGrid - GPU Management Commands

Usage:
  modelgrid gpu <subcommand>

Subcommands:
  list       List detected GPUs
  status     Show GPU utilization and status
  drivers    Check GPU driver status
  install    Install GPU drivers (requires root)

Examples:
  modelgrid gpu list       # Show all detected GPUs
  modelgrid gpu status     # Show current GPU utilization
`);
  }

  /**
   * Display container help
   */
  private showContainerHelp(): void {
    logger.log(`
ModelGrid - Container Management Commands

Usage:
  modelgrid container <subcommand> [arguments]

Subcommands:
  list          List all configured containers
  add           Add a new container interactively
  remove <id>   Remove a container by ID
  start [id]    Start a container (or all if no ID)
  stop [id]     Stop a container (or all if no ID)
  logs <id>     Show container logs

Examples:
  modelgrid container add             # Add new container
  modelgrid container start ollama    # Start specific container
  modelgrid container logs ollama     # View container logs
`);
  }

  /**
   * Display model help
   */
  private showModelHelp(): void {
    logger.log(`
ModelGrid - Model Management Commands

Usage:
  modelgrid model <subcommand> [arguments]

Subcommands:
  list            List all available models
  pull <name>     Pull a model (must be greenlit)
  remove <name>   Remove a model
  status          Show model loading recommendations
  refresh         Refresh greenlist cache

Examples:
  modelgrid model list             # Show all models
  modelgrid model pull llama3:8b   # Pull a model
  modelgrid model status           # Show VRAM recommendations
`);
  }

  /**
   * Display config help
   */
  private showConfigHelp(): void {
    logger.log(`
ModelGrid - Configuration Commands

Usage:
  modelgrid config <subcommand> [arguments]

Subcommands:
  show                  Display current configuration
  init                  Initialize default configuration
  apikey list           List configured API keys
  apikey add [key]      Add an API key (generates if not provided)
  apikey remove <key>   Remove an API key

Examples:
  modelgrid config show          # Show current config
  modelgrid config apikey add    # Generate new API key
`);
  }
}
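// Illustrative note, not part of this commit: with the split used above,
// `modelgrid config apikey add` routes as command='config',
// commandArgs=['apikey', 'add']; the 'config' branch then sees
// subcommand='apikey' and subcommandArgs=['add'], so subcommandArgs[1]
// is undefined and addApiKey() falls back to generating a fresh key.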
314
ts/cli/config-handler.ts
Normal file
@@ -0,0 +1,314 @@
/**
 * Config Handler
 *
 * CLI commands for configuration management.
 */

import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { PATHS } from '../constants.ts';
import type { IModelGridConfig } from '../interfaces/config.ts';
import type { ITableColumn } from '../logger.ts';
import * as fs from 'node:fs/promises';

/**
 * Handler for configuration-related CLI commands
 */
export class ConfigHandler {
  /**
   * Show current configuration
   */
  public async show(): Promise<void> {
    logger.log('');

    try {
      const configPath = PATHS.CONFIG_FILE;
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent) as IModelGridConfig;

      // Overview
      logger.logBox(
        'ModelGrid Configuration',
        [
          `Version: ${theme.highlight(config.version)}`,
          `Check Interval: ${theme.info(String(config.checkInterval / 1000))} seconds`,
          '',
          theme.dim('Configuration File:'),
          `  ${theme.path(configPath)}`,
        ],
        60,
        'info',
      );

      // API Configuration
      logger.log('');
      logger.logBox(
        'API Server',
        [
          `Host: ${theme.info(config.api.host)}`,
          `Port: ${theme.highlight(String(config.api.port))}`,
          `API Keys: ${config.api.apiKeys.length} configured`,
          ...(config.api.rateLimit
            ? [`Rate Limit: ${config.api.rateLimit} req/min`]
            : []),
          '',
          theme.dim('Endpoint:'),
          `  http://${config.api.host}:${config.api.port}/v1/chat/completions`,
        ],
        60,
        'info',
      );

      // Docker Configuration
      logger.log('');
      logger.logBox(
        'Docker',
        [
          `Runtime: ${theme.info(config.docker.runtime)}`,
          `Network: ${config.docker.networkName}`,
        ],
        60,
        'default',
      );

      // GPU Configuration
      logger.log('');
      logger.logBox(
        'GPU',
        [
          `Auto Detect: ${config.gpus.autoDetect ? theme.success('Yes') : theme.dim('No')}`,
          `Assignments: ${Object.keys(config.gpus.assignments).length} GPU(s)`,
        ],
        60,
        'default',
      );

      // Model Configuration
      logger.log('');
      logger.logBox(
        'Models',
        [
          `Auto Pull: ${config.models.autoPull ? theme.success('Enabled') : theme.dim('Disabled')}`,
          `Default Container: ${config.models.defaultContainer}`,
          `Auto Load: ${config.models.autoLoad.length} model(s)`,
          '',
          theme.dim('Greenlist URL:'),
          `  ${config.models.greenlistUrl}`,
        ],
        70,
        'default',
      );

      // Containers
      if (config.containers.length > 0) {
        logger.log('');
        logger.info(`Containers (${config.containers.length}):`);
        logger.log('');

        const rows = config.containers.map((c) => ({
          id: c.id,
          name: c.name,
          type: c.type,
          image: c.image.length > 40 ? c.image.substring(0, 37) + '...' : c.image,
          port: c.port,
          gpus: c.gpuIds.length > 0 ? c.gpuIds.join(',') : theme.dim('None'),
        }));

        const columns: ITableColumn[] = [
          { header: 'ID', key: 'id', align: 'left' },
          { header: 'Name', key: 'name', align: 'left', color: theme.highlight },
          { header: 'Type', key: 'type', align: 'left' },
          { header: 'Image', key: 'image', align: 'left', color: theme.dim },
          { header: 'Port', key: 'port', align: 'right' },
          { header: 'GPUs', key: 'gpus', align: 'left' },
        ];

        logger.logTable(columns, rows);
      }

      logger.log('');
    } catch (error) {
      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
        logger.logBox(
          'No Configuration',
          [
            'No configuration file found.',
            '',
            theme.dim('Create configuration with:'),
            `  ${theme.command('modelgrid service enable')}`,
            '',
            theme.dim('Or manually create:'),
            `  ${PATHS.CONFIG_FILE}`,
          ],
          60,
          'warning',
        );
      } else {
        logger.error(`Failed to read configuration: ${(error as Error).message}`);
      }
    }
  }

  /**
   * Initialize default configuration
   */
  public async init(): Promise<void> {
    const configPath = PATHS.CONFIG_FILE;

    // Check if config already exists
    try {
      await fs.access(configPath);
      logger.warn('Configuration file already exists');
      logger.dim(`  ${configPath}`);
      return;
    } catch {
      // File doesn't exist, continue
    }

    // Create config directory
    const configDir = PATHS.CONFIG_DIR;
    await fs.mkdir(configDir, { recursive: true });

    // Create default config
    const defaultConfig: IModelGridConfig = {
      version: '1.0.0',
      api: {
        port: 8080,
        host: '0.0.0.0',
        apiKeys: [],
        cors: true,
        corsOrigins: ['*'],
      },
      docker: {
        networkName: 'modelgrid',
        runtime: 'docker',
      },
      gpus: {
        autoDetect: true,
        assignments: {},
      },
      containers: [],
      models: {
        greenlistUrl: 'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',
        autoPull: true,
        defaultContainer: 'ollama',
        autoLoad: [],
      },
      checkInterval: 30000,
    };

    await fs.writeFile(configPath, JSON.stringify(defaultConfig, null, 2));

    logger.success('Configuration initialized');
    logger.dim(`  ${configPath}`);
  }

  /**
   * Add an API key
   */
  public async addApiKey(key?: string): Promise<void> {
    const configPath = PATHS.CONFIG_FILE;

    try {
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent) as IModelGridConfig;

      // Generate key if not provided
      const apiKey = key || this.generateApiKey();

      if (config.api.apiKeys.includes(apiKey)) {
        logger.warn('API key already exists');
        return;
      }

      config.api.apiKeys.push(apiKey);

      await fs.writeFile(configPath, JSON.stringify(config, null, 2));

      logger.success('API key added:');
      logger.log(`  ${theme.highlight(apiKey)}`);
      logger.log('');
      logger.dim('Use with Authorization header:');
      logger.dim(`  curl -H "Authorization: Bearer ${apiKey}" ...`);
    } catch (error) {
      logger.error(`Failed to add API key: ${(error as Error).message}`);
    }
  }

  /**
   * Remove an API key
   */
  public async removeApiKey(key: string): Promise<void> {
    if (!key) {
      logger.error('API key is required');
      return;
    }

    const configPath = PATHS.CONFIG_FILE;

    try {
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent) as IModelGridConfig;

      const index = config.api.apiKeys.indexOf(key);
      if (index === -1) {
        logger.warn('API key not found');
        return;
      }

      config.api.apiKeys.splice(index, 1);

      await fs.writeFile(configPath, JSON.stringify(config, null, 2));

      logger.success('API key removed');
    } catch (error) {
      logger.error(`Failed to remove API key: ${(error as Error).message}`);
    }
  }

  /**
   * List API keys
   */
  public async listApiKeys(): Promise<void> {
    const configPath = PATHS.CONFIG_FILE;

    try {
      const configContent = await fs.readFile(configPath, 'utf-8');
      const config = JSON.parse(configContent) as IModelGridConfig;

      if (config.api.apiKeys.length === 0) {
        logger.warn('No API keys configured');
        logger.dim('Add a key with: modelgrid config apikey add');
        return;
      }

      logger.info(`API Keys (${config.api.apiKeys.length}):`);
      logger.log('');

      for (const key of config.api.apiKeys) {
        // Show partial key for security
        const masked = key.substring(0, 8) + '...' + key.substring(key.length - 4);
        logger.log(`  ${masked}`);
      }

      logger.log('');
    } catch (error) {
      logger.error(`Failed to list API keys: ${(error as Error).message}`);
    }
  }

  /**
   * Generate a random API key
   */
  private generateApiKey(): string {
    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
    const length = 48;
    let key = 'sk-';

    for (let i = 0; i < length; i++) {
      key += chars.charAt(Math.floor(Math.random() * chars.length));
    }

    return key;
  }
}
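// Illustrative sketch, not part of this commit: generateApiKey() above draws
// from Math.random(), which is not a cryptographically secure source. If these
// keys gate a network-facing API, a variant built on node:crypto may be
// preferable. The function name below is hypothetical.

import { randomBytes } from 'node:crypto';

export function generateSecureApiKey(length = 48): string {
  const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  // randomBytes() is a CSPRNG; the modulo below introduces a slight bias
  // that is usually acceptable for opaque bearer tokens.
  const bytes = randomBytes(length);
  let key = 'sk-';
  for (let i = 0; i < length; i++) {
    key += chars[bytes[i] % chars.length];
  }
  return key;
}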
317
ts/cli/container-handler.ts
Normal file
@@ -0,0 +1,317 @@
/**
 * Container Handler
 *
 * CLI commands for container management.
 */

import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { DockerManager } from '../docker/docker-manager.ts';
import type { IContainerConfig } from '../interfaces/container.ts';
import type { ITableColumn } from '../logger.ts';
import * as helpers from '../helpers/index.ts';

/**
 * Handler for container-related CLI commands
 */
export class ContainerHandler {
  private containerManager: ContainerManager;
  private dockerManager: DockerManager;

  constructor(containerManager: ContainerManager) {
    this.containerManager = containerManager;
    this.dockerManager = new DockerManager();
  }

  /**
   * List all configured containers
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Containers');
    logger.log('');

    const containers = this.containerManager.getAllContainers();

    if (containers.length === 0) {
      logger.logBox(
        'No Containers',
        [
          'No containers are configured.',
          '',
          theme.dim('Add a container with:'),
          `  ${theme.command('modelgrid container add')}`,
        ],
        60,
        'warning',
      );
      return;
    }

    const rows = [];

    for (const container of containers) {
      const status = await container.getStatus();
      const config = container.getConfig();

      rows.push({
        id: config.id,
        name: config.name,
        type: this.formatContainerType(container.type),
        status: status.running
          ? theme.success('Running')
          : theme.dim('Stopped'),
        health: status.running
          ? this.formatHealth(status.health)
          : theme.dim('N/A'),
        port: config.externalPort || config.port,
        models: status.loadedModels.length,
        gpus: config.gpuIds.length > 0 ? config.gpuIds.join(',') : theme.dim('None'),
      });
    }

    const columns: ITableColumn[] = [
      { header: 'ID', key: 'id', align: 'left' },
      { header: 'Name', key: 'name', align: 'left', color: theme.highlight },
      { header: 'Type', key: 'type', align: 'left' },
      { header: 'Status', key: 'status', align: 'left' },
      { header: 'Health', key: 'health', align: 'left' },
      { header: 'Port', key: 'port', align: 'right', color: theme.info },
      { header: 'Models', key: 'models', align: 'right' },
      { header: 'GPUs', key: 'gpus', align: 'left' },
    ];

    logger.logTable(columns, rows);
    logger.log('');
  }

  /**
   * Add a new container interactively
   */
  public async add(): Promise<void> {
    const { prompt, close, select } = await helpers.createPrompt();

    try {
      logger.log('');
      logger.highlight('Add Container');
      logger.dim('Configure a new AI model container');
      logger.log('');

      // Select container type
      const typeIndex = await select('Select container type:', [
        'Ollama - Easy to use, good for local models',
        'vLLM - High performance, OpenAI compatible',
        'TGI - HuggingFace Text Generation Inference',
      ]);

      const types = ['ollama', 'vllm', 'tgi'] as const;
      const containerType = types[typeIndex];

      // Container name
      const name = await prompt('Container name: ');
      if (!name.trim()) {
        logger.error('Container name is required');
        return;
      }

      // Generate ID from name
      const id = name.toLowerCase().replace(/[^a-z0-9-]/g, '-');

      // Port
      const defaultPorts = { ollama: 11434, vllm: 8000, tgi: 8080 };
      const portStr = await prompt(`Port [${defaultPorts[containerType]}]: `);
      const port = portStr ? parseInt(portStr, 10) : defaultPorts[containerType];

      // GPU assignment
      const gpuStr = await prompt('GPU IDs (comma-separated, or "all", or empty for none): ');
      let gpuIds: string[] = [];

      if (gpuStr.trim().toLowerCase() === 'all') {
        const { GpuDetector } = await import('../hardware/gpu-detector.ts');
        const detector = new GpuDetector();
        const gpus = await detector.detectGpus();
        gpuIds = gpus.map((g) => g.id);
      } else if (gpuStr.trim()) {
        gpuIds = gpuStr.split(',').map((s) => s.trim());
      }

      // Build config
      const config: IContainerConfig = {
        id,
        type: containerType,
        name,
        image: this.getDefaultImage(containerType),
        port,
        gpuIds,
        models: [],
      };

      // Add container
      await this.containerManager.addContainer(config);

      logger.log('');
      logger.success(`Container "${name}" added successfully`);
      logger.log('');
      logger.dim('Start the container with:');
      logger.log(`  ${theme.command(`modelgrid container start ${id}`)}`);
      logger.log('');
    } finally {
      close();
    }
  }

  /**
   * Remove a container
   */
  public async remove(containerId: string): Promise<void> {
    if (!containerId) {
      logger.error('Container ID is required');
      return;
    }

    const { prompt, close } = await helpers.createPrompt();

    try {
      const confirm = await prompt(`Remove container "${containerId}"? (y/N): `);

      if (confirm.toLowerCase() !== 'y') {
        logger.log('Aborted');
        return;
      }

      const success = await this.containerManager.removeContainer(containerId);

      if (success) {
        logger.success(`Container "${containerId}" removed`);
      } else {
        logger.error(`Failed to remove container "${containerId}"`);
      }
    } finally {
      close();
    }
  }

  /**
   * Start a container
   */
  public async start(containerId?: string): Promise<void> {
    if (containerId) {
      // Start specific container
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
        logger.error(`Container "${containerId}" not found`);
        return;
      }

      logger.info(`Starting container "${containerId}"...`);
      const success = await container.start();

      if (success) {
        logger.success(`Container "${containerId}" started`);
      } else {
        logger.error(`Failed to start container "${containerId}"`);
      }
    } else {
      // Start all containers
      logger.info('Starting all containers...');
      await this.containerManager.startAll();
      logger.success('All containers started');
    }
  }

  /**
   * Stop a container
   */
  public async stop(containerId?: string): Promise<void> {
    if (containerId) {
      // Stop specific container
      const container = this.containerManager.getContainer(containerId);
      if (!container) {
        logger.error(`Container "${containerId}" not found`);
        return;
      }

      logger.info(`Stopping container "${containerId}"...`);
      const success = await container.stop();

      if (success) {
        logger.success(`Container "${containerId}" stopped`);
      } else {
        logger.error(`Failed to stop container "${containerId}"`);
      }
    } else {
      // Stop all containers
      logger.info('Stopping all containers...');
      await this.containerManager.stopAll();
      logger.success('All containers stopped');
    }
  }

  /**
   * Show container logs
   */
  public async logs(containerId: string, lines: number = 100): Promise<void> {
    if (!containerId) {
      logger.error('Container ID is required');
      return;
    }

    const container = this.containerManager.getContainer(containerId);
    if (!container) {
      logger.error(`Container "${containerId}" not found`);
      return;
    }

    const logs = await container.getLogs(lines);
    console.log(logs);
  }

  /**
   * Format container type for display
   */
  private formatContainerType(type: string): string {
    switch (type) {
      case 'ollama':
        return theme.containerOllama('Ollama');
      case 'vllm':
        return theme.containerVllm('vLLM');
      case 'tgi':
        return theme.containerTgi('TGI');
      default:
        return type;
    }
  }

  /**
   * Format health status
   */
  private formatHealth(health: string): string {
    switch (health) {
      case 'healthy':
        return theme.success('Healthy');
      case 'unhealthy':
        return theme.error('Unhealthy');
      case 'starting':
        return theme.warning('Starting');
      default:
        return theme.dim(health);
    }
  }

  /**
   * Get default image for container type
   */
  private getDefaultImage(type: string): string {
    switch (type) {
      case 'ollama':
        return 'ollama/ollama:latest';
      case 'vllm':
        return 'vllm/vllm-openai:latest';
      case 'tgi':
        return 'ghcr.io/huggingface/text-generation-inference:latest';
      default:
        return '';
    }
  }
}
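// Illustrative note, not part of this commit: the ID derived in add() maps
// every character outside [a-z0-9-] to '-', so user-facing names stay valid
// as Docker-friendly identifiers. For example:
//
//   'My GPU Box #1'.toLowerCase().replace(/[^a-z0-9-]/g, '-')  // 'my-gpu-box--1'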
255
ts/cli/gpu-handler.ts
Normal file
@@ -0,0 +1,255 @@
/**
 * GPU Handler
 *
 * CLI commands for GPU management.
 */

import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';
import { SystemInfo } from '../hardware/system-info.ts';
import { DriverManager } from '../drivers/driver-manager.ts';
import type { ITableColumn } from '../logger.ts';

/**
 * Handler for GPU-related CLI commands
 */
export class GpuHandler {
  private gpuDetector: GpuDetector;
  private systemInfo: SystemInfo;
  private driverManager: DriverManager;

  constructor() {
    this.gpuDetector = new GpuDetector();
    this.systemInfo = new SystemInfo();
    this.driverManager = new DriverManager();
  }

  /**
   * List detected GPUs
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Detecting GPUs...');
    logger.log('');

    const gpus = await this.gpuDetector.detectGpus();

    if (gpus.length === 0) {
      logger.logBox(
        'No GPUs Detected',
        [
          'No GPUs were found on this system.',
          '',
          theme.dim('Possible reasons:'),
          '  - No discrete GPU installed',
          '  - GPU drivers not installed',
          '  - GPU not properly connected',
        ],
        60,
        'warning',
      );
      return;
    }

    const rows = gpus.map((gpu) => ({
      id: gpu.id,
      vendor: this.formatVendor(gpu.vendor),
      model: gpu.model,
      vram: `${Math.round(gpu.vram / 1024)} GB`,
      driver: gpu.driverVersion || theme.dim('N/A'),
      cuda: gpu.cudaVersion || theme.dim('N/A'),
      pci: gpu.pciSlot,
    }));

    const columns: ITableColumn[] = [
      { header: 'ID', key: 'id', align: 'left' },
      { header: 'Vendor', key: 'vendor', align: 'left' },
      { header: 'Model', key: 'model', align: 'left', color: theme.highlight },
      { header: 'VRAM', key: 'vram', align: 'right', color: theme.info },
      { header: 'Driver', key: 'driver', align: 'left' },
      { header: 'CUDA', key: 'cuda', align: 'left' },
      { header: 'PCI', key: 'pci', align: 'left', color: theme.dim },
    ];

    logger.info(`Found ${gpus.length} GPU(s):`);
    logger.log('');
    logger.logTable(columns, rows);
    logger.log('');
  }

  /**
   * Show GPU status and utilization
   */
  public async status(): Promise<void> {
    logger.log('');
    logger.info('GPU Status');
    logger.log('');

    const gpuStatus = await this.gpuDetector.getGpuStatus();

    if (gpuStatus.length === 0) {
      logger.warn('No GPUs detected');
      return;
    }

    for (const gpu of gpuStatus) {
      const utilizationBar = this.createProgressBar(gpu.utilization, 30);
      const memoryBar = this.createProgressBar((gpu.memoryUsed / gpu.memoryTotal) * 100, 30);

      logger.logBoxTitle(`GPU ${gpu.id}: ${gpu.name}`, 70, 'info');
      logger.logBoxLine(`Utilization: ${utilizationBar} ${gpu.utilization.toFixed(1)}%`);
      logger.logBoxLine(`Memory: ${memoryBar} ${Math.round(gpu.memoryUsed)}/${Math.round(gpu.memoryTotal)} MB`);
      logger.logBoxLine(`Temperature: ${this.formatTemperature(gpu.temperature)}`);
      logger.logBoxLine(`Power: ${gpu.powerDraw.toFixed(0)}W / ${gpu.powerLimit.toFixed(0)}W`);
      logger.logBoxEnd();
      logger.log('');
    }
  }

  /**
   * Check and install GPU drivers
   */
  public async drivers(): Promise<void> {
    logger.log('');
    logger.info('GPU Driver Status');
    logger.log('');

    // Get system info first
    const sysInfo = await this.systemInfo.getSystemInfo();

    // Detect GPUs
    const gpus = await this.gpuDetector.detectGpus();

    if (gpus.length === 0) {
      logger.warn('No GPUs detected');
      return;
    }

    // Check driver status for each vendor
    const vendors = new Set(gpus.map((g) => g.vendor));

    for (const vendor of vendors) {
      const driver = this.driverManager.getDriver(vendor);
      if (!driver) {
        logger.warn(`No driver support for ${vendor}`);
        continue;
      }

      const status = await driver.getStatus();

      logger.logBoxTitle(`${this.formatVendor(vendor)} Driver`, 60, status.installed ? 'success' : 'warning');
      logger.logBoxLine(`Installed: ${status.installed ? theme.success('Yes') : theme.error('No')}`);

      if (status.installed) {
        logger.logBoxLine(`Version: ${status.version || 'Unknown'}`);
        logger.logBoxLine(`Runtime: ${status.runtimeVersion || 'Unknown'}`);
        logger.logBoxLine(`Container Support: ${status.containerSupport ? theme.success('Yes') : theme.warning('No')}`);
      } else {
        logger.logBoxLine('');
        logger.logBoxLine(theme.dim('Run `modelgrid gpu install` to install drivers'));
      }

      logger.logBoxEnd();
      logger.log('');
    }
  }

  /**
   * Install GPU drivers
   */
  public async install(): Promise<void> {
    logger.log('');
    logger.info('Installing GPU Drivers');
    logger.log('');

    // Detect GPUs
    const gpus = await this.gpuDetector.detectGpus();

    if (gpus.length === 0) {
      logger.error('No GPUs detected - cannot install drivers');
      return;
    }

    // Install drivers for each vendor
    const vendors = new Set(gpus.map((g) => g.vendor));

    for (const vendor of vendors) {
      const driver = this.driverManager.getDriver(vendor);
      if (!driver) {
        logger.warn(`No driver installer for ${vendor}`);
        continue;
      }

      logger.info(`Installing ${this.formatVendor(vendor)} drivers...`);

      const success = await driver.install();

      if (success) {
        logger.success(`${this.formatVendor(vendor)} drivers installed successfully`);

        // Setup container support
        logger.info('Setting up container support...');
        const containerSuccess = await driver.setupContainer();

        if (containerSuccess) {
          logger.success('Container support configured');
        } else {
          logger.warn('Container support setup failed - GPU passthrough may not work');
        }
      } else {
        logger.error(`Failed to install ${this.formatVendor(vendor)} drivers`);
      }

      logger.log('');
    }
  }

  /**
   * Format vendor name for display
   */
  private formatVendor(vendor: string): string {
    switch (vendor) {
      case 'nvidia':
        return theme.gpuNvidia('NVIDIA');
      case 'amd':
        return theme.gpuAmd('AMD');
      case 'intel':
        return theme.gpuIntel('Intel');
      default:
        return vendor;
    }
  }

  /**
   * Create a progress bar
   */
  private createProgressBar(percent: number, width: number): string {
    const filled = Math.round((percent / 100) * width);
    const empty = width - filled;
    const bar = '█'.repeat(filled) + '░'.repeat(empty);

    if (percent >= 90) {
      return theme.error(bar);
    } else if (percent >= 70) {
      return theme.warning(bar);
    } else {
      return theme.success(bar);
    }
  }

  /**
   * Format temperature with color coding
   */
  private formatTemperature(temp: number): string {
    const tempStr = `${temp}°C`;

    if (temp >= 85) {
      return theme.error(tempStr);
    } else if (temp >= 70) {
      return theme.warning(tempStr);
    } else {
      return theme.success(tempStr);
    }
  }
}
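// Illustrative note, not part of this commit: createProgressBar() fills
// round(percent / 100 * width) cells. With percent = 42 and width = 30 that
// is round(12.6) = 13 filled cells and 17 empty ones:
//
//   █████████████░░░░░░░░░░░░░░░░░   (rendered green, since 42 < 70)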
202
ts/cli/model-handler.ts
Normal file
@@ -0,0 +1,202 @@
/**
 * Model Handler
 *
 * CLI commands for model management.
 */

import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { ModelRegistry } from '../models/registry.ts';
import { ModelLoader } from '../models/loader.ts';
import type { ITableColumn } from '../logger.ts';

/**
 * Handler for model-related CLI commands
 */
export class ModelHandler {
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader: ModelLoader;

  constructor(
    containerManager: ContainerManager,
    modelRegistry: ModelRegistry,
  ) {
    this.containerManager = containerManager;
    this.modelRegistry = modelRegistry;
    this.modelLoader = new ModelLoader(modelRegistry, containerManager);
  }

  /**
   * List all available models
   */
  public async list(): Promise<void> {
    logger.log('');
    logger.info('Models');
    logger.log('');

    // Get loaded models from containers
    const loadedModels = await this.containerManager.getAllAvailableModels();

    // Get greenlit models
    const greenlitModels = await this.modelRegistry.getAllGreenlitModels();

    if (loadedModels.size === 0 && greenlitModels.length === 0) {
      logger.logBox(
        'No Models',
        [
          'No models are loaded or greenlit.',
          '',
          theme.dim('Pull a model with:'),
          `  ${theme.command('modelgrid model pull <name>')}`,
        ],
        60,
        'warning',
      );
      return;
    }

    // Show loaded models
    if (loadedModels.size > 0) {
      logger.info(`Loaded Models (${loadedModels.size}):`);
      logger.log('');

      const rows = [];
      for (const [name, info] of loadedModels) {
        rows.push({
          name,
          container: info.container,
          size: info.size ? this.formatSize(info.size) : theme.dim('N/A'),
          format: info.format || theme.dim('N/A'),
          modified: info.modifiedAt
            ? new Date(info.modifiedAt).toLocaleDateString()
            : theme.dim('N/A'),
        });
      }

      const columns: ITableColumn[] = [
        { header: 'Name', key: 'name', align: 'left', color: theme.highlight },
        { header: 'Container', key: 'container', align: 'left' },
        { header: 'Size', key: 'size', align: 'right', color: theme.info },
        { header: 'Format', key: 'format', align: 'left' },
        { header: 'Modified', key: 'modified', align: 'left', color: theme.dim },
      ];

      logger.logTable(columns, rows);
      logger.log('');
    }

    // Show greenlit models (not yet loaded)
    const loadedNames = new Set(loadedModels.keys());
    const unloadedGreenlit = greenlitModels.filter((m) => !loadedNames.has(m.name));

    if (unloadedGreenlit.length > 0) {
      logger.info(`Available to Pull (${unloadedGreenlit.length}):`);
      logger.log('');

      const rows = unloadedGreenlit.map((m) => ({
        name: m.name,
        container: m.container,
        vram: `${m.minVram} GB`,
        tags: m.tags?.join(', ') || theme.dim('None'),
      }));

      const columns: ITableColumn[] = [
        { header: 'Name', key: 'name', align: 'left' },
        { header: 'Container', key: 'container', align: 'left' },
        { header: 'Min VRAM', key: 'vram', align: 'right', color: theme.info },
        { header: 'Tags', key: 'tags', align: 'left', color: theme.dim },
      ];

      logger.logTable(columns, rows);
      logger.log('');
    }
  }

  /**
   * Pull a model
   */
  public async pull(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model name is required');
      return;
    }

    logger.log('');
    logger.info(`Pulling model: ${modelName}`);
    logger.log('');

    const result = await this.modelLoader.loadModel(modelName);

    if (result.success) {
      if (result.alreadyLoaded) {
        logger.success(`Model "${modelName}" is already loaded`);
      } else {
        logger.success(`Model "${modelName}" pulled successfully`);
      }
      if (result.container) {
        logger.dim(`Container: ${result.container}`);
      }
    } else {
      logger.error(`Failed to pull model: ${result.error}`);
    }

    logger.log('');
  }

  /**
   * Remove a model
   */
  public async remove(modelName: string): Promise<void> {
    if (!modelName) {
      logger.error('Model name is required');
      return;
    }

    logger.info(`Removing model: ${modelName}`);

    const success = await this.modelLoader.unloadModel(modelName);

    if (success) {
      logger.success(`Model "${modelName}" removed`);
    } else {
      logger.error(`Failed to remove model "${modelName}"`);
    }
  }

  /**
   * Show model loading status and recommendations
   */
  public async status(): Promise<void> {
    logger.log('');
    await this.modelLoader.printStatus();
  }

  /**
   * Refresh greenlist cache
   */
  public async refresh(): Promise<void> {
    logger.info('Refreshing greenlist...');

    await this.modelRegistry.refreshGreenlist();

    logger.success('Greenlist refreshed');
  }

  /**
   * Format file size
   */
  private formatSize(bytes: number): string {
    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
    let size = bytes;
    let unitIndex = 0;

    while (size >= 1024 && unitIndex < units.length - 1) {
      size /= 1024;
      unitIndex++;
    }

    return `${size.toFixed(1)} ${units[unitIndex]}`;
  }
}
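// Illustrative note, not part of this commit: formatSize() divides by 1024
// until the value drops below 1024, so it uses binary steps despite the
// SI-style labels:
//
//   formatSize(1536)          // '1.5 KB'
//   formatSize(4_700_000_000) // '4.4 GB'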
252
ts/cli/service-handler.ts
Normal file
@@ -0,0 +1,252 @@
/**
 * Service Handler
 *
 * CLI commands for systemd service management.
 */

import process from 'node:process';
import { execSync } from 'node:child_process';
import { logger } from '../logger.ts';
import { theme } from '../colors.ts';
import { PATHS } from '../constants.ts';
import type { ModelGrid } from '../modelgrid.ts';

/**
 * Handler for service-related CLI commands
 */
export class ServiceHandler {
  private readonly modelgrid: ModelGrid;

  constructor(modelgrid: ModelGrid) {
    this.modelgrid = modelgrid;
  }

  /**
   * Enable the service (requires root)
   */
  public async enable(): Promise<void> {
    this.checkRootAccess('This command must be run as root.');
    await this.modelgrid.getSystemd().install();
    logger.log('ModelGrid service has been installed. Use "modelgrid service start" to start the service.');
  }

  /**
   * Start the daemon directly
   */
  public async daemonStart(debugMode: boolean = false): Promise<void> {
    logger.log('Starting ModelGrid daemon...');
    try {
      if (debugMode) {
        logger.log('Debug mode enabled');
      }
      await this.modelgrid.getDaemon().start();
    } catch (error) {
      logger.error(`Daemon start failed: ${error instanceof Error ? error.message : String(error)}`);
      process.exit(1);
    }
  }

  /**
   * Show logs of the systemd service
   */
  public async logs(): Promise<void> {
    try {
      const { spawn } = await import('node:child_process');
      logger.log('Tailing modelgrid service logs (Ctrl+C to exit)...\n');

      const journalctl = spawn('journalctl', ['-u', 'modelgrid.service', '-n', '50', '-f'], {
        stdio: ['ignore', 'inherit', 'inherit'],
      });

      process.on('SIGINT', () => {
        journalctl.kill('SIGINT');
        process.exit(0);
      });

      await new Promise<void>((resolve) => {
        journalctl.on('exit', () => resolve());
      });
    } catch (error) {
      logger.error(`Failed to retrieve logs: ${error}`);
      process.exit(1);
    }
  }

  /**
   * Stop the systemd service
   */
  public async stop(): Promise<void> {
    await this.modelgrid.getSystemd().stop();
  }

  /**
   * Start the systemd service
   */
  public async start(): Promise<void> {
    try {
      await this.modelgrid.getSystemd().start();
    } catch {
      process.exit(1);
    }
  }

  /**
   * Show status of the systemd service
   */
  public async status(): Promise<void> {
    await this.modelgrid.getSystemd().getStatus();
  }

  /**
   * Disable the service (requires root)
   */
  public async disable(): Promise<void> {
    this.checkRootAccess('This command must be run as root.');
    await this.modelgrid.getSystemd().disable();
  }

  /**
   * Check if the user has root access
   */
  private checkRootAccess(errorMessage: string): void {
    if (process.getuid && process.getuid() !== 0) {
      logger.error(errorMessage);
      process.exit(1);
    }
  }

  /**
   * Update ModelGrid from repository
   */
  public async update(): Promise<void> {
    try {
      this.checkRootAccess('This command must be run as root to update ModelGrid.');

      console.log('');
      logger.info('Checking for updates...');

      try {
        const currentVersion = this.modelgrid.getVersion();
        const apiUrl = 'https://code.foss.global/api/v1/repos/modelgrid.com/modelgrid/releases/latest';
        const response = execSync(`curl -sSL ${apiUrl}`).toString();
        const release = JSON.parse(response);
        const latestVersion = release.tag_name;

        const normalizedCurrent = currentVersion.startsWith('v') ? currentVersion : `v${currentVersion}`;
        const normalizedLatest = latestVersion.startsWith('v') ? latestVersion : `v${latestVersion}`;

        logger.dim(`Current version: ${normalizedCurrent}`);
        logger.dim(`Latest version: ${normalizedLatest}`);
        console.log('');

        if (normalizedCurrent === normalizedLatest) {
          logger.success('Already up to date!');
          console.log('');
          return;
        }

        logger.info(`New version available: ${latestVersion}`);
        logger.dim('Downloading and installing...');
        console.log('');

        const installUrl = 'https://code.foss.global/modelgrid.com/modelgrid/raw/branch/main/install.sh';

        execSync(`curl -sSL ${installUrl} | bash`, {
          stdio: 'inherit',
        });

        console.log('');
        logger.success(`Updated to ${latestVersion}`);
        console.log('');
      } catch (error) {
        console.log('');
        logger.error('Update failed');
        logger.dim(`${error instanceof Error ? error.message : String(error)}`);
        console.log('');
        process.exit(1);
      }
    } catch (error) {
      logger.error(`Update failed: ${error instanceof Error ? error.message : String(error)}`);
      process.exit(1);
    }
  }

  /**
   * Completely uninstall ModelGrid from the system
   */
  public async uninstall(): Promise<void> {
    this.checkRootAccess('This command must be run as root.');

    try {
      const helpers = await import('../helpers/index.ts');
      const { prompt, close } = await helpers.createPrompt();

      logger.log('');
      logger.highlight('ModelGrid Uninstaller');
      logger.dim('=====================');
      logger.log('This will completely remove ModelGrid from your system.');
      logger.log('');

      const removeConfig = await prompt('Do you want to remove configuration files? (y/N): ');
      const removeContainers = await prompt('Do you want to remove Docker containers? (y/N): ');

      close();

      // Stop service first
      try {
        await this.modelgrid.getSystemd().stop();
      } catch {
        // Service might not be running
      }

      // Disable service
      try {
        await this.modelgrid.getSystemd().disable();
      } catch {
        // Service might not be installed
      }

      // Remove containers if requested
      if (removeContainers.toLowerCase() === 'y') {
        logger.info('Removing Docker containers...');
        try {
          execSync('docker rm -f $(docker ps -aq --filter "name=modelgrid")', { stdio: 'pipe' });
        } catch {
          // No containers to remove
        }
      }

      // Remove configuration if requested
      if (removeConfig.toLowerCase() === 'y') {
        logger.info('Removing configuration...');
        try {
          const { rm } = await import('node:fs/promises');
          await rm(PATHS.CONFIG_DIR, { recursive: true, force: true });
        } catch {
          // Config might not exist
        }
      }

      // Run uninstall script
      const { dirname, join } = await import('node:path');
      const binPath = process.argv[1];
      const modulePath = dirname(dirname(binPath));
      const uninstallScriptPath = join(modulePath, 'uninstall.sh');

      logger.log('');
      logger.log(`Running uninstaller from ${uninstallScriptPath}...`);

      execSync(`sudo bash ${uninstallScriptPath}`, {
        env: {
          ...process.env,
          REMOVE_CONFIG: removeConfig.toLowerCase() === 'y' ? 'yes' : 'no',
          MODELGRID_CLI_CALL: 'true',
        },
        stdio: 'inherit',
      });
    } catch (error) {
      logger.error(`Uninstall failed: ${error instanceof Error ? error.message : String(error)}`);
      process.exit(1);
    }
  }
}
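// Illustrative note, not part of this commit: update() only compares versions
// after forcing a leading 'v' on both sides, so a release tagged 'v1.2.0' and
// a binary reporting '1.2.0' normalize to the same string:
//
//   const v = '1.2.0';
//   v.startsWith('v') ? v : `v${v}`; // 'v1.2.0'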
157
ts/colors.ts
Normal file
@@ -0,0 +1,157 @@
/**
 * Color theme and styling utilities for ModelGrid CLI
 * Uses Deno standard library colors module
 */
import * as colors from '@std/fmt/colors';

/**
 * Color theme for consistent CLI styling
 */
export const theme = {
  // Message types
  error: colors.red,
  warning: colors.yellow,
  success: colors.green,
  info: colors.cyan,
  dim: colors.dim,
  highlight: colors.bold,

  // Status indicators
  statusActive: (text: string) => colors.green(colors.bold(text)),
  statusInactive: (text: string) => colors.red(text),
  statusWarning: (text: string) => colors.yellow(text),
  statusUnknown: (text: string) => colors.dim(text),

  // GPU status colors
  gpuHealthy: colors.green, // GPU healthy
  gpuWarning: colors.yellow, // GPU warning
  gpuError: colors.red, // GPU error

  // VRAM usage colors
  vramGood: colors.green, // < 60% usage
  vramMedium: colors.yellow, // 60-85% usage
  vramCritical: colors.red, // > 85% usage

  // Container status colors
  containerRunning: colors.green,
  containerStopped: colors.red,
  containerStarting: colors.yellow,

  // Box borders
  borderSuccess: colors.green,
  borderError: colors.red,
  borderWarning: colors.yellow,
  borderInfo: colors.cyan,
  borderDefault: (text: string) => text, // No color

  // Command/code highlighting
  command: colors.cyan,
  code: colors.dim,
  path: colors.blue,
  model: colors.magenta,
};

/**
 * Status symbols with colors
 */
export const symbols = {
  success: colors.green('✓'),
  error: colors.red('✗'),
  warning: colors.yellow('⚠'),
  info: colors.cyan('ℹ'),
  running: colors.green('●'),
  stopped: colors.red('○'),
  starting: colors.yellow('◐'),
  unknown: colors.dim('◯'),
  gpu: colors.cyan('◆'),
  container: colors.blue('▣'),
  model: colors.magenta('◈'),
};

/**
 * Get color for VRAM usage percentage
 */
export function getVramColor(percentage: number): (text: string) => string {
  if (percentage < 60) return theme.vramGood;
  if (percentage < 85) return theme.vramMedium;
  return theme.vramCritical;
}

/**
 * Get color for GPU utilization
 */
export function getGpuUtilColor(percentage: number): (text: string) => string {
  if (percentage < 60) return theme.gpuHealthy;
  if (percentage < 85) return theme.gpuWarning;
  return theme.gpuError;
}

/**
 * Format GPU vendor with color
 */
export function formatGpuVendor(vendor: 'nvidia' | 'amd' | 'intel' | 'unknown'): string {
  switch (vendor) {
    case 'nvidia':
      return colors.green('NVIDIA');
    case 'amd':
      return colors.red('AMD');
    case 'intel':
      return colors.blue('Intel');
    case 'unknown':
    default:
      return colors.dim('Unknown');
  }
}

/**
 * Format container status with color
 */
export function formatContainerStatus(
  status: 'running' | 'stopped' | 'starting' | 'error' | 'unknown',
): string {
  switch (status) {
    case 'running':
      return theme.containerRunning('Running');
    case 'stopped':
      return theme.containerStopped('Stopped');
    case 'starting':
      return theme.containerStarting('Starting');
    case 'error':
      return theme.error('Error');
    case 'unknown':
    default:
      return theme.dim('Unknown');
  }
}

/**
 * Format container type with color
 */
export function formatContainerType(type: 'ollama' | 'vllm' | 'tgi' | 'custom'): string {
  switch (type) {
    case 'ollama':
      return colors.green('Ollama');
    case 'vllm':
      return colors.cyan('vLLM');
    case 'tgi':
      return colors.magenta('TGI');
    case 'custom':
      return colors.yellow('Custom');
  }
}

/**
 * Format model status with color
 */
export function formatModelStatus(status: 'loaded' | 'loading' | 'unloaded' | 'error'): string {
  switch (status) {
    case 'loaded':
      return theme.success('Loaded');
    case 'loading':
      return theme.warning('Loading');
    case 'unloaded':
      return theme.dim('Unloaded');
    case 'error':
      return theme.error('Error');
  }
}
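// Illustrative usage sketch, not part of this commit: the theme entries and
// threshold helpers compose directly into log lines, e.g.
//
//   import { getVramColor, symbols, theme } from './colors.ts';
//
//   const pct = 72; // 60 <= pct < 85, so getVramColor picks vramMedium
//   console.log(`${symbols.gpu} VRAM ${getVramColor(pct)(`${pct}%`)} ${theme.dim('(GPU 0)')}`);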
175
ts/constants.ts
Normal file
@@ -0,0 +1,175 @@
/**
 * ModelGrid Constants
 *
 * Central location for all timeout, interval, and configuration values.
 * This makes configuration easier and code more self-documenting.
 */

/**
 * Default timing values in milliseconds
 */
export const TIMING = {
  /** Default interval between container health checks (30 seconds) */
  CHECK_INTERVAL_MS: 30000,

  /** Interval for idle monitoring mode (60 seconds) */
  IDLE_CHECK_INTERVAL_MS: 60000,

  /** Interval for checking config file changes (60 seconds) */
  CONFIG_CHECK_INTERVAL_MS: 60000,

  /** Interval for logging periodic status updates (5 minutes) */
  LOG_INTERVAL_MS: 5 * 60 * 1000,

  /** Timeout for GPU driver detection (10 seconds) */
  GPU_DETECTION_TIMEOUT_MS: 10000,

  /** Timeout for Docker commands (30 seconds) */
  DOCKER_COMMAND_TIMEOUT_MS: 30000,

  /** Timeout for container startup (2 minutes) */
  CONTAINER_STARTUP_TIMEOUT_MS: 2 * 60 * 1000,

  /** Timeout for model loading (10 minutes) */
  MODEL_LOAD_TIMEOUT_MS: 10 * 60 * 1000,

  /** Greenlit model list cache duration (1 hour) */
  GREENLIST_CACHE_DURATION_MS: 60 * 60 * 1000,
} as const;

/**
 * API Server constants
 */
export const API_SERVER = {
  /** Default API server port */
  DEFAULT_PORT: 8080,

  /** Default API server host */
  DEFAULT_HOST: '0.0.0.0',

  /** Default rate limit (requests per minute) */
  DEFAULT_RATE_LIMIT: 60,

  /** Request timeout (30 seconds) */
  REQUEST_TIMEOUT_MS: 30000,

  /** Stream keep-alive interval (15 seconds) */
  STREAM_KEEPALIVE_MS: 15000,
} as const;

/**
 * Docker/Container constants
 */
export const DOCKER = {
  /** Default Docker network name */
  DEFAULT_NETWORK: 'modelgrid',

  /** Container health check interval (10 seconds) */
  HEALTH_CHECK_INTERVAL_MS: 10000,

  /** Container restart delay (5 seconds) */
  RESTART_DELAY_MS: 5000,

  /** Maximum container restart attempts */
  MAX_RESTART_ATTEMPTS: 3,
} as const;

/**
 * GPU-related constants
 */
export const GPU = {
  /** Minimum VRAM for most models (8GB) */
  MIN_VRAM_GB: 8,

  /** Recommended VRAM for larger models (24GB) */
  RECOMMENDED_VRAM_GB: 24,

  /** GPU utilization threshold for load balancing (80%) */
  UTILIZATION_THRESHOLD_PERCENT: 80,
} as const;

/**
 * Container port mapping defaults
 */
export const CONTAINER_PORTS = {
  /** Ollama default port */
  OLLAMA: 11434,

  /** vLLM default port */
  VLLM: 8000,

  /** TGI (Text Generation Inference) default port */
  TGI: 8080,
} as const;

/**
 * Container image defaults
 */
export const CONTAINER_IMAGES = {
  /** Ollama official image */
  OLLAMA: 'ollama/ollama:latest',

  /** vLLM official image */
  VLLM: 'vllm/vllm-openai:latest',

  /** TGI official image */
  TGI: 'ghcr.io/huggingface/text-generation-inference:latest',
} as const;

/**
 * Model registry constants
 */
export const MODEL_REGISTRY = {
  /** Default greenlit models URL */
  DEFAULT_GREENLIST_URL:
    'https://code.foss.global/modelgrid.com/model_lists/raw/branch/main/greenlit.json',

  /** Fallback greenlist if remote fetch fails */
  FALLBACK_GREENLIST: [
    { name: 'llama3.2:1b', container: 'ollama', minVram: 4 },
    { name: 'llama3.2:3b', container: 'ollama', minVram: 6 },
    { name: 'llama3:8b', container: 'ollama', minVram: 8 },
    { name: 'mistral:7b', container: 'ollama', minVram: 8 },
    { name: 'codellama:7b', container: 'ollama', minVram: 8 },
  ],
} as const;

/**
 * Configuration paths
 */
export const PATHS = {
  /** Default configuration directory */
  CONFIG_DIR: '/etc/modelgrid',

  /** Default configuration file */
  CONFIG_FILE: '/etc/modelgrid/config.json',

  /** Default data directory */
  DATA_DIR: '/var/lib/modelgrid',

  /** Default log directory */
  LOG_DIR: '/var/log/modelgrid',

  /** Systemd service file path */
  SYSTEMD_SERVICE: '/etc/systemd/system/modelgrid.service',

  /** Binary installation path */
  BINARY_PATH: '/usr/local/bin/modelgrid',

  /** Working directory */
  WORK_DIR: '/opt/modelgrid',
} as const;

/**
 * UI/Display constants
 */
export const UI = {
  /** Default width for log boxes */
  DEFAULT_BOX_WIDTH: 50,

  /** Wide box width for status displays */
  WIDE_BOX_WIDTH: 65,

  /** Extra wide box width for detailed info */
  EXTRA_WIDE_BOX_WIDTH: 80,
} as const;
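// Illustrative usage sketch, not part of this commit: the `as const` blocks
// give each value a literal, readonly type, so consumers get autocompletion
// and compile-time guarantees:
//
//   import { CONTAINER_PORTS, TIMING } from './constants.ts';
//
//   const deadline = Date.now() + TIMING.CONTAINER_STARTUP_TIMEOUT_MS;
//   const port: 11434 = CONTAINER_PORTS.OLLAMA; // literal type, not just `number`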
216
ts/containers/base-container.ts
Normal file
@@ -0,0 +1,216 @@
/**
 * Base Container
 *
 * Abstract base class for AI model containers.
 */

import type {
  IContainerConfig,
  IContainerStatus,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type { IChatCompletionRequest, IChatCompletionResponse } from '../interfaces/api.ts';
import { ContainerRuntime } from '../docker/container-runtime.ts';
import { logger } from '../logger.ts';

/**
 * Model pull progress callback
 */
export type TModelPullProgress = (progress: {
  model: string;
  status: string;
  percent?: number;
}) => void;

/**
 * Abstract base class for AI model containers
 */
export abstract class BaseContainer {
  /** Container type */
  public abstract readonly type: TContainerType;

  /** Display name */
  public abstract readonly displayName: string;

  /** Default Docker image */
  public abstract readonly defaultImage: string;

  /** Default internal port */
  public abstract readonly defaultPort: number;

  /** Container configuration */
  protected config: IContainerConfig;

  /** Container runtime */
  protected runtime: ContainerRuntime;

  constructor(config: IContainerConfig) {
    this.config = config;
    this.runtime = new ContainerRuntime();
  }

  /**
   * Get the container configuration
   */
  public getConfig(): IContainerConfig {
    return this.config;
  }

  /**
   * Get the endpoint URL for this container
   */
  public getEndpoint(): string {
    const port = this.config.externalPort || this.config.port;
    return `http://localhost:${port}`;
  }

  /**
   * Start the container
   */
  public async start(): Promise<boolean> {
    logger.info(`Starting ${this.displayName} container: ${this.config.name}`);
    return this.runtime.startContainer(this.config);
  }

  /**
   * Stop the container
   */
  public async stop(): Promise<boolean> {
    logger.info(`Stopping ${this.displayName} container: ${this.config.name}`);
    return this.runtime.stopContainer(this.config.id);
  }

  /**
   * Restart the container
   */
  public async restart(): Promise<boolean> {
    logger.info(`Restarting ${this.displayName} container: ${this.config.name}`);
    return this.runtime.restartContainer(this.config.id);
  }

  /**
   * Remove the container
   */
  public async remove(): Promise<boolean> {
    logger.info(`Removing ${this.displayName} container: ${this.config.name}`);
    return this.runtime.removeContainer(this.config.id);
  }

  /**
   * Get container status
   */
  public async getStatus(): Promise<IContainerStatus> {
    return this.runtime.getContainerStatus(this.config);
  }

  /**
   * Get container logs
   */
  public async getLogs(lines: number = 100): Promise<string> {
    return this.runtime.getLogs(this.config.id, { lines });
  }

  /**
   * Check if the container is healthy
   */
  public abstract isHealthy(): Promise<boolean>;

  /**
   * Get list of available models
   */
  public abstract listModels(): Promise<string[]>;

  /**
   * Get list of loaded models with details
   */
  public abstract getLoadedModels(): Promise<ILoadedModel[]>;

  /**
   * Pull a model
   */
  public abstract pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean>;

  /**
   * Remove a model
   */
  public abstract removeModel(modelName: string): Promise<boolean>;

  /**
   * Send a chat completion request
   */
  public abstract chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse>;

  /**
   * Stream a chat completion request
   */
  public abstract chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void>;

  /**
   * Make HTTP request to container
   */
  protected async fetch(
    path: string,
    options: {
      method?: string;
      headers?: Record<string, string>;
      body?: unknown;
      timeout?: number;
    } = {},
  ): Promise<Response> {
    const endpoint = this.getEndpoint();
    const url = `${endpoint}${path}`;

    const controller = new AbortController();
    const timeout = options.timeout || 30000;
    const timeoutId = setTimeout(() => controller.abort(), timeout);

    try {
      const response = await fetch(url, {
        method: options.method || 'GET',
        headers: {
          'Content-Type': 'application/json',
          ...options.headers,
        },
        body: options.body ? JSON.stringify(options.body) : undefined,
        signal: controller.signal,
      });

      return response;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Make HTTP request and parse JSON response
   */
  protected async fetchJson<T>(
    path: string,
    options: {
      method?: string;
      headers?: Record<string, string>;
      body?: unknown;
      timeout?: number;
    } = {},
  ): Promise<T> {
    const response = await this.fetch(path, options);

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`HTTP ${response.status}: ${errorText}`);
    }

    return response.json();
  }

  /**
   * Generate a unique request ID
   */
  protected generateRequestId(): string {
    return `chatcmpl-${Date.now().toString(36)}-${Math.random().toString(36).substring(2, 8)}`;
  }
}
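// --- Illustrative usage sketch (editor's addition, not part of this commit) ---
// Any concrete subclass can be driven through the same abstract surface:
// check health, then issue a non-streaming completion. The request literal
// below assumes IChatCompletionRequest's OpenAI-style fields.
export async function exampleAskContainer(
  container: BaseContainer,
  prompt: string,
): Promise<string | null> {
  if (!(await container.isHealthy())) {
    return null;
  }

  const models = await container.listModels();
  if (models.length === 0) {
    return null;
  }

  const response = await container.chatCompletion({
    model: models[0],
    messages: [{ role: 'user', content: prompt }],
  } as IChatCompletionRequest);

  return response.choices[0]?.message.content ?? null;
}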
349
ts/containers/container-manager.ts
Normal file
@@ -0,0 +1,349 @@
/**
 * Container Manager
 *
 * Orchestrates multiple AI model containers.
 */

import type {
  IContainerConfig,
  IContainerStatus,
  IContainerEndpoint,
  TContainerType,
} from '../interfaces/container.ts';
import { logger } from '../logger.ts';
import { DockerManager } from '../docker/docker-manager.ts';
import { BaseContainer } from './base-container.ts';
import { OllamaContainer } from './ollama.ts';
import { VllmContainer } from './vllm.ts';
import { TgiContainer } from './tgi.ts';

/**
 * Container Manager - orchestrates all containers
 */
export class ContainerManager {
  private containers: Map<string, BaseContainer>;
  private dockerManager: DockerManager;

  constructor() {
    this.containers = new Map();
    this.dockerManager = new DockerManager();
  }

  /**
   * Initialize container manager
   */
  public async initialize(): Promise<void> {
    // Ensure Docker is running
    if (!await this.dockerManager.isRunning()) {
      throw new Error('Docker is not running');
    }

    // Create network if it doesn't exist
    await this.dockerManager.createNetwork();
  }

  /**
   * Create a container instance from config
   */
  private createContainerInstance(config: IContainerConfig): BaseContainer {
    switch (config.type) {
      case 'ollama':
        return new OllamaContainer(config);
      case 'vllm':
        return new VllmContainer(config);
      case 'tgi':
        return new TgiContainer(config);
      default:
        throw new Error(`Unknown container type: ${config.type}`);
    }
  }

  /**
   * Add a container
   */
  public addContainer(config: IContainerConfig): BaseContainer {
    if (this.containers.has(config.id)) {
      throw new Error(`Container with ID ${config.id} already exists`);
    }

    const container = this.createContainerInstance(config);
    this.containers.set(config.id, container);
    return container;
  }

  /**
   * Remove a container
   */
  public async removeContainer(containerId: string): Promise<boolean> {
    const container = this.containers.get(containerId);
    if (!container) {
      return false;
    }

    await container.remove();
    this.containers.delete(containerId);
    return true;
  }

  /**
   * Get a container by ID
   */
  public getContainer(containerId: string): BaseContainer | undefined {
    return this.containers.get(containerId);
  }

  /**
   * Get all containers
   */
  public getAllContainers(): BaseContainer[] {
    return Array.from(this.containers.values());
  }

  /**
   * Load containers from configuration
   */
  public loadFromConfig(configs: IContainerConfig[]): void {
    this.containers.clear();
    for (const config of configs) {
      try {
        this.addContainer(config);
      } catch (error) {
        logger.warn(`Failed to load container ${config.id}: ${error instanceof Error ? error.message : String(error)}`);
      }
    }
  }

  /**
   * Start all containers
   */
  public async startAll(): Promise<Map<string, boolean>> {
    const results = new Map<string, boolean>();

    for (const [id, container] of this.containers) {
      if (!container.getConfig().autoStart) {
        continue;
      }

      try {
        const success = await container.start();
        results.set(id, success);
      } catch (error) {
        logger.error(`Failed to start container ${id}: ${error instanceof Error ? error.message : String(error)}`);
        results.set(id, false);
      }
    }

    return results;
  }

  /**
   * Stop all containers
   */
  public async stopAll(): Promise<Map<string, boolean>> {
    const results = new Map<string, boolean>();

    for (const [id, container] of this.containers) {
      try {
        const success = await container.stop();
        results.set(id, success);
      } catch (error) {
        logger.error(`Failed to stop container ${id}: ${error instanceof Error ? error.message : String(error)}`);
        results.set(id, false);
      }
    }

    return results;
  }

  /**
   * Get status of all containers
   */
  public async getAllStatus(): Promise<Map<string, IContainerStatus>> {
    const statuses = new Map<string, IContainerStatus>();

    for (const [id, container] of this.containers) {
      try {
        const status = await container.getStatus();
        statuses.set(id, status);
      } catch (error) {
        logger.warn(`Failed to get status for container ${id}: ${error instanceof Error ? error.message : String(error)}`);
      }
    }

    return statuses;
  }

  /**
   * Get available endpoints for a model
   */
  public async getEndpointsForModel(modelName: string): Promise<IContainerEndpoint[]> {
    const endpoints: IContainerEndpoint[] = [];

    for (const [_id, container] of this.containers) {
      try {
        const status = await container.getStatus();

        if (!status.running) {
          continue;
        }

        // Check if container has this model
        const models = await container.listModels();
        if (!models.includes(modelName)) {
          continue;
        }

        endpoints.push({
          containerId: container.getConfig().id,
          type: container.type,
          url: container.getEndpoint(),
          models,
          healthy: status.health === 'healthy',
          priority: 0, // Could be based on load
        });
      } catch {
        // Skip containers that fail to respond
      }
    }

    return endpoints;
  }

  /**
   * Find best container for a model
   */
  public async findContainerForModel(modelName: string): Promise<BaseContainer | null> {
    const endpoints = await this.getEndpointsForModel(modelName);

    // Filter to healthy endpoints
    const healthy = endpoints.filter((e) => e.healthy);
    if (healthy.length === 0) {
      return null;
    }

    // Return first healthy endpoint (could add load balancing)
    const endpoint = healthy[0];
    return this.containers.get(endpoint.containerId) || null;
  }

  /**
   * Get all available models across all containers
   */
  public async getAllAvailableModels(): Promise<Map<string, IContainerEndpoint[]>> {
    const modelMap = new Map<string, IContainerEndpoint[]>();

    for (const container of this.containers.values()) {
      try {
        const status = await container.getStatus();
        if (!status.running) continue;

        const models = await container.listModels();

        for (const model of models) {
          if (!modelMap.has(model)) {
            modelMap.set(model, []);
          }

          modelMap.get(model)!.push({
            containerId: container.getConfig().id,
            type: container.type,
            url: container.getEndpoint(),
            models,
            healthy: status.health === 'healthy',
            priority: 0,
          });
        }
      } catch {
        // Skip failed containers
      }
    }

    return modelMap;
  }

  /**
   * Pull a model to a specific container type
   */
  public async pullModel(
    modelName: string,
    containerType: TContainerType = 'ollama',
    containerId?: string,
  ): Promise<boolean> {
    // Find or create appropriate container
    let container: BaseContainer | undefined;

    if (containerId) {
      container = this.containers.get(containerId);
    } else {
      // Find first container of the specified type
      for (const c of this.containers.values()) {
        if (c.type === containerType) {
          container = c;
          break;
        }
      }
    }

    if (!container) {
      logger.error(`No ${containerType} container available to pull model`);
      return false;
    }

    return container.pullModel(modelName, (progress) => {
      const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
      logger.dim(`  ${progress.status}${percent}`);
    });
  }

  /**
   * Health check all containers
   */
  public async healthCheck(): Promise<Map<string, boolean>> {
    const results = new Map<string, boolean>();

    for (const [id, container] of this.containers) {
      try {
        const healthy = await container.isHealthy();
        results.set(id, healthy);
      } catch {
        results.set(id, false);
      }
    }

    return results;
  }

  /**
   * Print container status summary
   */
  public async printStatus(): Promise<void> {
    const statuses = await this.getAllStatus();

    if (statuses.size === 0) {
      logger.logBox('Containers', ['No containers configured'], 50, 'warning');
      return;
    }

    logger.logBoxTitle('Container Status', 70, 'info');

    for (const [id, status] of statuses) {
      const runningStr = status.running ? 'Running' : 'Stopped';
      const healthStr = status.health;
      const modelsStr = status.loadedModels.length > 0
        ? status.loadedModels.join(', ')
        : 'None';

      logger.logBoxLine(`${status.name} (${id})`);
      logger.logBoxLine(`  Type: ${status.type} | Status: ${runningStr} | Health: ${healthStr}`);
      logger.logBoxLine(`  Models: ${modelsStr}`);
      logger.logBoxLine(`  Endpoint: ${status.endpoint}`);

      if (status.gpuUtilization !== undefined) {
        logger.logBoxLine(`  GPU: ${status.gpuUtilization}% | Memory: ${status.memoryUsage || 0}MB`);
      }
      logger.logBoxLine('');
    }

    logger.logBoxEnd();
  }
}
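// --- Illustrative usage sketch (editor's addition, not part of this commit) ---
// Minimal wiring of the manager: register one Ollama container on GPU 0,
// start everything, then route a model name to a healthy endpoint. The
// container ID, name, GPU ID, and model name are placeholders.
export async function exampleManagerWiring(): Promise<void> {
  const manager = new ContainerManager();
  await manager.initialize(); // throws if Docker is not running

  manager.addContainer(OllamaContainer.createConfig('gpu0', 'ollama-main', ['0']));
  await manager.startAll();

  const container = await manager.findContainerForModel('llama3.2:1b');
  if (container) {
    logger.info(`Routing to ${container.displayName} at ${container.getEndpoint()}`);
  }
}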
11
ts/containers/index.ts
Normal file
@@ -0,0 +1,11 @@
/**
 * Container Management Module
 *
 * Exports all AI container implementations.
 */

export { BaseContainer } from './base-container.ts';
export { OllamaContainer } from './ollama.ts';
export { VllmContainer } from './vllm.ts';
export { TgiContainer } from './tgi.ts';
export { ContainerManager } from './container-manager.ts';
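// Editor's note (illustrative): consumers import from this barrel instead of
// the individual files, e.g.:
//   import { ContainerManager, OllamaContainer } from './containers/index.ts';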
387
ts/containers/ollama.ts
Normal file
@@ -0,0 +1,387 @@
/**
 * Ollama Container
 *
 * Manages Ollama containers for running local LLMs.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatCompletionChoice,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * Ollama API response types
 */
interface IOllamaTagsResponse {
  models: Array<{
    name: string;
    size: number;
    digest: string;
    modified_at: string;
  }>;
}

interface IOllamaChatRequest {
  model: string;
  messages: Array<{
    role: string;
    content: string;
  }>;
  stream?: boolean;
  options?: {
    temperature?: number;
    top_p?: number;
    num_predict?: number;
    stop?: string[];
  };
}

interface IOllamaChatResponse {
  model: string;
  created_at: string;
  message: {
    role: string;
    content: string;
  };
  done: boolean;
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  eval_count?: number;
}

interface IOllamaPullResponse {
  status: string;
  digest?: string;
  total?: number;
  completed?: number;
}

/**
 * Ollama container implementation
 */
export class OllamaContainer extends BaseContainer {
  public readonly type: TContainerType = 'ollama';
  public readonly displayName = 'Ollama';
  public readonly defaultImage = CONTAINER_IMAGES.OLLAMA;
  public readonly defaultPort = CONTAINER_PORTS.OLLAMA;

  constructor(config: IContainerConfig) {
    super(config);

    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }

    // Add default volume for model storage
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-ollama-${config.id}:/root/.ollama`];
    }
  }

  /**
   * Create Ollama container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    return {
      id,
      name,
      type: 'ollama',
      image: options.image || CONTAINER_IMAGES.OLLAMA,
      gpuIds,
      port: options.port || CONTAINER_PORTS.OLLAMA,
      externalPort: options.externalPort,
      models: options.models || [],
      env: options.env,
      volumes: options.volumes || [`modelgrid-ollama-${id}:/root/.ollama`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command: options.command,
    };
  }

  /**
   * Check if Ollama is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/api/tags', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * List available models
   */
  public async listModels(): Promise<string[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => m.name);
    } catch (error) {
      logger.warn(`Failed to list Ollama models: ${error instanceof Error ? error.message : String(error)}`);
      return [];
    }
  }

  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => ({
        name: m.name,
        size: m.size,
        format: m.digest.substring(0, 12),
        loaded: true, // Ollama doesn't distinguish loaded vs available
        requestCount: 0,
      }));
    } catch {
      return [];
    }
  }

  /**
   * Pull a model
   */
  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
    try {
      logger.info(`Pulling model: ${modelName}`);

      const response = await this.fetch('/api/pull', {
        method: 'POST',
        body: { name: modelName },
        timeout: 3600000, // 1 hour for large models
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      // Read streaming response
      const reader = response.body?.getReader();
      if (!reader) {
        throw new Error('No response body');
      }

      const decoder = new TextDecoder();
      let lastStatus = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // stream: true keeps multi-byte UTF-8 sequences intact across chunk boundaries
        const text = decoder.decode(value, { stream: true });
        const lines = text.split('\n').filter((l) => l.trim());

        for (const line of lines) {
          try {
            const data = JSON.parse(line) as IOllamaPullResponse;
            const status = data.status;

            if (status !== lastStatus) {
              lastStatus = status;
              let percent: number | undefined;

              if (data.total && data.completed) {
                percent = Math.round((data.completed / data.total) * 100);
              }

              if (onProgress) {
                onProgress({ model: modelName, status, percent });
              } else {
                const progressStr = percent !== undefined ? ` (${percent}%)` : '';
                logger.dim(`  ${status}${progressStr}`);
              }
            }
          } catch {
            // Invalid JSON line, skip
          }
        }
      }

      logger.success(`Model ${modelName} pulled successfully`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Remove a model
   */
  public async removeModel(modelName: string): Promise<boolean> {
    try {
      const response = await this.fetch('/api/delete', {
        method: 'DELETE',
        body: { name: modelName },
      });

      if (response.ok) {
        logger.success(`Model ${modelName} removed`);
        return true;
      }

      throw new Error(`HTTP ${response.status}`);
    } catch (error) {
      logger.error(`Failed to remove model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Send a chat completion request
   */
  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: false,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetchJson<IOllamaChatResponse>('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000, // 5 minutes
    });

    // Convert to OpenAI format
    const created = Math.floor(Date.now() / 1000);

    const choice: IChatCompletionChoice = {
      index: 0,
      message: {
        role: 'assistant',
        content: response.message.content,
      },
      finish_reason: response.done ? 'stop' : null,
    };

    return {
      id: this.generateRequestId(),
      object: 'chat.completion',
      created,
      model: request.model,
      choices: [choice],
      usage: {
        prompt_tokens: response.prompt_eval_count || 0,
        completion_tokens: response.eval_count || 0,
        total_tokens: (response.prompt_eval_count || 0) + (response.eval_count || 0),
      },
    };
  }

  /**
   * Stream a chat completion request
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: true,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetch('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000,
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    const requestId = this.generateRequestId();
    const created = Math.floor(Date.now() / 1000);

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // stream: true keeps multi-byte UTF-8 sequences intact across chunk boundaries
      const text = decoder.decode(value, { stream: true });
      const lines = text.split('\n').filter((l) => l.trim());

      for (const line of lines) {
        try {
          const data = JSON.parse(line) as IOllamaChatResponse;

          // Convert to OpenAI streaming format
          const chunk = {
            id: requestId,
            object: 'chat.completion.chunk',
            created,
            model: request.model,
            choices: [
              {
                index: 0,
                delta: {
                  content: data.message.content,
                } as Partial<IChatMessage>,
                finish_reason: data.done ? 'stop' : null,
              },
            ],
          };

          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);

          if (data.done) {
            onChunk('data: [DONE]\n\n');
          }
        } catch {
          // Invalid JSON, skip
        }
      }
    }
  }
}
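// --- Illustrative usage sketch (editor's addition, not part of this commit) ---
// Pulling a model with a progress callback; Ollama streams newline-delimited
// JSON status objects, which pullModel() surfaces via TModelPullProgress.
// The model name is a placeholder.
export async function examplePullWithProgress(container: OllamaContainer): Promise<void> {
  const ok = await container.pullModel('llama3.2:1b', ({ status, percent }) => {
    const suffix = percent !== undefined ? ` ${percent}%` : '';
    logger.dim(`pull: ${status}${suffix}`);
  });

  if (ok) {
    logger.success(`Now serving: ${(await container.listModels()).join(', ')}`);
  }
}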
417
ts/containers/tgi.ts
Normal file
@@ -0,0 +1,417 @@
/**
 * TGI Container (Text Generation Inference)
 *
 * Manages HuggingFace Text Generation Inference containers.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatCompletionChoice,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * TGI info response
 */
interface ITgiInfoResponse {
  model_id: string;
  model_sha: string;
  model_dtype: string;
  model_device_type: string;
  max_concurrent_requests: number;
  max_best_of: number;
  max_stop_sequences: number;
  max_input_length: number;
  max_total_tokens: number;
  version: string;
}

/**
 * TGI generate request
 */
interface ITgiGenerateRequest {
  inputs: string;
  parameters?: {
    temperature?: number;
    top_p?: number;
    max_new_tokens?: number;
    stop?: string[];
    do_sample?: boolean;
    return_full_text?: boolean;
  };
}

/**
 * TGI generate response
 */
interface ITgiGenerateResponse {
  generated_text: string;
  details?: {
    finish_reason: string;
    generated_tokens: number;
    seed?: number;
  };
}

/**
 * TGI container implementation
 *
 * TGI is optimized for:
 * - Production deployments
 * - Flash Attention support
 * - Quantization (bitsandbytes, GPTQ, AWQ)
 * - Multiple GPU support with tensor parallelism
 */
export class TgiContainer extends BaseContainer {
  public readonly type: TContainerType = 'tgi';
  public readonly displayName = 'TGI';
  public readonly defaultImage = CONTAINER_IMAGES.TGI;
  public readonly defaultPort = CONTAINER_PORTS.TGI;

  constructor(config: IContainerConfig) {
    super(config);

    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }

    // Add default volume for model cache
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-tgi-${config.id}:/data`];
    }
  }

  /**
   * Create TGI container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    modelName: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    const env: Record<string, string> = {
      MODEL_ID: modelName,
      PORT: String(options.port || CONTAINER_PORTS.TGI),
      HUGGING_FACE_HUB_TOKEN: options.env?.HF_TOKEN || options.env?.HUGGING_FACE_HUB_TOKEN || '',
      ...options.env,
    };

    // Add GPU configuration
    if (gpuIds.length > 1) {
      env.NUM_SHARD = String(gpuIds.length);
    }

    // Add quantization if specified
    if (options.env?.QUANTIZE) {
      env.QUANTIZE = options.env.QUANTIZE;
    }

    return {
      id,
      name,
      type: 'tgi',
      image: options.image || CONTAINER_IMAGES.TGI,
      gpuIds,
      port: options.port || CONTAINER_PORTS.TGI,
      externalPort: options.externalPort,
      models: [modelName],
      env,
      volumes: options.volumes || [`modelgrid-tgi-${id}:/data`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command: options.command,
    };
  }

  /**
   * Check if TGI is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/health', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * List available models
   * TGI serves a single model per instance
   */
  public async listModels(): Promise<string[]> {
    try {
      const info = await this.fetchJson<ITgiInfoResponse>('/info');
      return [info.model_id];
    } catch (error) {
      logger.warn(`Failed to get TGI info: ${error instanceof Error ? error.message : String(error)}`);
      return this.config.models || [];
    }
  }

  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const info = await this.fetchJson<ITgiInfoResponse>('/info');
      return [{
        name: info.model_id,
        size: 0, // TGI doesn't expose model size
        format: info.model_dtype,
        loaded: true,
        requestCount: 0,
      }];
    } catch {
      return this.config.models.map((name) => ({
        name,
        size: 0,
        loaded: true,
        requestCount: 0,
      }));
    }
  }

  /**
   * Pull a model
   * TGI downloads models automatically at startup
   */
  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
    logger.info(`TGI downloads models at startup. Model: ${modelName}`);
    logger.info('To use a different model, create a new TGI container.');

    if (onProgress) {
      onProgress({
        model: modelName,
        status: 'TGI models are loaded at container startup',
        percent: 100,
      });
    }

    return true;
  }

  /**
   * Remove a model
   * TGI serves a single model per instance
   */
  public async removeModel(modelName: string): Promise<boolean> {
    logger.info('TGI serves a single model per instance.');
    logger.info(`To remove model ${modelName}, stop and remove this container.`);
    return true;
  }

  /**
   * Send a chat completion request
   * Convert OpenAI format to TGI format
   */
  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
    // Convert messages to TGI prompt format
    const prompt = this.messagesToPrompt(request.messages);

    const tgiRequest: ITgiGenerateRequest = {
      inputs: prompt,
      parameters: {
        temperature: request.temperature,
        top_p: request.top_p,
        max_new_tokens: request.max_tokens || 1024,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
        do_sample: (request.temperature || 0) > 0,
        return_full_text: false,
      },
    };

    const response = await this.fetchJson<ITgiGenerateResponse>('/generate', {
      method: 'POST',
      body: tgiRequest,
      timeout: 300000, // 5 minutes
    });

    // Convert to OpenAI format
    const created = Math.floor(Date.now() / 1000);

    const choice: IChatCompletionChoice = {
      index: 0,
      message: {
        role: 'assistant',
        content: response.generated_text,
      },
      finish_reason: response.details?.finish_reason === 'eos_token' ? 'stop' : 'length',
    };

    return {
      id: this.generateRequestId(),
      object: 'chat.completion',
      created,
      model: this.config.models[0] || 'unknown',
      choices: [choice],
      usage: {
        prompt_tokens: 0, // TGI doesn't always report this
        completion_tokens: response.details?.generated_tokens || 0,
        total_tokens: response.details?.generated_tokens || 0,
      },
    };
  }

  /**
   * Stream a chat completion request
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    // Convert messages to TGI prompt format
    const prompt = this.messagesToPrompt(request.messages);

    const response = await this.fetch('/generate_stream', {
      method: 'POST',
      body: {
        inputs: prompt,
        parameters: {
          temperature: request.temperature,
          top_p: request.top_p,
          max_new_tokens: request.max_tokens || 1024,
          stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
          do_sample: (request.temperature || 0) > 0,
        },
      },
      timeout: 300000,
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`HTTP ${response.status}: ${error}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    const requestId = this.generateRequestId();
    const created = Math.floor(Date.now() / 1000);
    const model = this.config.models[0] || 'unknown';

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // stream: true keeps multi-byte UTF-8 sequences intact across chunk boundaries
      const text = decoder.decode(value, { stream: true });
      const lines = text.split('\n').filter((l) => l.startsWith('data:'));

      for (const line of lines) {
        try {
          const jsonStr = line.substring(5).trim();
          if (jsonStr === '[DONE]') {
            onChunk('data: [DONE]\n\n');
            continue;
          }

          const data = JSON.parse(jsonStr);

          // Convert to OpenAI streaming format
          const chunk = {
            id: requestId,
            object: 'chat.completion.chunk',
            created,
            model,
            choices: [
              {
                index: 0,
                delta: {
                  content: data.token?.text || '',
                } as Partial<IChatMessage>,
                finish_reason: data.details?.finish_reason ? 'stop' : null,
              },
            ],
          };

          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);
        } catch {
          // Invalid JSON, skip
        }
      }
    }
  }

  /**
   * Convert chat messages to TGI prompt format
   */
  private messagesToPrompt(messages: IChatMessage[]): string {
    // Use a simple chat template
    // TGI can use model-specific templates via the Messages API
    let prompt = '';

    for (const message of messages) {
      switch (message.role) {
        case 'system':
          prompt += `System: ${message.content}\n\n`;
          break;
        case 'user':
          prompt += `User: ${message.content}\n\n`;
          break;
        case 'assistant':
          prompt += `Assistant: ${message.content}\n\n`;
          break;
      }
    }

    prompt += 'Assistant:';
    return prompt;
  }

  /**
   * Get TGI server info
   */
  public async getInfo(): Promise<ITgiInfoResponse | null> {
    try {
      return await this.fetchJson<ITgiInfoResponse>('/info');
    } catch {
      return null;
    }
  }

  /**
   * Get TGI metrics
   */
  public async getMetrics(): Promise<Record<string, unknown>> {
    try {
      const response = await this.fetch('/metrics', { timeout: 5000 });
      if (response.ok) {
        const text = await response.text();
        // Parse Prometheus metrics
        const metrics: Record<string, unknown> = {};
        const lines = text.split('\n');
        for (const line of lines) {
          if (line.startsWith('#') || !line.trim()) continue;
          const match = line.match(/^(\w+)(?:\{[^}]*\})?\s+([\d.e+-]+)/);
          if (match) {
            metrics[match[1]] = parseFloat(match[2]);
          }
        }
        return metrics;
      }
    } catch {
      // Metrics endpoint may not be available
    }
    return {};
  }
}
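// --- Illustrative note (editor's addition, not part of this commit) ---
// For the conversation [system: "Be brief.", user: "Hi"], messagesToPrompt()
// above yields the following flat prompt, leaving TGI to continue after the
// trailing "Assistant:" marker:
//
//   System: Be brief.
//
//   User: Hi
//
//   Assistant: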
272
ts/containers/vllm.ts
Normal file
@@ -0,0 +1,272 @@
/**
 * vLLM Container
 *
 * Manages vLLM containers for high-performance LLM inference.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * vLLM model info response
 */
interface IVllmModelsResponse {
  object: 'list';
  data: Array<{
    id: string;
    object: 'model';
    created: number;
    owned_by: string;
  }>;
}

/**
 * vLLM container implementation
 *
 * vLLM serves a single model per instance and is optimized for:
 * - High throughput with PagedAttention
 * - Continuous batching
 * - OpenAI-compatible API
 */
export class VllmContainer extends BaseContainer {
  public readonly type: TContainerType = 'vllm';
  public readonly displayName = 'vLLM';
  public readonly defaultImage = CONTAINER_IMAGES.VLLM;
  public readonly defaultPort = CONTAINER_PORTS.VLLM;

  constructor(config: IContainerConfig) {
    super(config);

    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }

    // Add default volume for model cache
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-vllm-${config.id}:/root/.cache/huggingface`];
    }
  }

  /**
   * Create vLLM container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    modelName: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    // vLLM requires model to be specified at startup
    const command = [
      '--model', modelName,
      '--host', '0.0.0.0',
      '--port', String(options.port || CONTAINER_PORTS.VLLM),
    ];

    // Add tensor parallelism if multiple GPUs
    if (gpuIds.length > 1) {
      command.push('--tensor-parallel-size', String(gpuIds.length));
    }

    // Add additional options
    if (options.env?.VLLM_MAX_MODEL_LEN) {
      command.push('--max-model-len', options.env.VLLM_MAX_MODEL_LEN);
    }

    return {
      id,
      name,
      type: 'vllm',
      image: options.image || CONTAINER_IMAGES.VLLM,
      gpuIds,
      port: options.port || CONTAINER_PORTS.VLLM,
      externalPort: options.externalPort,
      models: [modelName],
      env: {
        HF_TOKEN: options.env?.HF_TOKEN || '',
        ...options.env,
      },
      volumes: options.volumes || [`modelgrid-vllm-${id}:/root/.cache/huggingface`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command,
    };
  }

  /**
   * Check if vLLM is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/health', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * List available models
   * vLLM serves a single model per instance
   */
  public async listModels(): Promise<string[]> {
    try {
      const data = await this.fetchJson<IVllmModelsResponse>('/v1/models');
      return (data.data || []).map((m) => m.id);
    } catch (error) {
      logger.warn(`Failed to list vLLM models: ${error instanceof Error ? error.message : String(error)}`);
      return this.config.models || [];
    }
  }

  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const data = await this.fetchJson<IVllmModelsResponse>('/v1/models');
      return (data.data || []).map((m) => ({
        name: m.id,
        size: 0, // vLLM doesn't expose size
        loaded: true,
        requestCount: 0,
      }));
    } catch {
      // Return configured model as fallback
      return this.config.models.map((name) => ({
        name,
        size: 0,
        loaded: true,
        requestCount: 0,
      }));
    }
  }

  /**
   * Pull a model
   * vLLM downloads models automatically at startup
   * This method is a no-op - models are configured at container creation
   */
  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
    logger.info(`vLLM downloads models at startup. Model: ${modelName}`);
    logger.info('To use a different model, create a new vLLM container.');

    if (onProgress) {
      onProgress({
        model: modelName,
        status: 'vLLM models are loaded at container startup',
        percent: 100,
      });
    }

    return true;
  }

  /**
   * Remove a model
   * vLLM serves a single model per instance
   */
  public async removeModel(modelName: string): Promise<boolean> {
    logger.info('vLLM serves a single model per instance.');
    logger.info(`To remove model ${modelName}, stop and remove this container.`);
    return true;
  }

  /**
   * Send a chat completion request
   * vLLM is OpenAI-compatible
   */
  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
    return this.fetchJson<IChatCompletionResponse>('/v1/chat/completions', {
      method: 'POST',
      body: {
        ...request,
        stream: false,
      },
      timeout: 300000, // 5 minutes
    });
  }

  /**
   * Stream a chat completion request
   * vLLM is OpenAI-compatible
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    const response = await this.fetch('/v1/chat/completions', {
      method: 'POST',
      body: {
        ...request,
        stream: true,
      },
      timeout: 300000,
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`HTTP ${response.status}: ${error}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      // stream: true keeps multi-byte UTF-8 sequences intact across chunk
      // boundaries; vLLM already sends data in SSE format, so chunks pass
      // through unchanged
      const text = decoder.decode(value, { stream: true });
      onChunk(text);
    }
  }

  /**
   * Get vLLM-specific metrics
   */
  public async getMetrics(): Promise<Record<string, unknown>> {
    try {
      const response = await this.fetch('/metrics', { timeout: 5000 });
      if (response.ok) {
        const text = await response.text();
        // Parse Prometheus metrics
        const metrics: Record<string, unknown> = {};
        const lines = text.split('\n');
        for (const line of lines) {
          if (line.startsWith('#') || !line.trim()) continue;
          const match = line.match(/^(\w+)(?:\{[^}]*\})?\s+([\d.e+-]+)/);
          if (match) {
            metrics[match[1]] = parseFloat(match[2]);
          }
        }
        return metrics;
      }
    } catch {
      // Metrics endpoint may not be enabled
    }
    return {};
  }
}
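// --- Illustrative usage sketch (editor's addition, not part of this commit) ---
// Because vLLM speaks the OpenAI wire format natively, streaming is a pure
// passthrough: SSE chunks can be forwarded to any OpenAI-style client as-is.
export async function exampleCollectStream(
  container: VllmContainer,
  request: IChatCompletionRequest,
): Promise<string> {
  let sse = '';
  await container.chatCompletionStream(request, (chunk) => {
    // Each chunk arrives already in "data: {...}\n\n" SSE framing from vLLM
    sse += chunk;
  });
  return sse;
}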
268
ts/daemon.ts
Normal file
@@ -0,0 +1,268 @@
/**
 * ModelGrid Daemon
 *
 * Background process for managing containers and serving the API.
 */

import process from 'node:process';
import { logger } from './logger.ts';
import { TIMING } from './constants.ts';
import type { ModelGrid } from './modelgrid.ts';
import { ApiServer } from './api/server.ts';
import type { IModelGridConfig } from './interfaces/config.ts';

/**
 * ModelGrid Daemon
 */
export class Daemon {
  private modelgrid: ModelGrid;
  private isRunning: boolean = false;
  private apiServer?: ApiServer;

  constructor(modelgrid: ModelGrid) {
    this.modelgrid = modelgrid;
  }

  /**
   * Start the daemon
   */
  public async start(): Promise<void> {
    if (this.isRunning) {
      logger.warn('Daemon is already running');
      return;
    }

    logger.log('Starting ModelGrid daemon...');

    try {
      // Initialize ModelGrid
      await this.modelgrid.initialize();

      const config = this.modelgrid.getConfig();
      if (!config) {
        throw new Error('Failed to load configuration');
      }

      this.logConfigLoaded(config);

      // Start API server
      await this.startApiServer(config);

      // Start containers
      await this.startContainers();

      // Preload models if configured
      await this.preloadModels(config);

      // Setup signal handlers
      this.setupSignalHandlers();

      this.isRunning = true;

      // Start monitoring loop
      await this.monitor();
    } catch (error) {
      this.isRunning = false;
      logger.error(`Daemon failed to start: ${error instanceof Error ? error.message : String(error)}`);
      process.exit(1);
    }
  }

  /**
   * Stop the daemon
   */
  public async stop(): Promise<void> {
    if (!this.isRunning) {
      return;
    }

    logger.log('Stopping ModelGrid daemon...');

    this.isRunning = false;

    // Stop API server
    if (this.apiServer) {
      await this.apiServer.stop();
    }

    // Shutdown ModelGrid (stops containers)
    await this.modelgrid.shutdown();

    logger.success('ModelGrid daemon stopped');
  }

  /**
   * Start the API server
   */
  private async startApiServer(config: IModelGridConfig): Promise<void> {
    logger.info('Starting API server...');

    this.apiServer = new ApiServer(
      config.api,
      this.modelgrid.getContainerManager(),
      this.modelgrid.getModelRegistry(),
    );

    await this.apiServer.start();
  }

  /**
   * Start configured containers
   */
  private async startContainers(): Promise<void> {
    logger.info('Starting containers...');

    const containerManager = this.modelgrid.getContainerManager();
    await containerManager.startAll();

    // Wait for containers to be healthy
    logger.dim('Waiting for containers to become healthy...');
    await this.waitForContainersHealthy();
  }

  /**
   * Wait for all containers to report healthy
   */
  private async waitForContainersHealthy(timeout: number = 60000): Promise<void> {
    const startTime = Date.now();
    const containerManager = this.modelgrid.getContainerManager();

    while (Date.now() - startTime < timeout) {
      // ContainerManager exposes healthCheck(), which returns a Map of
      // container ID -> healthy flag; reduce it to a single boolean here
      const results = await containerManager.healthCheck();
      const allHealthy = Array.from(results.values()).every((healthy) => healthy);

      if (allHealthy) {
        logger.success('All containers are healthy');
        return;
      }

      await this.sleep(5000);
    }

    logger.warn('Timeout waiting for containers to become healthy');
  }

  /**
   * Preload configured models
   */
  private async preloadModels(config: IModelGridConfig): Promise<void> {
    if (!config.models.autoLoad || config.models.autoLoad.length === 0) {
      return;
    }

    logger.info(`Preloading ${config.models.autoLoad.length} model(s)...`);

    const modelLoader = this.modelgrid.getModelLoader();
    const results = await modelLoader.preloadModels(config.models.autoLoad);

    let loaded = 0;
    let failed = 0;

    for (const [name, result] of results) {
      if (result.success) {
        loaded++;
        logger.dim(`  ✓ ${name}`);
      } else {
        failed++;
        logger.warn(`  ✗ ${name}: ${result.error}`);
      }
    }

    if (failed > 0) {
      logger.warn(`Preloaded ${loaded}/${config.models.autoLoad.length} models (${failed} failed)`);
    } else {
      logger.success(`Preloaded ${loaded} model(s)`);
    }
  }

  /**
   * Setup signal handlers for graceful shutdown
   */
  private setupSignalHandlers(): void {
    const shutdown = async () => {
      logger.log('');
      logger.log('Received shutdown signal');
      await this.stop();
      process.exit(0);
    };

    process.on('SIGINT', shutdown);
    process.on('SIGTERM', shutdown);
  }

  /**
   * Main monitoring loop
   */
  private async monitor(): Promise<void> {
    logger.log('Starting monitoring loop...');

    const config = this.modelgrid.getConfig();
    const checkInterval = config?.checkInterval || TIMING.CHECK_INTERVAL_MS;

    while (this.isRunning) {
      try {
        // Check container health
        await this.checkContainerHealth();

        // Log periodic status
        this.logPeriodicStatus();

        await this.sleep(checkInterval);
      } catch (error) {
        logger.error(`Monitor error: ${error instanceof Error ? error.message : String(error)}`);
        await this.sleep(checkInterval);
      }
    }
  }

  /**
   * Check health of all containers
   */
  private async checkContainerHealth(): Promise<void> {
    const containerManager = this.modelgrid.getContainerManager();
    const statuses = await containerManager.getAllStatus();

    for (const [id, status] of statuses) {
      if (status.running && status.health === 'unhealthy') {
        logger.warn(`Container ${id} is unhealthy, attempting restart...`);

        const container = containerManager.getContainer(id);
        if (container) {
          await container.restart();
        }
      }
    }
  }

  /**
   * Log periodic status
   */
  private logPeriodicStatus(): void {
    if (this.apiServer) {
      const info = this.apiServer.getInfo();
      if (info.running) {
        logger.dim(`API server running on ${info.host}:${info.port} (uptime: ${info.uptime}s)`);
      }
    }
  }

  /**
   * Log configuration loaded message
   */
  private logConfigLoaded(config: IModelGridConfig): void {
    logger.log('');
    logger.logBoxTitle('Configuration Loaded', 60, 'success');
    logger.logBoxLine(`API Port: ${config.api.port}`);
    logger.logBoxLine(`Containers: ${config.containers.length}`);
    logger.logBoxLine(`Auto-pull: ${config.models.autoPull ? 'Enabled' : 'Disabled'}`);
    logger.logBoxLine(`Check Interval: ${config.checkInterval / 1000}s`);
    logger.logBoxEnd();
    logger.log('');
  }

  /**
   * Sleep for specified milliseconds
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
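// --- Illustrative usage sketch (editor's addition, not part of this commit) ---
// Typical entrypoint wiring: construct the daemon around a ModelGrid instance
// and start it; the SIGINT/SIGTERM handlers installed in start() take care of
// graceful shutdown.
export async function exampleRunDaemon(modelgrid: ModelGrid): Promise<void> {
  const daemon = new Daemon(modelgrid);
  await daemon.start(); // blocks in the monitoring loop until stopped
}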
558
ts/docker/container-runtime.ts
Normal file
@@ -0,0 +1,558 @@
/**
 * Container Runtime
 *
 * Manages individual Docker containers for AI model serving.
 */

import { exec, spawn } from 'node:child_process';
import { promisify } from 'node:util';
import type {
  IContainerConfig,
  IContainerStatus,
  TContainerHealth,
  TContainerRunStatus,
} from '../interfaces/container.ts';
import { logger } from '../logger.ts';
import { DOCKER, TIMING } from '../constants.ts';
import { DriverManager } from '../drivers/driver-manager.ts';

const execAsync = promisify(exec);

/**
 * Container runtime execution result
 */
export interface IContainerExecResult {
  success: boolean;
  output?: string;
  error?: string;
  exitCode?: number;
}

/**
 * Container logs options
 */
export interface ILogsOptions {
  lines?: number;
  follow?: boolean;
  timestamps?: boolean;
}

/**
 * Container Runtime class - manages individual containers
 */
export class ContainerRuntime {
  private driverManager: DriverManager;

  constructor() {
    this.driverManager = new DriverManager();
  }

  /**
   * Start a container with the given configuration
   */
  public async startContainer(config: IContainerConfig): Promise<boolean> {
    const containerName = `modelgrid-${config.id}`;

    // Check if container already exists
    const existingId = await this.getContainerIdByName(containerName);
    if (existingId) {
      // Check if it's running
      const isRunning = await this.isContainerRunning(existingId);
      if (isRunning) {
        logger.dim(`Container ${containerName} is already running`);
        return true;
      }

      // Start existing container
      try {
        await execAsync(`docker start ${existingId}`, {
          timeout: TIMING.CONTAINER_STARTUP_TIMEOUT_MS,
        });
        logger.success(`Started existing container: ${containerName}`);
        return true;
      } catch (error) {
        logger.error(`Failed to start existing container: ${error instanceof Error ? error.message : String(error)}`);
        // Try to remove and recreate
        await this.removeContainer(config.id);
      }
    }

    // Build docker run command
    const args = await this.buildRunArgs(config);
    const cmd = `docker run ${args.join(' ')}`;

    logger.info(`Starting container: ${containerName}`);
    logger.dim(`Command: ${cmd}`);

    try {
      await execAsync(cmd, { timeout: TIMING.CONTAINER_STARTUP_TIMEOUT_MS });
      logger.success(`Container ${containerName} started`);

      // Wait for container to be healthy
      await this.waitForHealth(containerName);

      return true;
    } catch (error) {
      logger.error(`Failed to start container: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }
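  // Usage sketch (assumes the caller builds a valid IContainerConfig; the
  // variable names below are illustrative, not part of this module):
  //
  //   const runtime = new ContainerRuntime();
  //   const started = await runtime.startContainer(ollamaConfig);
  //   if (!started) logger.error('Container failed to start; see logs above');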

  /**
   * Stop a container
   */
  public async stopContainer(containerId: string, timeout: number = 30): Promise<boolean> {
    const containerName = `modelgrid-${containerId}`;

    try {
      const dockerId = await this.getContainerIdByName(containerName);
      if (!dockerId) {
        logger.dim(`Container ${containerName} not found`);
        return true;
      }

      logger.info(`Stopping container: ${containerName}`);
      await execAsync(`docker stop -t ${timeout} ${dockerId}`, {
        timeout: (timeout + 10) * 1000,
      });
      logger.success(`Container ${containerName} stopped`);
      return true;
    } catch (error) {
      logger.error(`Failed to stop container: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Remove a container
   */
  public async removeContainer(containerId: string, force: boolean = true): Promise<boolean> {
    const containerName = `modelgrid-${containerId}`;

    try {
      const dockerId = await this.getContainerIdByName(containerName);
      if (!dockerId) {
        return true;
      }

      const forceFlag = force ? '-f' : '';
      await execAsync(`docker rm ${forceFlag} ${dockerId}`, { timeout: 30000 });
      logger.success(`Container ${containerName} removed`);
      return true;
    } catch (error) {
      logger.error(`Failed to remove container: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Restart a container
   */
  public async restartContainer(containerId: string): Promise<boolean> {
    const containerName = `modelgrid-${containerId}`;

    try {
      const dockerId = await this.getContainerIdByName(containerName);
      if (!dockerId) {
        logger.error(`Container ${containerName} not found`);
        return false;
      }

      await execAsync(`docker restart ${dockerId}`, {
        timeout: TIMING.CONTAINER_STARTUP_TIMEOUT_MS,
      });
      logger.success(`Container ${containerName} restarted`);
      return true;
    } catch (error) {
      logger.error(`Failed to restart container: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Get container status
   */
  public async getContainerStatus(config: IContainerConfig): Promise<IContainerStatus> {
    const containerName = `modelgrid-${config.id}`;

    const status: IContainerStatus = {
      id: config.id,
      name: config.name,
      type: config.type,
      running: false,
      runStatus: 'stopped',
      health: 'unknown',
      loadedModels: [],
      assignedGpus: config.gpuIds,
      endpoint: `http://localhost:${config.externalPort || config.port}`,
    };

    try {
      const dockerId = await this.getContainerIdByName(containerName);
      if (!dockerId) {
        return status;
      }

      status.dockerId = dockerId;

      // Get container info
      const { stdout } = await execAsync(
        `docker inspect --format='{{json .}}' ${dockerId}`,
        { timeout: 5000 },
      );

      const info = JSON.parse(stdout);

      // Get run status
      status.running = info.State.Running === true;
      if (info.State.Running) {
        status.runStatus = 'running';
      } else if (info.State.Restarting) {
        status.runStatus = 'starting';
      } else if (info.State.ExitCode !== 0) {
        status.runStatus = 'error';
        status.lastError = info.State.Error || `Exit code: ${info.State.ExitCode}`;
      } else {
        status.runStatus = 'stopped';
      }

      // Get health status
      if (info.State.Health) {
        status.health = info.State.Health.Status as TContainerHealth;
        if (info.State.Health.Log && info.State.Health.Log.length > 0) {
          const lastLog = info.State.Health.Log[info.State.Health.Log.length - 1];
          if (lastLog.Output) {
            status.healthMessage = lastLog.Output.substring(0, 200);
          }
        }
      }

      // Get uptime
      if (info.State.StartedAt) {
        const startTime = new Date(info.State.StartedAt).getTime();
        status.startTime = startTime;
        if (status.running) {
          status.uptime = Math.floor((Date.now() - startTime) / 1000);
        }
      }

      // Try to get loaded models from container
      if (status.running) {
        status.loadedModels = await this.getLoadedModels(config);
      }

      // Get resource usage
      const stats = await this.getContainerStats(dockerId);
      if (stats) {
        status.memoryUsage = stats.memoryUsage;
        status.cpuUsage = stats.cpuUsage;
      }
    } catch (error) {
      logger.dim(`Error getting container status: ${error instanceof Error ? error.message : String(error)}`);
    }

    return status;
  }

  /**
   * Get container resource stats
   */
  private async getContainerStats(
    dockerId: string,
  ): Promise<{ memoryUsage: number; cpuUsage: number } | null> {
    try {
      const { stdout } = await execAsync(
        `docker stats ${dockerId} --no-stream --format "{{.MemUsage}},{{.CPUPerc}}"`,
        { timeout: 5000 },
      );

      const [memStr, cpuStr] = stdout.trim().split(',');

      // Parse memory (e.g., "1.5GiB / 16GiB")
      const memMatch = memStr.match(/([\d.]+)(MiB|GiB)/i);
      let memoryUsage = 0;
      if (memMatch) {
        memoryUsage = parseFloat(memMatch[1]);
        if (memMatch[2].toLowerCase() === 'gib') {
          memoryUsage *= 1024;
        }
      }

      // Parse CPU (e.g., "25.50%")
      const cpuUsage = parseFloat(cpuStr.replace('%', '')) || 0;

      return { memoryUsage: Math.round(memoryUsage), cpuUsage };
    } catch {
      return null;
    }
  }
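  // Worked example of the parsing above (illustrative values):
  //   stdout "1.5GiB / 16GiB,25.50%" -> memStr "1.5GiB / 16GiB", cpuStr "25.50%"
  //   memMatch captures ("1.5", "GiB") -> memoryUsage = 1.5 * 1024 = 1536 (MB)
  //   cpuUsage = 25.5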

  /**
   * Get loaded models from a container
   */
  private async getLoadedModels(config: IContainerConfig): Promise<string[]> {
    const containerName = `modelgrid-${config.id}`;

    try {
      switch (config.type) {
        case 'ollama': {
          // Query the Ollama API; /api/tags lists locally available models
          const { stdout } = await execAsync(
            `docker exec ${containerName} curl -s http://localhost:11434/api/tags`,
            { timeout: 5000 },
          );
          const data = JSON.parse(stdout);
          return (data.models || []).map((m: { name: string }) => m.name);
        }

        case 'vllm':
        case 'tgi': {
          // These typically serve a single model
          return config.models || [];
        }

        default:
          return [];
      }
    } catch {
      return [];
    }
  }

  /**
   * Execute a command inside a container
   */
  public async exec(
    containerId: string,
    command: string,
    timeout: number = 30000,
  ): Promise<IContainerExecResult> {
    const containerName = `modelgrid-${containerId}`;

    try {
      const dockerId = await this.getContainerIdByName(containerName);
      if (!dockerId) {
        return { success: false, error: 'Container not found' };
      }

      const { stdout, stderr } = await execAsync(
        `docker exec ${dockerId} ${command}`,
        { timeout },
      );

      return {
        success: true,
        output: stdout,
        error: stderr || undefined,
      };
    } catch (error) {
      const err = error as { code?: number; stdout?: string; stderr?: string };
      return {
        success: false,
        output: err.stdout,
        error: err.stderr || (error instanceof Error ? error.message : String(error)),
        exitCode: err.code,
      };
    }
  }
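  // Usage sketch (hypothetical container id 'ollama-0'):
  //
  //   const result = await runtime.exec('ollama-0', 'ollama list', 10000);
  //   if (result.success) console.log(result.output);
  //   else logger.error(result.error ?? 'exec failed');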

  /**
   * Get container logs
   */
  public async getLogs(
    containerId: string,
    options: ILogsOptions = {},
  ): Promise<string> {
    const containerName = `modelgrid-${containerId}`;
    const { lines = 100, timestamps = false } = options;

    try {
      const dockerId = await this.getContainerIdByName(containerName);
      if (!dockerId) {
        return '';
      }

      const args = ['logs'];
      if (lines) args.push(`--tail=${lines}`);
      if (timestamps) args.push('--timestamps');
      args.push(dockerId);

      const { stdout, stderr } = await execAsync(
        `docker ${args.join(' ')}`,
        { timeout: 10000 },
      );

      return stdout + stderr;
    } catch (error) {
      return error instanceof Error ? error.message : String(error);
    }
  }

  /**
   * Follow container logs (returns a way to stop following)
   */
  public followLogs(
    containerId: string,
    onData: (data: string) => void,
  ): { stop: () => void } {
    const containerName = `modelgrid-${containerId}`;

    const child = spawn('docker', ['logs', '-f', containerName], {
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    child.stdout.on('data', (data) => onData(data.toString()));
    child.stderr.on('data', (data) => onData(data.toString()));

    return {
      stop: () => {
        child.kill();
      },
    };
  }
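  // Usage sketch: stream logs for 30 seconds, then detach (illustrative only):
  //
  //   const follower = runtime.followLogs('ollama-0', (chunk) => process.stdout.write(chunk));
  //   setTimeout(() => follower.stop(), 30_000);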

  /**
   * Build docker run arguments
   */
  private async buildRunArgs(config: IContainerConfig): Promise<string[]> {
    const containerName = `modelgrid-${config.id}`;
    const args: string[] = [
      '-d', // Detached mode
      `--name=${containerName}`,
      `--network=${DOCKER.DEFAULT_NETWORK}`,
    ];

    // Port mapping
    const externalPort = config.externalPort || config.port;
    args.push(`-p ${externalPort}:${config.port}`);

    // Restart policy
    args.push(`--restart=${config.restartPolicy}`);

    // Memory limit
    if (config.memoryLimit) {
      args.push(`--memory=${config.memoryLimit}`);
    }

    // CPU limit
    if (config.cpuLimit) {
      args.push(`--cpus=${config.cpuLimit}`);
    }

    // GPU support
    if (config.gpuIds && config.gpuIds.length > 0) {
      const gpuArgs = await this.driverManager.getDockerGpuArgs(config.gpuIds);
      args.push(...gpuArgs);
    }

    // Environment variables (quoted so values containing spaces survive
    // the shell join in startContainer)
    if (config.env) {
      for (const [key, value] of Object.entries(config.env)) {
        args.push(`-e "${key}=${value}"`);
      }
    }

    // Volume mounts
    if (config.volumes) {
      for (const volume of config.volumes) {
        args.push(`-v "${volume}"`);
      }
    }

    // Add image
    args.push(config.image);

    // Add custom command if provided
    if (config.command && config.command.length > 0) {
      args.push(...config.command);
    }

    return args;
  }
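  // Example result (hypothetical config; the network name comes from
  // DOCKER.DEFAULT_NETWORK and the GPU flags from DriverManager):
  //
  //   ['-d', '--name=modelgrid-ollama-0', '--network=modelgrid',
  //    '-p 11434:11434', '--restart=unless-stopped',
  //    '--runtime=nvidia', '--gpus=all', 'ollama/ollama:latest']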

  /**
   * Get Docker container ID by name
   */
  private async getContainerIdByName(name: string): Promise<string | null> {
    try {
      const { stdout } = await execAsync(
        `docker ps -a --filter "name=^${name}$" --format "{{.ID}}"`,
        { timeout: 5000 },
      );
      return stdout.trim() || null;
    } catch {
      return null;
    }
  }

  /**
   * Check if a container is running
   */
  private async isContainerRunning(dockerId: string): Promise<boolean> {
    try {
      const { stdout } = await execAsync(
        `docker inspect --format='{{.State.Running}}' ${dockerId}`,
        { timeout: 5000 },
      );
      return stdout.trim() === 'true';
    } catch {
      return false;
    }
  }

  /**
   * Wait for container to be healthy
   */
  private async waitForHealth(
    containerName: string,
    timeout: number = TIMING.CONTAINER_STARTUP_TIMEOUT_MS,
  ): Promise<boolean> {
    const startTime = Date.now();
    const checkInterval = 2000;

    while (Date.now() - startTime < timeout) {
      try {
        const { stdout } = await execAsync(
          `docker inspect --format='{{.State.Health.Status}}' ${containerName} 2>/dev/null || echo "none"`,
          { timeout: 5000 },
        );

        const status = stdout.trim();

        if (status === 'healthy') {
          return true;
        }

        // Depending on the Docker version, a container without a health check
        // yields either an inspect error (mapped to "none" above) or the
        // literal template output "<no value>"
        if (status === 'none' || status === '<no value>') {
          // Container has no health check, assume healthy if running
          const { stdout: running } = await execAsync(
            `docker inspect --format='{{.State.Running}}' ${containerName}`,
            { timeout: 5000 },
          );
          if (running.trim() === 'true') {
            return true;
          }
        }

        if (status === 'unhealthy') {
          logger.warn(`Container ${containerName} is unhealthy`);
          return false;
        }
      } catch {
        // Container might not be ready yet
      }

      await this.sleep(checkInterval);
    }

    logger.warn(`Timeout waiting for container ${containerName} to be healthy`);
    return false;
  }

  /**
   * Sleep helper
   */
  private async sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
509
ts/docker/docker-manager.ts
Normal file
@@ -0,0 +1,509 @@
/**
 * Docker Manager
 *
 * Handles Docker installation, configuration, and management.
 */

import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import * as fs from 'node:fs';
import { logger } from '../logger.ts';
import { DOCKER, TIMING } from '../constants.ts';

const execAsync = promisify(exec);

/**
 * Docker installation result
 */
export interface IDockerInstallResult {
  success: boolean;
  version?: string;
  error?: string;
}

/**
 * Docker status information
 */
export interface IDockerStatus {
  installed: boolean;
  running: boolean;
  version?: string;
  runtimes: string[];
  hasNvidiaRuntime: boolean;
  networkExists: boolean;
  storageDriver?: string;
  rootDir?: string;
}

/**
 * Docker Manager class
 */
export class DockerManager {
  private networkName: string;

  constructor(networkName: string = DOCKER.DEFAULT_NETWORK) {
    this.networkName = networkName;
  }

  /**
   * Check if Docker is installed
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
      return stdout.includes('Docker');
    } catch {
      return false;
    }
  }

  /**
   * Check if Docker daemon is running
   */
  public async isRunning(): Promise<boolean> {
    try {
      await execAsync('docker info', { timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get Docker version
   */
  public async getVersion(): Promise<string | undefined> {
    try {
      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
      const match = stdout.match(/Docker version (\d+\.\d+\.\d+)/);
      return match ? match[1] : undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Get full Docker status
   */
  public async getStatus(): Promise<IDockerStatus> {
    const status: IDockerStatus = {
      installed: false,
      running: false,
      runtimes: [],
      hasNvidiaRuntime: false,
      networkExists: false,
    };

    // Check installation
    status.installed = await this.isInstalled();
    if (!status.installed) {
      return status;
    }

    status.version = await this.getVersion();

    // Check if running
    status.running = await this.isRunning();
    if (!status.running) {
      return status;
    }

    // Get detailed info
    try {
      const { stdout } = await execAsync('docker info --format json', {
        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      });

      const info = JSON.parse(stdout);

      // Get runtimes
      if (info.Runtimes) {
        status.runtimes = Object.keys(info.Runtimes);
        status.hasNvidiaRuntime = status.runtimes.includes('nvidia');
      }

      status.storageDriver = info.Driver;
      status.rootDir = info.DockerRootDir;
    } catch {
      // Try alternative method for runtimes
      try {
        const { stdout } = await execAsync('docker info 2>/dev/null | grep -i "runtimes"', {
          timeout: 5000,
        });
        status.hasNvidiaRuntime = stdout.toLowerCase().includes('nvidia');
      } catch {
        // Ignore
      }
    }

    // Check network exists
    status.networkExists = await this.networkExists();

    return status;
  }
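  // Bootstrap usage sketch (illustrative; mirrors how a daemon might use it):
  //
  //   const docker = new DockerManager();
  //   const s = await docker.getStatus();
  //   if (!s.installed) await docker.install();
  //   if (!s.running) await docker.startService();
  //   if (!s.networkExists) await docker.createNetwork();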

  /**
   * Install Docker on the system
   */
  public async install(): Promise<IDockerInstallResult> {
    try {
      // Check if already installed
      if (await this.isInstalled()) {
        return {
          success: true,
          version: await this.getVersion(),
        };
      }

      // Detect distribution
      const distro = await this.getLinuxDistro();
      logger.info(`Installing Docker on ${distro.id}...`);

      if (distro.id === 'ubuntu' || distro.id === 'debian') {
        await this.installOnDebian();
      } else if (
        distro.id === 'fedora' ||
        distro.id === 'rhel' ||
        distro.id === 'centos' ||
        distro.id === 'rocky' ||
        distro.id === 'almalinux'
      ) {
        await this.installOnRhel();
      } else {
        // Use convenience script as fallback
        await this.installWithScript();
      }

      // Start Docker service
      await this.startService();

      // Verify installation
      const version = await this.getVersion();
      if (version) {
        logger.success(`Docker ${version} installed successfully`);
        return { success: true, version };
      }

      return { success: false, error: 'Installation completed but Docker not found' };
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Install Docker on Debian/Ubuntu
   */
  private async installOnDebian(): Promise<void> {
    // Remove old versions
    await execAsync(
      'apt-get remove -y docker docker-engine docker.io containerd runc || true',
      { timeout: 60000 },
    );

    // Install prerequisites
    await execAsync('apt-get update', { timeout: 120000 });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg',
      { timeout: 120000 },
    );

    // Add Docker's official GPG key
    await execAsync('install -m 0755 -d /etc/apt/keyrings');

    const distro = await this.getLinuxDistro();
    const isUbuntu = distro.id === 'ubuntu';

    if (isUbuntu) {
      await execAsync(
        'curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg',
      );
      await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg');

      // Get Ubuntu codename
      const { stdout: codename } = await execAsync('lsb_release -cs');
      await execAsync(
        `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${codename.trim()} stable" > /etc/apt/sources.list.d/docker.list`,
      );
    } else {
      await execAsync(
        'curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg',
      );
      await execAsync('chmod a+r /etc/apt/keyrings/docker.gpg');

      const { stdout: codename } = await execAsync('lsb_release -cs');
      await execAsync(
        `echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian ${codename.trim()} stable" > /etc/apt/sources.list.d/docker.list`,
      );
    }

    // Install Docker
    await execAsync('apt-get update', { timeout: 120000 });
    await execAsync(
      'DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: 300000 },
    );
  }

  /**
   * Install Docker on RHEL/Fedora/CentOS
   */
  private async installOnRhel(): Promise<void> {
    const distro = await this.getLinuxDistro();
    const isFedora = distro.id === 'fedora';

    // Remove old versions
    await execAsync(
      'dnf remove -y docker docker-client docker-client-latest docker-common docker-latest docker-latest-logrotate docker-logrotate docker-engine || true',
      { timeout: 60000 },
    );

    // Install prerequisites
    await execAsync('dnf install -y dnf-plugins-core', { timeout: 120000 });

    // Add Docker repository
    const repoUrl = isFedora
      ? 'https://download.docker.com/linux/fedora/docker-ce.repo'
      : 'https://download.docker.com/linux/centos/docker-ce.repo';

    await execAsync(`dnf config-manager --add-repo ${repoUrl}`);

    // Install Docker
    await execAsync(
      'dnf install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin',
      { timeout: 300000 },
    );
  }

  /**
   * Install Docker using convenience script
   */
  private async installWithScript(): Promise<void> {
    logger.info('Installing Docker using convenience script...');
    await execAsync('curl -fsSL https://get.docker.com | sh', {
      timeout: 600000, // 10 minutes
    });
  }

  /**
   * Start Docker service
   */
  public async startService(): Promise<void> {
    try {
      await execAsync('systemctl start docker');
      await execAsync('systemctl enable docker');
      logger.success('Docker service started and enabled');
    } catch (error) {
      logger.warn(`Could not start Docker service: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Stop Docker service
   */
  public async stopService(): Promise<void> {
    try {
      await execAsync('systemctl stop docker');
      logger.success('Docker service stopped');
    } catch (error) {
      logger.warn(`Could not stop Docker service: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Restart Docker service
   */
  public async restartService(): Promise<void> {
    try {
      await execAsync('systemctl restart docker');
      logger.success('Docker service restarted');
    } catch (error) {
      logger.warn(`Could not restart Docker service: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Check if ModelGrid network exists
   */
  public async networkExists(): Promise<boolean> {
    try {
      await execAsync(`docker network inspect ${this.networkName}`, { timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Create the ModelGrid Docker network
   */
  public async createNetwork(): Promise<boolean> {
    try {
      if (await this.networkExists()) {
        logger.dim(`Network '${this.networkName}' already exists`);
        return true;
      }

      await execAsync(`docker network create ${this.networkName}`, {
        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      });
      logger.success(`Created Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to create network: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Remove the ModelGrid Docker network
   */
  public async removeNetwork(): Promise<boolean> {
    try {
      if (!await this.networkExists()) {
        return true;
      }

      await execAsync(`docker network rm ${this.networkName}`, {
        timeout: TIMING.DOCKER_COMMAND_TIMEOUT_MS,
      });
      logger.success(`Removed Docker network '${this.networkName}'`);
      return true;
    } catch (error) {
      logger.error(`Failed to remove network: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Pull a Docker image
   */
  public async pullImage(image: string): Promise<boolean> {
    try {
      logger.info(`Pulling image: ${image}`);
      await execAsync(`docker pull ${image}`, {
        timeout: 600000, // 10 minutes for large images
      });
      logger.success(`Pulled image: ${image}`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull image: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Check if an image exists locally
   */
  public async imageExists(image: string): Promise<boolean> {
    try {
      await execAsync(`docker image inspect ${image}`, { timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  }
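  // Combined usage sketch (hypothetical helper, not part of this class):
  //
  //   async function ensureImage(docker: DockerManager, image: string): Promise<boolean> {
  //     if (await docker.imageExists(image)) return true;
  //     return docker.pullImage(image);
  //   }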

  /**
   * List running containers
   */
  public async listContainers(all: boolean = false): Promise<string[]> {
    try {
      const flag = all ? '-a' : '';
      const { stdout } = await execAsync(
        `docker ps ${flag} --format "{{.ID}}"`,
        { timeout: 5000 },
      );
      return stdout.trim().split('\n').filter((id) => id);
    } catch {
      return [];
    }
  }

  /**
   * Get container by name
   */
  public async getContainerByName(name: string): Promise<string | null> {
    try {
      // Note: Docker's name filter matches substrings; callers should pass
      // the full container name to avoid ambiguous matches
      const { stdout } = await execAsync(
        `docker ps -a --filter "name=${name}" --format "{{.ID}}"`,
        { timeout: 5000 },
      );
      const id = stdout.trim();
      return id || null;
    } catch {
      return null;
    }
  }

  /**
   * Add user to docker group
   */
  public async addUserToDockerGroup(username?: string): Promise<boolean> {
    try {
      const user = username || process.env.SUDO_USER || process.env.USER || '';
      if (!user) {
        logger.warn('Could not determine username for docker group');
        return false;
      }

      await execAsync(`usermod -aG docker ${user}`);
      logger.success(`Added user '${user}' to docker group`);
      logger.info('Log out and log back in for the change to take effect');
      return true;
    } catch (error) {
      logger.error(`Failed to add user to docker group: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Get Linux distribution info
   */
  private async getLinuxDistro(): Promise<{ id: string; version: string }> {
    try {
      const content = await fs.promises.readFile('/etc/os-release', 'utf8');
      const idMatch = content.match(/^ID=["']?(\w+)["']?$/m);
      const versionMatch = content.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);

      return {
        id: idMatch ? idMatch[1].toLowerCase() : 'unknown',
        version: versionMatch ? versionMatch[1] : '',
      };
    } catch {
      return { id: 'unknown', version: '' };
    }
  }

  /**
   * Print Docker status
   */
  public async printStatus(): Promise<void> {
    const status = await this.getStatus();

    const lines: string[] = [];
    lines.push(`Installed: ${status.installed ? 'Yes' : 'No'}`);

    if (status.installed) {
      lines.push(`Version: ${status.version || 'Unknown'}`);
      lines.push(`Running: ${status.running ? 'Yes' : 'No'}`);

      if (status.running) {
        lines.push(`NVIDIA Runtime: ${status.hasNvidiaRuntime ? 'Yes' : 'No'}`);
        lines.push(`ModelGrid Network: ${status.networkExists ? 'Yes' : 'No'}`);
        if (status.storageDriver) {
          lines.push(`Storage Driver: ${status.storageDriver}`);
        }
      }
    }

    logger.logBox(
      'Docker Status',
      lines,
      50,
      status.installed && status.running ? 'success' : status.installed ? 'warning' : 'error',
    );
  }
}
8
ts/docker/index.ts
Normal file
@@ -0,0 +1,8 @@
/**
 * Docker Management Module
 *
 * Exports all Docker-related functionality.
 */

export { DockerManager } from './docker-manager.ts';
export { ContainerRuntime } from './container-runtime.ts';
281
ts/drivers/amd.ts
Normal file
@@ -0,0 +1,281 @@
/**
 * AMD Driver Management
 *
 * Handles AMD ROCm driver detection, installation, and container setup.
 */

import type { IDriverStatus } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';

/**
 * AMD ROCm Driver Manager
 */
export class AmdDriver extends BaseDriver {
  public readonly vendor = 'amd' as const;
  public readonly displayName = 'AMD ROCm';

  /**
   * Check if AMD ROCm driver is installed
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand('rocm-smi --showdriverversion 2>/dev/null | head -1', {
        timeout: 5000,
        ignoreErrors: true,
      });
      return stdout.includes('Driver');
    } catch {
      return false;
    }
  }

  /**
   * Get AMD ROCm driver status
   */
  public async getStatus(): Promise<IDriverStatus> {
    const status: IDriverStatus = {
      vendor: 'amd',
      installed: false,
      containerSupport: false,
      issues: [],
    };

    // Check if rocm-smi is available (execCommand never throws with
    // ignoreErrors, so the missing-driver case is reported via the result)
    const { stdout: driverInfo } = await this.execCommand(
      'rocm-smi --showdriverversion 2>/dev/null',
      { timeout: 5000, ignoreErrors: true },
    );

    if (driverInfo.includes('Driver')) {
      status.installed = true;
      const match = driverInfo.match(/Driver version:\s*(\S+)/i);
      if (match) {
        status.version = match[1];
      }
    } else {
      status.issues.push('ROCm driver not installed or rocm-smi not available');
      return status;
    }

    // Check ROCm toolkit version
    try {
      const { stdout: rocmVersion } = await this.execCommand(
        'cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1',
        { timeout: 5000, ignoreErrors: true },
      );
      const match = rocmVersion.match(/(\d+\.\d+(?:\.\d+)?)/);
      if (match) {
        status.toolkitVersion = match[1];
      }
    } catch {
      // ROCm toolkit version not available
    }

    // Check Docker ROCm support
    try {
      const { stdout: dockerInfo } = await this.execCommand(
        'docker info 2>/dev/null | grep -i "rocm\\|amd"',
        { timeout: 5000, ignoreErrors: true },
      );

      // Check if rocm/pytorch or similar images can run
      const { stdout: deviceCheck } = await this.execCommand(
        'ls /dev/kfd /dev/dri/render* 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );

      if (deviceCheck.includes('/dev/kfd') || dockerInfo.includes('rocm')) {
        status.containerSupport = true;
      } else {
        status.issues.push('ROCm device files not available for container access');
      }
    } catch {
      status.issues.push('Could not verify Docker ROCm support');
    }

    return status;
  }

  /**
   * Install AMD ROCm driver
   */
  public async install(options: IDriverInstallOptions): Promise<boolean> {
    if (!await this.isRoot()) {
      logger.error('Root privileges required to install AMD ROCm drivers');
      return false;
    }

    const distro = await this.getLinuxDistro();
    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);

    try {
      if (distro.id === 'ubuntu') {
        return await this.installOnUbuntu(options);
      } else if (distro.id === 'rhel' || distro.id === 'centos' || distro.id === 'rocky' || distro.id === 'almalinux') {
        return await this.installOnRhel(options);
      } else {
        logger.error(`Unsupported distribution: ${distro.id}`);
        logger.info('Please install ROCm drivers manually from https://rocm.docs.amd.com/');
        return false;
      }
    } catch (error) {
      logger.error(`Failed to install AMD ROCm drivers: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Install on Ubuntu
   */
  private async installOnUbuntu(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing AMD ROCm on Ubuntu...');

    // Install prerequisites
    await this.aptUpdate();
    await this.aptInstall(['wget', 'gnupg2']);

    // Add ROCm repository
    const rocmVersion = options.toolkitVersion || '6.0';
    const ubuntuVersion = (await this.getLinuxDistro()).version.replace('.', '');

    // Download and install the ROCm repository key
    await this.execCommand(
      `wget -q https://repo.radeon.com/rocm/rocm.gpg.key -O - | apt-key add -`,
    );

    await this.execCommand(
      `echo "deb [arch=amd64] https://repo.radeon.com/rocm/apt/${rocmVersion} ubuntu main" > /etc/apt/sources.list.d/rocm.list`,
    );

    // Add AMDGPU repository
    await this.execCommand(
      `echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${rocmVersion}/ubuntu ${ubuntuVersion === '2204' ? 'jammy' : 'focal'} main" > /etc/apt/sources.list.d/amdgpu.list`,
    );

    await this.aptUpdate();

    // Install AMDGPU driver and ROCm
    await this.aptInstall('amdgpu-dkms');

    if (options.installToolkit) {
      await this.aptInstall('rocm-hip-sdk');
    } else {
      await this.aptInstall('rocm-smi-lib');
    }

    // Add user to video and render groups
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('AMD ROCm installation completed');
    logger.warn('A system reboot is required to load the new driver');
    logger.info('After reboot, verify with: rocm-smi');
    return true;
  }

  /**
   * Install on RHEL
   */
  private async installOnRhel(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing AMD ROCm on RHEL/CentOS...');

    const rocmVersion = options.toolkitVersion || '6.0';
    const distro = await this.getLinuxDistro();
    const rhelVersion = distro.version.split('.')[0];

    // Add EPEL repository
    await this.dnfInstall('epel-release');

    // Add ROCm repository
    await this.execCommand(
      `cat <<EOF > /etc/yum.repos.d/rocm.repo
[ROCm]
name=ROCm
baseurl=https://repo.radeon.com/rocm/yum/${rocmVersion}/main
enabled=1
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF`,
    );

    // Add AMDGPU repository
    await this.execCommand(
      `cat <<EOF > /etc/yum.repos.d/amdgpu.repo
[amdgpu]
name=amdgpu
baseurl=https://repo.radeon.com/amdgpu/${rocmVersion}/rhel/${rhelVersion}/main/x86_64/
enabled=1
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF`,
    );

    // Install AMDGPU driver
    await this.dnfInstall('amdgpu-dkms');

    if (options.installToolkit) {
      await this.dnfInstall('rocm-hip-sdk');
    } else {
      await this.dnfInstall('rocm-smi-lib');
    }

    // Add user to video and render groups
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('AMD ROCm installation completed');
    logger.warn('A system reboot is required to load the new driver');
    return true;
  }

  /**
   * Install container support for AMD GPUs
   */
  public async installContainerSupport(): Promise<boolean> {
    logger.info('Configuring Docker for AMD ROCm...');

    try {
      // AMD ROCm containers work by passing through device files;
      // no special runtime is needed, only --device flags

      // Verify device files exist
      const { stdout: devices } = await this.execCommand('ls -la /dev/kfd /dev/dri/render* 2>/dev/null || true');

      if (!devices.includes('/dev/kfd')) {
        logger.warn('/dev/kfd not found. ROCm driver may not be properly loaded.');
        logger.info('Try rebooting the system after driver installation.');
        return false;
      }

      // Set permissions
      await this.execCommand('chmod 666 /dev/kfd /dev/dri/render* || true');

      logger.success('AMD ROCm container support configured');
      logger.info('Use the following Docker flags for ROCm containers:');
      logger.info('  --device=/dev/kfd --device=/dev/dri --group-add video');
      return true;
    } catch (error) {
      logger.error(`Failed to configure ROCm container support: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Get available ROCm versions
   */
  public async getAvailableVersions(): Promise<string[]> {
    // ROCm has a standard set of supported versions
    return ['6.0', '5.7', '5.6', '5.5', '5.4'];
  }
}
217
ts/drivers/base-driver.ts
Normal file
@@ -0,0 +1,217 @@
/**
 * Base Driver Class
 *
 * Abstract base class for GPU driver management.
 */

import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';

const execAsync = promisify(exec);

/**
 * Options for driver installation
 */
export interface IDriverInstallOptions {
  /** Whether to install the GPU toolkit (CUDA, ROCm, oneAPI) */
  installToolkit: boolean;
  /** Whether to install container support (nvidia-docker, etc.) */
  installContainerSupport: boolean;
  /** Specific driver version to install (optional) */
  driverVersion?: string;
  /** Specific toolkit version to install (optional) */
  toolkitVersion?: string;
  /** Whether to run non-interactively */
  nonInteractive: boolean;
}

/**
 * Abstract base class for GPU drivers
 */
export abstract class BaseDriver {
  /** GPU vendor this driver supports */
  public abstract readonly vendor: TGpuVendor;

  /** Display name for this driver */
  public abstract readonly displayName: string;

  /**
   * Check if the driver is installed
   */
  public abstract isInstalled(): Promise<boolean>;

  /**
   * Get the current driver status
   */
  public abstract getStatus(): Promise<IDriverStatus>;

  /**
   * Install the driver
   */
  public abstract install(options: IDriverInstallOptions): Promise<boolean>;

  /**
   * Install container runtime support (e.g., nvidia-docker)
   */
  public abstract installContainerSupport(): Promise<boolean>;

  /**
   * Get available driver versions
   */
  public abstract getAvailableVersions(): Promise<string[]>;

  /**
   * Execute a shell command with error handling
   */
  protected async execCommand(
    command: string,
    options: { timeout?: number; ignoreErrors?: boolean } = {},
  ): Promise<{ stdout: string; stderr: string }> {
    const { timeout = 30000, ignoreErrors = false } = options;

    try {
      const result = await execAsync(command, { timeout });
      return { stdout: result.stdout, stderr: result.stderr };
    } catch (error) {
      if (ignoreErrors) {
        return { stdout: '', stderr: String(error) };
      }
      throw error;
    }
  }
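  // Behavior note / usage sketch: with ignoreErrors the promise never rejects
  // and a failed command surfaces as empty stdout, so callers should treat
  // empty output as "failed or nothing found":
  //
  //   const { stdout } = await this.execCommand('nvidia-smi -L', { ignoreErrors: true });
  //   const hasNvidiaGpus = stdout.trim().length > 0;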

  /**
   * Check if running as root
   */
  protected async isRoot(): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand('id -u');
      return stdout.trim() === '0';
    } catch {
      return false;
    }
  }

  /**
   * Get the Linux distribution
   */
  protected async getLinuxDistro(): Promise<{ id: string; version: string }> {
    try {
      const { stdout } = await this.execCommand('cat /etc/os-release', { ignoreErrors: true });

      const idMatch = stdout.match(/^ID=["']?(\w+)["']?$/m);
      const versionMatch = stdout.match(/^VERSION_ID=["']?([\d.]+)["']?$/m);

      return {
        id: idMatch ? idMatch[1].toLowerCase() : 'unknown',
        version: versionMatch ? versionMatch[1] : '',
      };
    } catch {
      return { id: 'unknown', version: '' };
    }
  }

  /**
   * Check if a package is installed (apt-based)
   */
  protected async isAptPackageInstalled(packageName: string): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand(`dpkg -l ${packageName} 2>/dev/null | grep "^ii"`, {
        ignoreErrors: true,
      });
      return stdout.includes(packageName);
    } catch {
      return false;
    }
  }

  /**
   * Check if a package is installed (dnf/yum-based)
   */
  protected async isDnfPackageInstalled(packageName: string): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand(`rpm -q ${packageName} 2>/dev/null`, {
        ignoreErrors: true,
      });
      // With ignoreErrors a failed rpm query yields empty stdout, which must
      // count as "not installed" rather than passing the substring check
      return stdout.trim().length > 0 && !stdout.includes('not installed');
    } catch {
      return false;
    }
  }

  /**
   * Run apt-get update
   */
  protected async aptUpdate(): Promise<void> {
    logger.info('Updating package lists...');
    await this.execCommand('apt-get update', { timeout: 120000 });
  }

  /**
   * Install a package using apt
   */
  protected async aptInstall(packages: string | string[]): Promise<void> {
    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
    logger.info(`Installing packages: ${pkgList}`);
    await this.execCommand(`DEBIAN_FRONTEND=noninteractive apt-get install -y ${pkgList}`, {
      timeout: 600000, // 10 minutes for large packages
    });
  }

  /**
   * Install a package using dnf
   */
  protected async dnfInstall(packages: string | string[]): Promise<void> {
    const pkgList = Array.isArray(packages) ? packages.join(' ') : packages;
    logger.info(`Installing packages: ${pkgList}`);
    await this.execCommand(`dnf install -y ${pkgList}`, {
      timeout: 600000,
    });
  }

  /**
   * Add an apt repository
   */
  protected async addAptRepository(repo: string, keyUrl?: string): Promise<void> {
    if (keyUrl) {
      // Add GPG key
      await this.execCommand(`curl -fsSL ${keyUrl} | gpg --dearmor -o /usr/share/keyrings/$(basename ${keyUrl}).gpg`);
    }
    await this.execCommand(`add-apt-repository -y "${repo}"`);
  }

  /**
   * Log driver status summary
   */
  public async logStatus(): Promise<void> {
    const status = await this.getStatus();

    logger.logBoxTitle(`${this.displayName} Driver Status`, 60, status.installed ? 'success' : 'warning');
    logger.logBoxLine(`Installed: ${status.installed ? 'Yes' : 'No'}`);

    if (status.installed) {
      if (status.version) {
        logger.logBoxLine(`Driver Version: ${status.version}`);
      }
      if (status.toolkitVersion) {
        logger.logBoxLine(`Toolkit Version: ${status.toolkitVersion}`);
      }
      logger.logBoxLine(`Container Support: ${status.containerSupport ? 'Yes' : 'No'}`);
      if (status.containerRuntimeVersion) {
        logger.logBoxLine(`Container Runtime: ${status.containerRuntimeVersion}`);
      }
    }

    if (status.issues.length > 0) {
      logger.logBoxLine('');
      logger.logBoxLine('Issues:');
      for (const issue of status.issues) {
        logger.logBoxLine(`  - ${issue}`);
      }
    }

    logger.logBoxEnd();
  }
}
267
ts/drivers/driver-manager.ts
Normal file
@@ -0,0 +1,267 @@
/**
 * Driver Manager
 *
 * Coordinates detection and installation of GPU drivers across all vendors.
 */

import type { IDriverStatus, TGpuVendor } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
import { NvidiaDriver } from './nvidia.ts';
import { AmdDriver } from './amd.ts';
import { IntelDriver } from './intel.ts';

/**
 * Driver Manager - coordinates GPU driver management
 */
export class DriverManager {
  private gpuDetector: GpuDetector;
  private drivers: Map<TGpuVendor, BaseDriver>;

  constructor() {
    this.gpuDetector = new GpuDetector();
    this.drivers = new Map([
      ['nvidia', new NvidiaDriver()],
      ['amd', new AmdDriver()],
      ['intel', new IntelDriver()],
    ]);
  }

  /**
   * Get driver manager for a specific vendor
   */
  public getDriver(vendor: TGpuVendor): BaseDriver | undefined {
    return this.drivers.get(vendor);
  }

  /**
   * Get status of all GPU drivers
   */
  public async getAllDriverStatus(): Promise<Map<TGpuVendor, IDriverStatus>> {
    const statuses = new Map<TGpuVendor, IDriverStatus>();

    // Only check drivers for detected GPUs
    const gpus = await this.gpuDetector.detectGpus();
    const detectedVendors = new Set(gpus.map((g) => g.vendor));

    for (const vendor of detectedVendors) {
      if (vendor === 'unknown') continue;

      const driver = this.drivers.get(vendor);
      if (driver) {
        const status = await driver.getStatus();
        statuses.set(vendor, status);
      }
    }

    return statuses;
  }

  /**
   * Check drivers for all detected GPUs
   */
  public async checkAllDrivers(): Promise<{
    allInstalled: boolean;
    allContainerReady: boolean;
    issues: string[];
  }> {
    const gpus = await this.gpuDetector.detectGpus();
    const issues: string[] = [];
    let allInstalled = true;
    let allContainerReady = true;

    if (gpus.length === 0) {
      issues.push('No GPUs detected');
      return { allInstalled: false, allContainerReady: false, issues };
    }

    // Group GPUs by vendor
    const vendorCounts = new Map<TGpuVendor, number>();
    for (const gpu of gpus) {
      vendorCounts.set(gpu.vendor, (vendorCounts.get(gpu.vendor) || 0) + 1);
    }

    // Check each vendor
    for (const [vendor, count] of vendorCounts) {
      if (vendor === 'unknown') {
        issues.push(`${count} GPU(s) with unknown vendor - cannot manage drivers`);
        continue;
      }

      const driver = this.drivers.get(vendor);
      if (!driver) {
        issues.push(`No driver manager for ${vendor}`);
        continue;
      }

      const status = await driver.getStatus();

      if (!status.installed) {
        allInstalled = false;
        issues.push(`${driver.displayName} driver not installed for ${count} GPU(s)`);
      }

      if (!status.containerSupport) {
        allContainerReady = false;
        issues.push(`${driver.displayName} container support not configured`);
      }

      // Add specific issues
      issues.push(...status.issues);
    }

    return { allInstalled, allContainerReady, issues };
  }

  /**
   * Install drivers for all detected GPUs
   */
  public async installAllDrivers(options: Partial<IDriverInstallOptions> = {}): Promise<boolean> {
    const fullOptions: IDriverInstallOptions = {
      installToolkit: options.installToolkit ?? true,
      installContainerSupport: options.installContainerSupport ?? true,
      nonInteractive: options.nonInteractive ?? false,
      driverVersion: options.driverVersion,
      toolkitVersion: options.toolkitVersion,
    };

    const gpus = await this.gpuDetector.detectGpus();
    const vendors = new Set(gpus.map((g) => g.vendor).filter((v) => v !== 'unknown'));

    if (vendors.size === 0) {
      logger.error('No supported GPUs detected');
      return false;
    }

    let allSuccess = true;

    for (const vendor of vendors) {
      const driver = this.drivers.get(vendor);
      if (!driver) continue;

      logger.info(`Installing ${driver.displayName} drivers...`);

      const success = await driver.install(fullOptions);
      if (!success) {
        allSuccess = false;
        logger.error(`Failed to install ${driver.displayName} drivers`);
      }
    }

    return allSuccess;
  }

  /**
   * Install container support for all GPUs
   */
  public async installContainerSupport(): Promise<boolean> {
    const gpus = await this.gpuDetector.detectGpus();
    const vendors = new Set(gpus.map((g) => g.vendor).filter((v) => v !== 'unknown'));

    let allSuccess = true;

    for (const vendor of vendors) {
      const driver = this.drivers.get(vendor);
      if (!driver) continue;

      const success = await driver.installContainerSupport();
      if (!success) {
        allSuccess = false;
      }
    }

    return allSuccess;
  }

  /**
   * Print driver status summary
   */
  public async printDriverStatus(): Promise<void> {
    const gpus = await this.gpuDetector.detectGpus();

    if (gpus.length === 0) {
      logger.logBox('Driver Status', ['No GPUs detected'], 50, 'warning');
      return;
    }

    // Group by vendor
    const vendorGpus = new Map<TGpuVendor, typeof gpus>();
    for (const gpu of gpus) {
      if (!vendorGpus.has(gpu.vendor)) {
        vendorGpus.set(gpu.vendor, []);
      }
      vendorGpus.get(gpu.vendor)!.push(gpu);
    }

    // Print status for each vendor
    for (const [vendor, gpuList] of vendorGpus) {
      if (vendor === 'unknown') {
        logger.logBox('Unknown GPUs', [
          `${gpuList.length} GPU(s) with unknown vendor`,
          'Manual driver installation may be required',
        ], 50, 'warning');
        continue;
      }

      const driver = this.drivers.get(vendor);
      if (driver) {
        await driver.logStatus();
      }
    }
  }

  /**
   * Get Docker run arguments for GPU support
   */
  public async getDockerGpuArgs(gpuIds?: string[]): Promise<string[]> {
    const gpus = await this.gpuDetector.detectGpus();
    const args: string[] = [];

    // Filter to specific GPUs if provided
    const targetGpus = gpuIds
      ? gpus.filter((g) => gpuIds.includes(g.id))
      : gpus;

    if (targetGpus.length === 0) {
      return args;
    }

    // Determine vendor (assume single vendor for simplicity)
    const vendor = targetGpus[0].vendor;

    switch (vendor) {
      case 'nvidia':
        // NVIDIA uses the nvidia container runtime
        args.push('--runtime=nvidia');
        if (gpuIds && gpuIds.length > 0) {
          // Use specific GPU indices
          const indices = targetGpus.map((g) => g.index).join(',');
          args.push(`--gpus="device=${indices}"`);
        } else {
          args.push('--gpus=all');
        }
        break;

      case 'amd':
        // AMD uses device passthrough; assumes DRM render nodes are
        // numbered from renderD128 in GPU index order
        args.push('--device=/dev/kfd');
        for (const gpu of targetGpus) {
          args.push(`--device=/dev/dri/renderD${128 + gpu.index}`);
        }
        args.push('--group-add=video');
        args.push('--security-opt=seccomp=unconfined');
        break;

      case 'intel':
        // Intel uses device passthrough; same renderD128-based numbering
        // assumption as above
        for (const gpu of targetGpus) {
          args.push(`--device=/dev/dri/renderD${128 + gpu.index}`);
        }
        args.push('--group-add=render');
        break;
    }

    return args;
  }
}
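// Example outputs (illustrative; ids and indices come from GpuDetector at runtime):
//   nvidia, gpuIds ['0', '1'] -> ['--runtime=nvidia', '--gpus="device=0,1"']
//   amd, one GPU              -> ['--device=/dev/kfd', '--device=/dev/dri/renderD128',
//                                 '--group-add=video', '--security-opt=seccomp=unconfined']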
11
ts/drivers/index.ts
Normal file
@@ -0,0 +1,11 @@
/**
 * Driver Management Module
 *
 * Exports all driver detection and installation functionality.
 */

export { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';
export { NvidiaDriver } from './nvidia.ts';
export { AmdDriver } from './amd.ts';
export { IntelDriver } from './intel.ts';
export { DriverManager } from './driver-manager.ts';
339
ts/drivers/intel.ts
Normal file
@@ -0,0 +1,339 @@
/**
 * Intel Driver Management
 *
 * Handles Intel Arc GPU driver detection, installation, and oneAPI setup.
 */

import type { IDriverStatus } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';

/**
 * Intel Arc/oneAPI Driver Manager
 */
export class IntelDriver extends BaseDriver {
  public readonly vendor = 'intel' as const;
  public readonly displayName = 'Intel Arc';

  /**
   * Check if the Intel GPU driver is installed
   */
  public async isInstalled(): Promise<boolean> {
    try {
      // Check for xpu-smi or intel_gpu_top
      const { stdout } = await this.execCommand(
        'xpu-smi discovery 2>/dev/null || intel_gpu_top -l 2>/dev/null || ls /dev/dri/renderD* 2>/dev/null | grep -c renderD',
        { timeout: 5000, ignoreErrors: true },
      );
      return stdout.trim().length > 0 && !stdout.includes('not found');
    } catch {
      return false;
    }
  }

  /**
   * Get Intel GPU driver status
   */
  public async getStatus(): Promise<IDriverStatus> {
    const status: IDriverStatus = {
      vendor: 'intel',
      installed: false,
      containerSupport: false,
      issues: [],
    };

    // Check for i915 driver (Intel integrated/Arc)
    try {
      const { stdout: driverInfo } = await this.execCommand(
        'modinfo i915 2>/dev/null | grep "^version:"',
        { timeout: 5000, ignoreErrors: true },
      );

      if (driverInfo.includes('version')) {
        status.installed = true;
        const match = driverInfo.match(/version:\s*(\S+)/i);
        if (match) {
          status.version = match[1];
        }
      }
    } catch {
      // i915 module info not available
    }

    // Check for xpu-smi (Intel Arc specific)
    try {
      const { stdout: xpuVersion } = await this.execCommand(
        'xpu-smi --version 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );

      if (xpuVersion.includes('xpu-smi')) {
        status.installed = true;
        const match = xpuVersion.match(/(\d+\.\d+(?:\.\d+)?)/);
        if (match) {
          status.version = match[1];
        }
      }
    } catch {
      // xpu-smi not available
    }

    // Check oneAPI toolkit
    try {
      const { stdout: oneApiVersion } = await this.execCommand(
        'ls /opt/intel/oneapi/compiler/*/env/vars.sh 2>/dev/null | head -1 | xargs dirname | xargs dirname | xargs basename',
        { timeout: 5000, ignoreErrors: true },
      );

      if (oneApiVersion.trim()) {
        status.toolkitVersion = oneApiVersion.trim();
      }
    } catch {
      // oneAPI not installed
    }

    // Check container support
    try {
      const { stdout: renderDevices } = await this.execCommand(
        'ls /dev/dri/renderD* 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );

      if (renderDevices.includes('renderD')) {
        status.containerSupport = true;
      } else {
        status.issues.push('Intel GPU render devices not available');
      }
    } catch {
      status.issues.push('Could not check Intel GPU device availability');
    }

    if (!status.installed) {
      status.issues.push('Intel GPU driver not detected');
    }

    return status;
  }

  /**
   * Install Intel GPU drivers and optionally oneAPI
   */
  public async install(options: IDriverInstallOptions): Promise<boolean> {
    if (!await this.isRoot()) {
      logger.error('Root privileges required to install Intel GPU drivers');
      return false;
    }

    const distro = await this.getLinuxDistro();
    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);

    try {
      if (distro.id === 'ubuntu') {
        return await this.installOnUbuntu(options);
      } else if (distro.id === 'fedora') {
        return await this.installOnFedora(options);
      } else {
        logger.error(`Unsupported distribution for Intel Arc: ${distro.id}`);
        logger.info('Please install Intel drivers manually from https://dgpu-docs.intel.com/');
        return false;
      }
    } catch (error) {
      logger.error(`Failed to install Intel drivers: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Install on Ubuntu
   */
  private async installOnUbuntu(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing Intel GPU drivers on Ubuntu...');

    // Install prerequisites
    await this.aptUpdate();
    await this.aptInstall(['wget', 'gpg']);

    // Add Intel graphics repository
    await this.execCommand(
      'wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg',
    );

    const distro = await this.getLinuxDistro();
    const ubuntuCodename = distro.version === '22.04' ? 'jammy' : distro.version === '24.04' ? 'noble' : 'jammy';

    await this.execCommand(
      `echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu ${ubuntuCodename} arc" > /etc/apt/sources.list.d/intel-graphics.list`,
    );

    await this.aptUpdate();

    // Install Intel GPU packages
    await this.aptInstall([
      'intel-opencl-icd',
      'intel-level-zero-gpu',
      'level-zero',
      'intel-media-va-driver-non-free',
      'libmfx1',
      'libmfxgen1',
      'libvpl2',
      'libegl-mesa0',
      'libegl1-mesa',
      'libegl1-mesa-dev',
      'libgbm1',
      'libgl1-mesa-dev',
      'libgl1-mesa-dri',
      'libglapi-mesa',
      'libgles2-mesa-dev',
      'libglx-mesa0',
      'libigdgmm12',
      'libxatracker2',
      'mesa-va-drivers',
      'mesa-vdpau-drivers',
      'mesa-vulkan-drivers',
      'va-driver-all',
    ]);

    // Install xpu-smi for monitoring
    await this.aptInstall('xpu-smi');

    // Install oneAPI toolkit if requested
    if (options.installToolkit) {
      await this.installOneApi();
    }

    // Add user to video and render groups
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('Intel GPU driver installation completed');
    logger.info('Verify installation with: xpu-smi discovery');
    return true;
  }

  /**
   * Install on Fedora
   */
  private async installOnFedora(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing Intel GPU drivers on Fedora...');

    // Intel GPU support is included in newer Fedora kernels;
    // we just need to install the user-space components
    await this.dnfInstall([
      'intel-media-driver',
      'libva-intel-driver',
      'intel-compute-runtime',
      'level-zero',
      'oneapi-level-zero',
    ]);

    // Try to install xpu-smi from the Intel repo
    try {
      await this.execCommand(
        'dnf copr enable -y intel/oneapi || true',
      );
      await this.dnfInstall('xpu-smi');
    } catch {
      logger.warn('Could not install xpu-smi. Intel Arc monitoring may be limited.');
    }

    // Add user to video and render groups
    await this.execCommand('usermod -a -G video,render $SUDO_USER || true');

    // Install oneAPI if requested
    if (options.installToolkit) {
      await this.installOneApi();
    }

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('Intel GPU driver installation completed');
    return true;
  }

  /**
   * Install Intel oneAPI toolkit
   */
  private async installOneApi(): Promise<void> {
    logger.info('Installing Intel oneAPI toolkit...');

    const distro = await this.getLinuxDistro();

    if (distro.id === 'ubuntu' || distro.id === 'debian') {
      // Add Intel oneAPI repository
      await this.execCommand(
        'wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null',
      );

      await this.execCommand(
        'echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list',
      );

      await this.aptUpdate();
      await this.aptInstall('intel-basekit');
    } else if (distro.id === 'fedora') {
      // Add Intel oneAPI repository
      await this.execCommand(
        `cat <<EOF > /etc/yum.repos.d/oneAPI.repo
[oneAPI]
name=Intel oneAPI repository
baseurl=https://yum.repos.intel.com/oneapi
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
EOF`,
      );

      await this.dnfInstall('intel-basekit');
    }

    logger.success('Intel oneAPI toolkit installed');
    logger.info('Source the environment with: source /opt/intel/oneapi/setvars.sh');
  }

  /**
   * Install container support for Intel GPUs
   */
  public async installContainerSupport(): Promise<boolean> {
    logger.info('Configuring Docker for Intel GPUs...');

    try {
      // Intel GPUs work by passing through device files;
      // verify render devices exist
      const { stdout: devices } = await this.execCommand('ls -la /dev/dri/renderD* 2>/dev/null || true');

      if (!devices.includes('renderD')) {
        logger.warn('/dev/dri/renderD* not found. Intel GPU driver may not be properly loaded.');
        return false;
      }

      // Set permissions
      await this.execCommand('chmod 666 /dev/dri/renderD* || true');

      logger.success('Intel GPU container support configured');
      logger.info('Use the following Docker flags for Intel GPU containers:');
      logger.info('  --device=/dev/dri --group-add render');
      return true;
    } catch (error) {
      logger.error(`Failed to configure Intel container support: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Get available driver versions
   */
  public async getAvailableVersions(): Promise<string[]> {
    // Intel Arc drivers are typically tied to kernel versions;
    // return oneAPI versions as a reference
    return ['2024.0', '2023.2', '2023.1', '2023.0'];
  }
}
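
A short sketch of the intended call pattern for IntelDriver (editorial example, not part of the commit; the options fields shown are the ones the install path actually reads):

declare const intel: IntelDriver; // assume an instance from the DriverManager
if (!(await intel.isInstalled())) {
  await intel.install({ installToolkit: true, installContainerSupport: true });
}
const status = await intel.getStatus();
console.log(status.version, status.issues);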
318
ts/drivers/nvidia.ts
Normal file
@@ -0,0 +1,318 @@
/**
 * NVIDIA Driver Management
 *
 * Handles NVIDIA driver detection, installation, and container toolkit setup.
 */

import type { IDriverStatus } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { BaseDriver, type IDriverInstallOptions } from './base-driver.ts';

/**
 * NVIDIA Driver Manager
 */
export class NvidiaDriver extends BaseDriver {
  public readonly vendor = 'nvidia' as const;
  public readonly displayName = 'NVIDIA';

  /**
   * Check if the NVIDIA driver is installed
   */
  public async isInstalled(): Promise<boolean> {
    try {
      const { stdout } = await this.execCommand('nvidia-smi --query-gpu=driver_version --format=csv,noheader', {
        timeout: 5000,
        ignoreErrors: true,
      });
      return stdout.trim().length > 0;
    } catch {
      return false;
    }
  }

  /**
   * Get NVIDIA driver status
   */
  public async getStatus(): Promise<IDriverStatus> {
    const status: IDriverStatus = {
      vendor: 'nvidia',
      installed: false,
      containerSupport: false,
      issues: [],
    };

    // Check if nvidia-smi is available
    try {
      const { stdout: driverVersion } = await this.execCommand(
        'nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1',
        { timeout: 5000 },
      );
      status.installed = true;
      status.version = driverVersion.trim();
    } catch {
      status.issues.push('NVIDIA driver not installed or nvidia-smi not available');
      return status;
    }

    // Check CUDA toolkit
    try {
      const { stdout: cudaVersion } = await this.execCommand(
        'nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"',
        { timeout: 5000, ignoreErrors: true },
      );
      if (cudaVersion.trim()) {
        status.toolkitVersion = cudaVersion.trim();
      }
    } catch {
      // CUDA toolkit not installed
    }

    // Check nvidia-container-toolkit
    try {
      const { stdout: containerVersion } = await this.execCommand(
        'nvidia-container-cli --version 2>&1 | head -1',
        { timeout: 5000, ignoreErrors: true },
      );
      if (containerVersion.includes('version')) {
        status.containerSupport = true;
        const match = containerVersion.match(/version (\d+\.\d+\.\d+)/);
        if (match) {
          status.containerRuntimeVersion = match[1];
        }
      }
    } catch {
      status.issues.push('NVIDIA Container Toolkit not installed');
    }

    // Check if Docker has the nvidia runtime
    try {
      const { stdout: dockerInfo } = await this.execCommand(
        'docker info --format "{{.Runtimes}}" 2>/dev/null',
        { timeout: 5000, ignoreErrors: true },
      );
      if (!dockerInfo.includes('nvidia')) {
        status.issues.push('Docker nvidia runtime not configured');
      }
    } catch {
      // Docker check failed
    }

    return status;
  }

  /**
   * Install NVIDIA driver and optionally CUDA toolkit
   */
  public async install(options: IDriverInstallOptions): Promise<boolean> {
    if (!await this.isRoot()) {
      logger.error('Root privileges required to install NVIDIA drivers');
      return false;
    }

    const distro = await this.getLinuxDistro();
    logger.info(`Detected Linux distribution: ${distro.id} ${distro.version}`);

    try {
      if (distro.id === 'ubuntu' || distro.id === 'debian') {
        return await this.installOnDebian(options);
      } else if (distro.id === 'fedora' || distro.id === 'rhel' || distro.id === 'centos' || distro.id === 'rocky' || distro.id === 'almalinux') {
        return await this.installOnRhel(options);
      } else {
        logger.error(`Unsupported distribution: ${distro.id}`);
        logger.info('Please install NVIDIA drivers manually');
        return false;
      }
    } catch (error) {
      logger.error(`Failed to install NVIDIA drivers: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Install on Debian/Ubuntu
   */
  private async installOnDebian(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing NVIDIA drivers on Debian/Ubuntu...');

    // Refresh package lists
    await this.aptUpdate();

    // Install prerequisites
    await this.aptInstall(['software-properties-common', 'build-essential', 'dkms']);

    // Add NVIDIA PPA (for Ubuntu)
    try {
      await this.execCommand('add-apt-repository -y ppa:graphics-drivers/ppa 2>/dev/null || true');
      await this.aptUpdate();
    } catch {
      // PPA might not be available on all systems
    }

    // Install NVIDIA driver
    const driverPackage = options.driverVersion
      ? `nvidia-driver-${options.driverVersion}`
      : 'nvidia-driver-535'; // Default to a stable version

    await this.aptInstall(driverPackage);

    // Install CUDA toolkit if requested
    if (options.installToolkit) {
      await this.installCudaToolkit(options);
    }

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('NVIDIA driver installation completed');
    logger.warn('A system reboot is required to load the new driver');
    return true;
  }

  /**
   * Install on RHEL/Fedora
   */
  private async installOnRhel(options: IDriverInstallOptions): Promise<boolean> {
    logger.info('Installing NVIDIA drivers on RHEL/Fedora...');

    // Install prerequisites
    await this.dnfInstall(['kernel-devel', 'kernel-headers', 'gcc', 'make', 'dkms', 'acpid']);

    // Add NVIDIA CUDA repository
    const distro = await this.getLinuxDistro();
    const repoUrl = `https://developer.download.nvidia.com/compute/cuda/repos/rhel${distro.version.split('.')[0]}/x86_64/cuda-rhel${distro.version.split('.')[0]}.repo`;

    await this.execCommand(`dnf config-manager --add-repo ${repoUrl}`);

    // Install NVIDIA driver
    await this.dnfInstall('nvidia-driver-latest-dkms');

    // Install CUDA toolkit if requested
    if (options.installToolkit) {
      await this.dnfInstall('cuda');
    }

    // Install container support if requested
    if (options.installContainerSupport) {
      await this.installContainerSupport();
    }

    logger.success('NVIDIA driver installation completed');
    logger.warn('A system reboot is required to load the new driver');
    return true;
  }

  /**
   * Install CUDA toolkit
   */
  private async installCudaToolkit(options: IDriverInstallOptions): Promise<void> {
    logger.info('Installing CUDA toolkit...');

    const distro = await this.getLinuxDistro();

    if (distro.id === 'ubuntu' || distro.id === 'debian') {
      // Add CUDA repository
      const cudaKeyUrl = 'https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb';
      await this.execCommand(`wget -q ${cudaKeyUrl} -O /tmp/cuda-keyring.deb && dpkg -i /tmp/cuda-keyring.deb`);
      await this.aptUpdate();

      const cudaPackage = options.toolkitVersion
        ? `cuda-toolkit-${options.toolkitVersion.replace('.', '-')}`
        : 'cuda-toolkit';

      await this.aptInstall(cudaPackage);
    }
  }

  /**
   * Install NVIDIA Container Toolkit
   */
  public async installContainerSupport(): Promise<boolean> {
    if (!await this.isRoot()) {
      logger.error('Root privileges required to install NVIDIA Container Toolkit');
      return false;
    }

    const distro = await this.getLinuxDistro();
    logger.info('Installing NVIDIA Container Toolkit...');

    try {
      if (distro.id === 'ubuntu' || distro.id === 'debian') {
        // Add repository
        await this.execCommand(
          'curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg',
        );

        const distribution = `${distro.id}${distro.version}`;
        await this.execCommand(
          `curl -s -L https://nvidia.github.io/libnvidia-container/${distribution}/libnvidia-container.list | ` +
          'sed "s#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g" | ' +
          'tee /etc/apt/sources.list.d/nvidia-container-toolkit.list',
        );

        await this.aptUpdate();
        await this.aptInstall('nvidia-container-toolkit');
      } else {
        // RHEL/Fedora
        await this.execCommand(
          'curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | ' +
          'tee /etc/yum.repos.d/nvidia-container-toolkit.repo',
        );
        await this.dnfInstall('nvidia-container-toolkit');
      }

      // Configure Docker runtime
      await this.configureDockerRuntime();

      logger.success('NVIDIA Container Toolkit installed successfully');
      return true;
    } catch (error) {
      logger.error(`Failed to install NVIDIA Container Toolkit: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Configure Docker to use the NVIDIA runtime
   */
  private async configureDockerRuntime(): Promise<void> {
    logger.info('Configuring Docker to use NVIDIA runtime...');

    try {
      // Run nvidia-ctk to configure Docker
      await this.execCommand('nvidia-ctk runtime configure --runtime=docker');

      // Restart Docker
      await this.execCommand('systemctl restart docker');

      logger.success('Docker configured to use NVIDIA runtime');
    } catch (error) {
      logger.warn(`Could not configure Docker runtime automatically: ${error instanceof Error ? error.message : String(error)}`);
      logger.info('Please run: nvidia-ctk runtime configure --runtime=docker');
    }
  }

  /**
   * Get available driver versions
   */
  public async getAvailableVersions(): Promise<string[]> {
    const versions: string[] = [];

    try {
      const distro = await this.getLinuxDistro();

      if (distro.id === 'ubuntu' || distro.id === 'debian') {
        const { stdout } = await this.execCommand(
          'apt-cache search nvidia-driver | grep "^nvidia-driver-[0-9]" | sed "s/nvidia-driver-\\([0-9]*\\).*/\\1/" | sort -rn | uniq',
          { ignoreErrors: true },
        );
        versions.push(...stdout.trim().split('\n').filter((v: string) => v.trim()));
      }
    } catch {
      // Failed to get versions
    }

    return versions;
  }
}
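
A smoke-test sketch after installation (editorial example, not part of the commit; the CUDA image tag is illustrative and running nvidia-smi inside a CUDA base image is the conventional way to confirm container GPU passthrough):

import { exec } from 'node:child_process';
import { promisify } from 'node:util';
const run = promisify(exec);

declare const nvidia: NvidiaDriver; // assume an instance from the DriverManager
const status = await nvidia.getStatus();
if (status.containerSupport) {
  // If this prints the GPU table, the toolkit and runtime are wired up correctly.
  await run('docker run --rm --gpus=all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi');
}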
565
ts/hardware/gpu-detector.ts
Normal file
@@ -0,0 +1,565 @@
/**
 * GPU Detector
 *
 * Detects GPUs on the system (NVIDIA, AMD, Intel Arc) and retrieves their information.
 */

import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import * as fs from 'node:fs';
import type { IGpuInfo, IGpuStatus, TGpuVendor } from '../interfaces/gpu.ts';
import { logger } from '../logger.ts';
import { TIMING } from '../constants.ts';

const execAsync = promisify(exec);

/**
 * GPU Detector class for detecting and querying GPU information
 */
export class GpuDetector {
  private cachedGpus: IGpuInfo[] | null = null;
  private cacheTime: number = 0;
  private readonly cacheDuration = TIMING.GPU_DETECTION_TIMEOUT_MS;

  /**
   * Detect all GPUs on the system
   * @param forceRefresh Force refresh even if cache is valid
   * @returns Array of detected GPU information
   */
  public async detectGpus(forceRefresh: boolean = false): Promise<IGpuInfo[]> {
    // Return cached data if still valid
    if (!forceRefresh && this.cachedGpus && Date.now() - this.cacheTime < this.cacheDuration) {
      return this.cachedGpus;
    }

    const gpus: IGpuInfo[] = [];

    // Detect NVIDIA GPUs
    const nvidiaGpus = await this.detectNvidiaGpus();
    gpus.push(...nvidiaGpus);

    // Detect AMD GPUs
    const amdGpus = await this.detectAmdGpus();
    gpus.push(...amdGpus);

    // Detect Intel GPUs
    const intelGpus = await this.detectIntelGpus();
    gpus.push(...intelGpus);

    // If no GPUs found via vendor-specific tools, try generic detection
    if (gpus.length === 0) {
      const genericGpus = await this.detectGenericGpus();
      gpus.push(...genericGpus);
    }

    // Update cache
    this.cachedGpus = gpus;
    this.cacheTime = Date.now();

    return gpus;
  }

  /**
   * Detect NVIDIA GPUs using nvidia-smi
   */
  private async detectNvidiaGpus(): Promise<IGpuInfo[]> {
    const gpus: IGpuInfo[] = [];

    try {
      // Check if nvidia-smi is available
      const { stdout } = await execAsync(
        'nvidia-smi --query-gpu=index,gpu_uuid,name,memory.total,driver_version,pci.bus_id,compute_cap --format=csv,noheader,nounits',
        { timeout: TIMING.GPU_DETECTION_TIMEOUT_MS },
      );

      const lines = stdout.trim().split('\n').filter((line: string) => line.trim());

      for (const line of lines) {
        const parts = line.split(',').map((p: string) => p.trim());
        if (parts.length >= 7) {
          const [index, _uuid, name, memory, driver, pciId, computeCap] = parts;

          gpus.push({
            id: `nvidia-${index}`,
            vendor: 'nvidia',
            model: name,
            vram: parseInt(memory, 10), // Already in MB
            driverVersion: driver,
            computeCapability: computeCap,
            pciSlot: this.extractPciSlot(pciId),
            pciBusId: pciId,
            index: parseInt(index, 10),
          });
        }
      }

      // Get CUDA version separately
      if (gpus.length > 0) {
        try {
          // Parse the release number from nvcc output only; including the
          // nvidia-smi driver version here would make the regex match it first.
          const { stdout: cudaOut } = await execAsync(
            'nvcc --version 2>/dev/null | grep "release" | sed "s/.*release \\([0-9.]*\\).*/\\1/"',
            { timeout: 5000 },
          );
          const cudaMatch = cudaOut.match(/(\d+\.\d+)/);
          if (cudaMatch) {
            for (const gpu of gpus) {
              gpu.cudaVersion = cudaMatch[1];
            }
          }
        } catch {
          // CUDA version detection failed, that's okay
        }
      }
    } catch {
      // nvidia-smi not available or failed
      logger.dim('NVIDIA GPU detection: nvidia-smi not available');
    }

    return gpus;
  }

  /**
   * Detect AMD GPUs using rocm-smi or lspci
   */
  private async detectAmdGpus(): Promise<IGpuInfo[]> {
    const gpus: IGpuInfo[] = [];

    try {
      // Try rocm-smi first
      const { stdout } = await execAsync(
        'rocm-smi --showproductname --showmeminfo vram --showdriverversion --showbus --csv 2>/dev/null || rocm-smi -a --json 2>/dev/null',
        { timeout: TIMING.GPU_DETECTION_TIMEOUT_MS },
      );

      // Parse rocm-smi output
      if (stdout.includes('{')) {
        // JSON output
        const data = JSON.parse(stdout);
        let index = 0;
        for (const [key, value] of Object.entries(data)) {
          if (key.startsWith('card')) {
            const cardData = value as Record<string, unknown>;
            gpus.push({
              id: `amd-${index}`,
              vendor: 'amd',
              model: String(cardData['Card series'] || cardData['card_series'] || 'AMD GPU'),
              vram: this.parseMemory(String(cardData['VRAM Total Memory (B)'] || cardData['vram_total'] || '0')),
              driverVersion: String(cardData['Driver version'] || cardData['driver_version'] || ''),
              rocmVersion: await this.getRocmVersion(),
              pciSlot: String(cardData['PCI Bus'] || cardData['pci_bus'] || ''),
              pciBusId: String(cardData['PCI Bus'] || cardData['pci_bus'] || ''),
              index: index++,
            });
          }
        }
      } else {
        // CSV output - parse line by line
        const lines = stdout.trim().split('\n');
        let index = 0;
        for (const line of lines) {
          if (line.includes('GPU') || line.includes('Radeon') || line.includes('AMD')) {
            // This is a GPU entry
            gpus.push({
              id: `amd-${index}`,
              vendor: 'amd',
              model: line.trim(),
              vram: 0, // Will need additional parsing
              pciSlot: '',
              index: index++,
            });
          }
        }
      }
    } catch {
      // rocm-smi not available, try lspci
      try {
        const { stdout: lspciOut } = await execAsync(
          'lspci -nn | grep -i "VGA\\|3D\\|Display" | grep -i "AMD\\|ATI\\|Radeon"',
          { timeout: 5000 },
        );

        const lines = lspciOut.trim().split('\n').filter((l: string) => l.trim());
        let index = 0;
        for (const line of lines) {
          const match = line.match(/^([0-9a-f:.]+)\s+.*:\s+(.+)$/i);
          if (match) {
            gpus.push({
              id: `amd-${index}`,
              vendor: 'amd',
              model: match[2].trim(),
              vram: await this.getAmdVramFromSysfs(match[1]),
              pciSlot: match[1],
              pciBusId: match[1],
              index: index++,
            });
          }
        }
      } catch {
        logger.dim('AMD GPU detection: rocm-smi and lspci detection failed');
      }
    }

    return gpus;
  }

  /**
   * Detect Intel GPUs using xpu-smi or lspci
   */
  private async detectIntelGpus(): Promise<IGpuInfo[]> {
    const gpus: IGpuInfo[] = [];

    try {
      // Try xpu-smi first (for Intel Arc GPUs)
      const { stdout } = await execAsync(
        'xpu-smi discovery --json 2>/dev/null',
        { timeout: TIMING.GPU_DETECTION_TIMEOUT_MS },
      );

      const data = JSON.parse(stdout);
      if (data.device_list) {
        let index = 0;
        for (const device of data.device_list) {
          gpus.push({
            id: `intel-${index}`,
            vendor: 'intel',
            model: device.device_name || 'Intel GPU',
            vram: device.memory_physical_size_byte
              ? Math.round(device.memory_physical_size_byte / (1024 * 1024))
              : 0,
            oneApiVersion: await this.getOneApiVersion(),
            pciSlot: device.pci_bdf || '',
            pciBusId: device.pci_bdf || '',
            index: index++,
          });
        }
      }
    } catch {
      // xpu-smi not available, try lspci
      try {
        const { stdout: lspciOut } = await execAsync(
          'lspci -nn | grep -i "VGA\\|3D\\|Display" | grep -i "Intel.*Arc\\|Intel.*Graphics"',
          { timeout: 5000 },
        );

        const lines = lspciOut.trim().split('\n').filter((l: string) => l.trim());
        let index = 0;
        for (const line of lines) {
          // Skip integrated graphics, only look for discrete Arc GPUs
          if (line.toLowerCase().includes('arc')) {
            const match = line.match(/^([0-9a-f:.]+)\s+.*:\s+(.+)$/i);
            if (match) {
              gpus.push({
                id: `intel-${index}`,
                vendor: 'intel',
                model: match[2].trim(),
                vram: 0, // Intel Arc VRAM detection needs sysfs
                pciSlot: match[1],
                pciBusId: match[1],
                index: index++,
              });
            }
          }
        }
      } catch {
        logger.dim('Intel GPU detection: xpu-smi and lspci detection failed');
      }
    }

    return gpus;
  }

  /**
   * Generic GPU detection using lspci
   */
  private async detectGenericGpus(): Promise<IGpuInfo[]> {
    const gpus: IGpuInfo[] = [];

    try {
      const { stdout } = await execAsync(
        'lspci -nn | grep -i "VGA\\|3D\\|Display"',
        { timeout: 5000 },
      );

      const lines = stdout.trim().split('\n').filter((l: string) => l.trim());
      let index = 0;

      for (const line of lines) {
        const match = line.match(/^([0-9a-f:.]+)\s+.*:\s+(.+)$/i);
        if (match) {
          const model = match[2].trim();
          let vendor: TGpuVendor = 'unknown';

          if (/nvidia/i.test(model)) vendor = 'nvidia';
          else if (/amd|ati|radeon/i.test(model)) vendor = 'amd';
          else if (/intel/i.test(model)) vendor = 'intel';

          gpus.push({
            id: `gpu-${index}`,
            vendor,
            model,
            vram: 0,
            pciSlot: match[1],
            pciBusId: match[1],
            index: index++,
          });
        }
      }
    } catch {
      logger.dim('Generic GPU detection: lspci not available');
    }

    return gpus;
  }

  /**
   * Get real-time status for a specific GPU
   */
  public async getGpuStatus(gpuId: string): Promise<IGpuStatus | null> {
    const gpus = await this.detectGpus();
    const gpu = gpus.find((g) => g.id === gpuId);

    if (!gpu) {
      return null;
    }

    if (gpu.vendor === 'nvidia') {
      return this.getNvidiaGpuStatus(gpu);
    } else if (gpu.vendor === 'amd') {
      return this.getAmdGpuStatus(gpu);
    } else if (gpu.vendor === 'intel') {
      return this.getIntelGpuStatus(gpu);
    }

    // Unknown vendor - return basic status
    return {
      id: gpuId,
      utilization: 0,
      memoryUsed: 0,
      memoryTotal: gpu.vram,
      memoryPercent: 0,
      temperature: 0,
      powerUsage: 0,
      powerLimit: 0,
      lastUpdate: Date.now(),
    };
  }

  /**
   * Get real-time status for all GPUs
   */
  public async getAllGpuStatus(): Promise<Map<string, IGpuStatus>> {
    const statuses = new Map<string, IGpuStatus>();
    const gpus = await this.detectGpus();

    for (const gpu of gpus) {
      const status = await this.getGpuStatus(gpu.id);
      if (status) {
        statuses.set(gpu.id, status);
      }
    }

    return statuses;
  }

  /**
   * Get NVIDIA GPU status using nvidia-smi
   */
  private async getNvidiaGpuStatus(gpu: IGpuInfo): Promise<IGpuStatus> {
    try {
      const { stdout } = await execAsync(
        `nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw,power.limit,fan.speed,clocks.gr,clocks.mem --format=csv,noheader,nounits -i ${gpu.index}`,
        { timeout: 5000 },
      );

      const parts = stdout.trim().split(',').map((p: string) => p.trim());
      const [utilization, memUsed, memTotal, temp, power, powerLimit, fan, gpuClock, memClock] = parts;

      return {
        id: gpu.id,
        utilization: parseInt(utilization, 10) || 0,
        memoryUsed: parseInt(memUsed, 10) || 0,
        memoryTotal: parseInt(memTotal, 10) || gpu.vram,
        memoryPercent: memTotal ? Math.round((parseInt(memUsed, 10) / parseInt(memTotal, 10)) * 100) : 0,
        temperature: parseInt(temp, 10) || 0,
        powerUsage: parseFloat(power) || 0,
        powerLimit: parseFloat(powerLimit) || 0,
        fanSpeed: fan !== '[N/A]' ? parseInt(fan, 10) : undefined,
        gpuClock: gpuClock !== '[N/A]' ? parseInt(gpuClock, 10) : undefined,
        memoryClock: memClock !== '[N/A]' ? parseInt(memClock, 10) : undefined,
        lastUpdate: Date.now(),
      };
    } catch {
      return {
        id: gpu.id,
        utilization: 0,
        memoryUsed: 0,
        memoryTotal: gpu.vram,
        memoryPercent: 0,
        temperature: 0,
        powerUsage: 0,
        powerLimit: 0,
        lastUpdate: Date.now(),
      };
    }
  }

  /**
   * Get AMD GPU status using rocm-smi
   */
  private async getAmdGpuStatus(gpu: IGpuInfo): Promise<IGpuStatus> {
    try {
      const { stdout } = await execAsync(
        `rocm-smi -d ${gpu.index} --showuse --showmemuse --showtemp --showpower --json 2>/dev/null`,
        { timeout: 5000 },
      );

      const data = JSON.parse(stdout);
      const cardKey = `card${gpu.index}`;
      const cardData = data[cardKey] || {};

      // rocm-smi reports memory use as a percentage; derive used MB from total VRAM
      const memoryPercent = parseInt(cardData['GPU memory use (%)'] || '0', 10);

      return {
        id: gpu.id,
        utilization: parseInt(cardData['GPU use (%)'] || '0', 10),
        memoryUsed: Math.round((gpu.vram * memoryPercent) / 100),
        memoryTotal: gpu.vram,
        memoryPercent,
        temperature: parseFloat(cardData['Temperature (Sensor edge) (C)'] || '0'),
        powerUsage: parseFloat(cardData['Average Graphics Package Power (W)'] || '0'),
        powerLimit: parseFloat(cardData['Max Graphics Package Power (W)'] || '0'),
        lastUpdate: Date.now(),
      };
    } catch {
      return {
        id: gpu.id,
        utilization: 0,
        memoryUsed: 0,
        memoryTotal: gpu.vram,
        memoryPercent: 0,
        temperature: 0,
        powerUsage: 0,
        powerLimit: 0,
        lastUpdate: Date.now(),
      };
    }
  }

  /**
   * Get Intel GPU status using xpu-smi
   */
  private async getIntelGpuStatus(gpu: IGpuInfo): Promise<IGpuStatus> {
    try {
      const { stdout } = await execAsync(
        `xpu-smi stats -d ${gpu.index} --json 2>/dev/null`,
        { timeout: 5000 },
      );

      const data = JSON.parse(stdout);
      const stats = data.device_level || {};

      return {
        id: gpu.id,
        utilization: Math.round(parseFloat(stats.gpu_utilization || '0')),
        memoryUsed: Math.round(parseFloat(stats.memory_used || '0') / (1024 * 1024)),
        memoryTotal: gpu.vram,
        memoryPercent: Math.round(parseFloat(stats.memory_utilization || '0')),
        temperature: parseFloat(stats.gpu_temperature || '0'),
        powerUsage: parseFloat(stats.power || '0'),
        powerLimit: 0, // Intel doesn't expose this easily
        lastUpdate: Date.now(),
      };
    } catch {
      return {
        id: gpu.id,
        utilization: 0,
        memoryUsed: 0,
        memoryTotal: gpu.vram,
        memoryPercent: 0,
        temperature: 0,
        powerUsage: 0,
        powerLimit: 0,
        lastUpdate: Date.now(),
      };
    }
  }

  /**
   * Helper to extract PCI slot from full bus ID
   */
  private extractPciSlot(pciId: string): string {
    // Input: "00000000:01:00.0" -> Output: "01:00.0"
    const match = pciId.match(/([0-9a-f]+:[0-9a-f]+\.[0-9a-f]+)$/i);
    return match ? match[1] : pciId;
  }

  /**
   * Helper to parse memory values with units into MB
   */
  private parseMemory(value: string): number {
    const match = value.match(/(\d+(?:\.\d+)?)\s*(B|KB|MB|GB|TB)?/i);
    if (!match) return 0;

    let mb = parseFloat(match[1]);
    const unit = (match[2] || 'B').toUpperCase();

    switch (unit) {
      case 'TB':
        mb *= 1024;
        // falls through
      case 'GB':
        mb *= 1024;
        // falls through
      case 'MB':
        break; // Already in MB
      case 'KB':
        mb /= 1024;
        break;
      case 'B':
        mb /= (1024 * 1024);
        break;
    }

    return Math.round(mb);
  }

  /**
   * Get AMD VRAM from sysfs (async)
   */
  private async getAmdVramFromSysfs(pciBusId: string): Promise<number> {
    try {
      const sysfsPath = `/sys/bus/pci/devices/0000:${pciBusId}/mem_info_vram_total`;
      const exists = await fs.promises.access(sysfsPath).then(() => true).catch(() => false);
      if (exists) {
        const content = await fs.promises.readFile(sysfsPath, 'utf8');
        return Math.round(parseInt(content.trim(), 10) / (1024 * 1024));
      }
    } catch {
      // sysfs not available
    }
    return 0;
  }

  /**
   * Get ROCm version
   */
  private async getRocmVersion(): Promise<string | undefined> {
    try {
      const { stdout } = await execAsync('cat /opt/rocm/.info/version 2>/dev/null || rocminfo 2>/dev/null | grep "ROCm" | head -1');
      const match = stdout.match(/(\d+\.\d+(?:\.\d+)?)/);
      return match ? match[1] : undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Get oneAPI version
   */
  private async getOneApiVersion(): Promise<string | undefined> {
    try {
      // Use the POSIX "." builtin since exec runs under /bin/sh, where "source" may not exist
      const { stdout } = await execAsync('. /opt/intel/oneapi/setvars.sh 2>/dev/null && echo $ONEAPI_ROOT 2>/dev/null || cat /opt/intel/oneapi/compiler/latest/env/vars.sh 2>/dev/null | grep VERSION');
      const match = stdout.match(/(\d+\.\d+(?:\.\d+)?)/);
      return match ? match[1] : undefined;
    } catch {
      return undefined;
    }
  }
}
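
A minimal usage sketch (editorial example, not part of the commit; it uses only the methods defined above):

const detector = new GpuDetector();
const gpus = await detector.detectGpus();
for (const gpu of gpus) {
  const status = await detector.getGpuStatus(gpu.id);
  console.log(`${gpu.model}: ${status?.utilization ?? 0}% busy, ${status?.memoryUsed ?? 0}/${gpu.vram} MB`);
}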
8
ts/hardware/index.ts
Normal file
@@ -0,0 +1,8 @@
/**
 * Hardware Detection Module
 *
 * Exports all hardware detection functionality.
 */

export { GpuDetector } from './gpu-detector.ts';
export { SystemInfo } from './system-info.ts';
233
ts/hardware/system-info.ts
Normal file
@@ -0,0 +1,233 @@
/**
 * System Info
 *
 * Gathers system information including CPU, RAM, OS, and Docker status.
 */

import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import * as os from 'node:os';
import type { ISystemInfo } from '../interfaces/gpu.ts';
import { GpuDetector } from './gpu-detector.ts';
import { logger } from '../logger.ts';

const execAsync = promisify(exec);

/**
 * System Info class for gathering system information
 */
export class SystemInfo {
  private gpuDetector: GpuDetector;

  constructor() {
    this.gpuDetector = new GpuDetector();
  }

  /**
   * Get complete system information
   */
  public async getSystemInfo(): Promise<ISystemInfo> {
    const [gpus, dockerVersion, nvidiaContainerVersion, kernelVersion] = await Promise.all([
      this.gpuDetector.detectGpus(),
      this.getDockerVersion(),
      this.getNvidiaContainerVersion(),
      this.getKernelVersion(),
    ]);

    return {
      hostname: os.hostname(),
      cpuModel: this.getCpuModel(),
      cpuCores: os.cpus().length,
      ramTotal: Math.round(os.totalmem() / (1024 * 1024)),
      ramAvailable: Math.round(os.freemem() / (1024 * 1024)),
      os: this.getOsInfo(),
      kernelVersion,
      gpus,
      dockerVersion,
      nvidiaContainerVersion,
    };
  }

  /**
   * Get CPU model name
   */
  private getCpuModel(): string {
    const cpus = os.cpus();
    if (cpus.length > 0) {
      return cpus[0].model;
    }
    return 'Unknown CPU';
  }

  /**
   * Get OS information string
   */
  private getOsInfo(): string {
    const platform = os.platform();
    const release = os.release();

    if (platform === 'linux') {
      return `Linux ${release}`;
    } else if (platform === 'darwin') {
      return `macOS ${release}`;
    }

    return `${platform} ${release}`;
  }

  /**
   * Get kernel version
   */
  private async getKernelVersion(): Promise<string> {
    try {
      const { stdout } = await execAsync('uname -r', { timeout: 5000 });
      return stdout.trim();
    } catch {
      return os.release();
    }
  }

  /**
   * Get Docker version
   */
  private async getDockerVersion(): Promise<string | undefined> {
    try {
      const { stdout } = await execAsync('docker --version', { timeout: 5000 });
      const match = stdout.match(/Docker version (\d+\.\d+\.\d+)/);
      return match ? match[1] : stdout.trim();
    } catch {
      return undefined;
    }
  }

  /**
   * Get NVIDIA Container Toolkit version
   */
  private async getNvidiaContainerVersion(): Promise<string | undefined> {
    try {
      const { stdout } = await execAsync('nvidia-container-cli --version 2>&1 | head -1', { timeout: 5000 });
      const match = stdout.match(/version (\d+\.\d+\.\d+)/);
      return match ? match[1] : undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Check if Docker is running
   */
  public async isDockerRunning(): Promise<boolean> {
    try {
      await execAsync('docker info', { timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Check if the NVIDIA Docker runtime is available
   */
  public async isNvidiaRuntimeAvailable(): Promise<boolean> {
    try {
      const { stdout } = await execAsync('docker info --format "{{.Runtimes}}"', { timeout: 5000 });
      return stdout.includes('nvidia');
    } catch {
      return false;
    }
  }

  /**
   * Check if Podman is available
   */
  public async isPodmanAvailable(): Promise<boolean> {
    try {
      await execAsync('podman --version', { timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get available disk space in the data directory
   * @param path Directory to check
   * @returns Available space in MB
   */
  public async getAvailableDiskSpace(path: string = '/var/lib'): Promise<number> {
    try {
      const { stdout } = await execAsync(`df -m "${path}" | tail -1 | awk '{print $4}'`, { timeout: 5000 });
      return parseInt(stdout.trim(), 10) || 0;
    } catch {
      return 0;
    }
  }

  /**
   * Get system memory usage
   */
  public getMemoryUsage(): { total: number; used: number; available: number; percent: number } {
    const total = Math.round(os.totalmem() / (1024 * 1024));
    const available = Math.round(os.freemem() / (1024 * 1024));
    const used = total - available;
    const percent = Math.round((used / total) * 100);

    return { total, used, available, percent };
  }

  /**
   * Get system load average
   */
  public getLoadAverage(): { load1: number; load5: number; load15: number } {
    const [load1, load5, load15] = os.loadavg();
    return {
      load1: Math.round(load1 * 100) / 100,
      load5: Math.round(load5 * 100) / 100,
      load15: Math.round(load15 * 100) / 100,
    };
  }

  /**
   * Print system info summary to logger
   */
  public async printSystemInfo(): Promise<void> {
    const info = await this.getSystemInfo();

    logger.logBoxTitle('System Information', 70, 'info');
    logger.logBoxLine(`Hostname: ${info.hostname}`);
    logger.logBoxLine(`OS: ${info.os}`);
    logger.logBoxLine(`Kernel: ${info.kernelVersion}`);
    logger.logBoxLine(`CPU: ${info.cpuModel} (${info.cpuCores} cores)`);
    logger.logBoxLine(`RAM: ${Math.round(info.ramTotal / 1024)} GB total, ${Math.round(info.ramAvailable / 1024)} GB available`);
    logger.logBoxLine('');

    if (info.dockerVersion) {
      logger.logBoxLine(`Docker: v${info.dockerVersion}`);
    } else {
      logger.logBoxLine('Docker: Not installed');
    }

    if (info.nvidiaContainerVersion) {
      logger.logBoxLine(`NVIDIA Container Toolkit: v${info.nvidiaContainerVersion}`);
    }

    logger.logBoxLine('');
    logger.logBoxLine(`GPUs Detected: ${info.gpus.length}`);

    for (const gpu of info.gpus) {
      const vramGb = Math.round(gpu.vram / 1024 * 10) / 10;
      logger.logBoxLine(`  ${gpu.id}: ${gpu.model} (${vramGb} GB)`);
      if (gpu.driverVersion) {
        logger.logBoxLine(`    Driver: ${gpu.driverVersion}`);
      }
      if (gpu.cudaVersion) {
        logger.logBoxLine(`    CUDA: ${gpu.cudaVersion}`);
      }
      if (gpu.rocmVersion) {
        logger.logBoxLine(`    ROCm: ${gpu.rocmVersion}`);
      }
    }

    logger.logBoxEnd();
  }
}
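
A short sketch of the intended call pattern (editorial example, not part of the commit; the disk path is illustrative):

const sysInfo = new SystemInfo();
if (!(await sysInfo.isDockerRunning())) {
  throw new Error('Docker daemon is not running');
}
await sysInfo.printSystemInfo();
const freeMb = await sysInfo.getAvailableDiskSpace('/var/lib/docker');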
2
ts/helpers/index.ts
Normal file
@@ -0,0 +1,2 @@
export * from './shortid.ts';
export * from './prompt.ts';
55
ts/helpers/prompt.ts
Normal file
@@ -0,0 +1,55 @@
import process from 'node:process';

/**
 * Result from creating a prompt interface
 */
export interface IPromptInterface {
  /** Function to prompt for user input */
  prompt: (question: string) => Promise<string>;
  /** Function to close the prompt interface */
  close: () => void;
}

/**
 * Create a readline prompt interface for interactive CLI input
 * @returns Promise resolving to prompt function and close function
 */
export async function createPrompt(): Promise<IPromptInterface> {
  const readline = await import('node:readline');

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const prompt = (question: string): Promise<string> => {
    return new Promise((resolve) => {
      rl.question(question, (answer: string) => {
        resolve(answer);
      });
    });
  };

  const close = (): void => {
    rl.close();
    process.stdin.destroy();
  };

  return { prompt, close };
}

/**
 * Run an async function with a prompt interface, ensuring cleanup
 * @param fn Function to run with the prompt interface
 * @returns Promise resolving to the function's return value
 */
export async function withPrompt<T>(
  fn: (prompt: (question: string) => Promise<string>) => Promise<T>,
): Promise<T> {
  const { prompt, close } = await createPrompt();
  try {
    return await fn(prompt);
  } finally {
    close();
  }
}
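
Usage sketch (editorial example, not part of the commit): withPrompt guarantees the readline interface is closed even if the callback throws.

const name = await withPrompt(async (prompt) => {
  return prompt('Model name to load: ');
});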
22
ts/helpers/shortid.ts
Normal file
@@ -0,0 +1,22 @@
/**
 * Generate a short unique ID of 6 alphanumeric characters
 * @returns A 6-character alphanumeric string
 */
export function shortId(): string {
  // Define the character set: a-z, A-Z, 0-9
  const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';

  // Generate cryptographically secure random values
  const randomValues = new Uint8Array(6);
  crypto.getRandomValues(randomValues);

  // Map each random value to a character in our set
  let result = '';
  for (let i = 0; i < 6; i++) {
    // Use modulo to map the random byte to a character index
    const index = randomValues[i] % chars.length;
    result += chars[index];
  }

  return result;
}
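
Usage sketch and a caveat (editorial note, not part of the commit): the modulo mapping is slightly biased toward the first characters of the set because 256 is not a multiple of 62; that is fine for identifier suffixes but worth knowing for anything security-sensitive.

const containerName = `modelgrid-${shortId()}`; // e.g. "modelgrid-a3Xk9Q"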
40
ts/index.ts
Normal file
@@ -0,0 +1,40 @@
#!/usr/bin/env node

/**
 * ModelGrid - AI Infrastructure Management
 *
 * Main entry point for Node.js execution.
 */

import { ModelGridCli } from './cli.ts';
import { logger } from './logger.ts';
import process from 'node:process';

/**
 * Main entry point for ModelGrid
 */
async function main() {
  const cli = new ModelGridCli();
  await cli.parseAndExecute(process.argv);
}

// Run the main function and handle any errors
main().catch((error) => {
  logger.error(`Error: ${error}`);
  process.exit(1);
});

// Export core classes for programmatic use
export { ModelGrid } from './modelgrid.ts';
export { ModelGridCli } from './cli.ts';
export { Daemon } from './daemon.ts';
export { Systemd } from './systemd.ts';

// Export modules
export * from './interfaces/index.ts';
export * from './hardware/index.ts';
export * from './drivers/index.ts';
export * from './docker/index.ts';
export * from './containers/index.ts';
export * from './models/index.ts';
export * from './api/index.ts';
329
ts/interfaces/api.ts
Normal file
@@ -0,0 +1,329 @@
/**
 * ModelGrid API Interfaces
 *
 * OpenAI-compatible API types for the ModelGrid gateway.
 */

/**
 * Chat message role
 */
export type TChatRole = 'system' | 'user' | 'assistant' | 'tool';

/**
 * Chat message
 */
export interface IChatMessage {
  /** Message role */
  role: TChatRole;
  /** Message content */
  content: string;
  /** Name of the participant (optional) */
  name?: string;
  /** Tool calls made by the assistant (optional) */
  tool_calls?: IToolCall[];
  /** Tool call ID (for tool response messages) */
  tool_call_id?: string;
}

/**
 * Tool call from assistant
 */
export interface IToolCall {
  /** Unique ID for this tool call */
  id: string;
  /** Type of tool call */
  type: 'function';
  /** Function call details */
  function: {
    /** Function name */
    name: string;
    /** Function arguments as JSON string */
    arguments: string;
  };
}

/**
 * Tool definition for function calling
 */
export interface ITool {
  /** Tool type */
  type: 'function';
  /** Function definition */
  function: {
    /** Function name */
    name: string;
    /** Function description */
    description: string;
    /** Function parameters (JSON Schema) */
    parameters: Record<string, unknown>;
  };
}

/**
 * Chat completion request (OpenAI-compatible)
 */
export interface IChatCompletionRequest {
  /** Model to use */
  model: string;
  /** Messages in the conversation */
  messages: IChatMessage[];
  /** Maximum tokens to generate */
  max_tokens?: number;
  /** Sampling temperature (0-2) */
  temperature?: number;
  /** Top-p sampling */
  top_p?: number;
  /** Number of completions to generate */
  n?: number;
  /** Whether to stream the response */
  stream?: boolean;
  /** Stop sequences */
  stop?: string | string[];
  /** Presence penalty (-2 to 2) */
  presence_penalty?: number;
  /** Frequency penalty (-2 to 2) */
  frequency_penalty?: number;
  /** User identifier */
  user?: string;
  /** Tools available for function calling */
  tools?: ITool[];
  /** Tool choice preference */
  tool_choice?: 'none' | 'auto' | { type: 'function'; function: { name: string } };
}

/**
 * Chat completion choice
 */
export interface IChatCompletionChoice {
  /** Choice index */
  index: number;
  /** Generated message */
  message: IChatMessage;
  /** Finish reason */
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
}

/**
 * Token usage information
 */
export interface IUsage {
  /** Number of tokens in the prompt */
  prompt_tokens: number;
  /** Number of tokens in the completion */
  completion_tokens: number;
  /** Total tokens used */
  total_tokens: number;
}

/**
 * Chat completion response (OpenAI-compatible)
 */
export interface IChatCompletionResponse {
  /** Unique ID for this completion */
  id: string;
  /** Object type */
  object: 'chat.completion';
  /** Creation timestamp */
  created: number;
  /** Model used */
  model: string;
  /** System fingerprint */
  system_fingerprint?: string;
  /** Generated choices */
  choices: IChatCompletionChoice[];
  /** Token usage */
  usage: IUsage;
}

/**
 * Chat completion chunk for streaming
 */
export interface IChatCompletionChunk {
  /** Unique ID for this completion */
  id: string;
  /** Object type */
  object: 'chat.completion.chunk';
  /** Creation timestamp */
  created: number;
  /** Model used */
  model: string;
  /** System fingerprint */
  system_fingerprint?: string;
  /** Delta choices */
  choices: IChatCompletionChunkChoice[];
}

/**
 * Streaming choice delta
 */
export interface IChatCompletionChunkChoice {
  /** Choice index */
  index: number;
  /** Delta content */
  delta: Partial<IChatMessage>;
  /** Finish reason */
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
}
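
A sketch of a request that satisfies these types (editorial example, not part of the commit; the base URL and model name are placeholders, and /v1/chat/completions matches the handler shown earlier in this commit):

const baseUrl = 'http://localhost:3000'; // placeholder port
const request: IChatCompletionRequest = {
  model: 'llama-3.1-8b', // placeholder model name
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'Hello!' },
  ],
  max_tokens: 256,
  stream: false,
};

const res = await fetch(`${baseUrl}/v1/chat/completions`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(request),
});
const completion = (await res.json()) as IChatCompletionResponse;
console.log(completion.choices[0]?.message.content);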

/**
 * Text completion request (legacy endpoint)
 */
export interface ICompletionRequest {
  /** Model to use */
  model: string;
  /** Prompt text */
  prompt: string | string[];
  /** Maximum tokens to generate */
  max_tokens?: number;
  /** Sampling temperature */
  temperature?: number;
  /** Top-p sampling */
  top_p?: number;
  /** Number of completions */
  n?: number;
  /** Whether to stream */
  stream?: boolean;
  /** Stop sequences */
  stop?: string | string[];
  /** Echo prompt in response */
  echo?: boolean;
}

/**
 * Text completion response
 */
export interface ICompletionResponse {
  /** Unique ID */
  id: string;
  /** Object type */
  object: 'text_completion';
  /** Creation timestamp */
  created: number;
  /** Model used */
  model: string;
  /** Generated choices */
  choices: ICompletionChoice[];
  /** Token usage */
  usage: IUsage;
}

/**
 * Text completion choice
 */
export interface ICompletionChoice {
  /** Generated text */
  text: string;
  /** Choice index */
  index: number;
  /** Finish reason */
  finish_reason: 'stop' | 'length' | null;
}

/**
 * Embeddings request
 */
export interface IEmbeddingsRequest {
  /** Model to use */
  model: string;
  /** Input text(s) */
  input: string | string[];
  /** User identifier */
  user?: string;
  /** Encoding format */
  encoding_format?: 'float' | 'base64';
}

/**
 * Embeddings response
 */
export interface IEmbeddingsResponse {
  /** Object type */
  object: 'list';
  /** Embedding data */
  data: IEmbeddingData[];
  /** Model used */
  model: string;
  /** Token usage */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}

/**
 * Single embedding data
 */
export interface IEmbeddingData {
  /** Object type */
  object: 'embedding';
  /** Embedding vector */
  embedding: number[];
  /** Index in the input array */
  index: number;
}
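
/*
 * Illustrative request/response pair (placeholder model name, truncated
 * vector):
 *
 *   request:  { "model": "nomic-embed-text", "input": "hello world" }
 *   response: {
 *     "object": "list",
 *     "data": [
 *       { "object": "embedding", "embedding": [0.012, -0.034, ...], "index": 0 }
 *     ],
 *     "model": "nomic-embed-text",
 *     "usage": { "prompt_tokens": 2, "total_tokens": 2 }
 *   }
 */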

/**
 * Model information (OpenAI-compatible)
 */
export interface IModelInfo {
  /** Model ID */
  id: string;
  /** Object type */
  object: 'model';
  /** Creation timestamp */
  created: number;
  /** Model owner/organization */
  owned_by: string;
}

/**
 * List models response
 */
export interface IListModelsResponse {
  /** Object type */
  object: 'list';
  /** Available models */
  data: IModelInfo[];
}

/**
 * API error response
 */
export interface IApiError {
  /** Error details */
  error: {
    /** Error message */
    message: string;
    /** Error type */
    type: string;
    /** Parameter that caused the error */
    param?: string;
    /** Error code */
    code?: string;
  };
}
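
/*
 * Illustrative error envelope (placeholder values; the exact `type` and
 * `code` strings are whatever the handler passes to its error helper):
 *
 *   {
 *     "error": {
 *       "message": "Model \"llama9\" not found or could not be loaded",
 *       "type": "invalid_request_error",
 *       "code": "model_not_found"
 *     }
 *   }
 */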

/**
 * Health check response
 */
export interface IHealthResponse {
  /** Status */
  status: 'ok' | 'degraded' | 'error';
  /** Version */
  version: string;
  /** Uptime in seconds */
  uptime: number;
  /** Number of active containers */
  containers: number;
  /** Number of available models */
  models: number;
  /** Number of available GPUs */
  gpus: number;
  /** Detailed status */
  details?: {
    /** Container health */
    containers: Record<string, 'healthy' | 'unhealthy'>;
    /** GPU status */
    gpus: Record<string, 'available' | 'in_use' | 'error'>;
  };
}
121
ts/interfaces/config.ts
Normal file
@@ -0,0 +1,121 @@
/**
 * ModelGrid Configuration Interfaces
 *
 * Defines the configuration structure for the ModelGrid daemon.
 */

import type { IContainerConfig } from './container.ts';

/**
 * API server configuration
 */
export interface IApiConfig {
  /** Port to listen on (default: 8080) */
  port: number;
  /** Host to bind to (default: '0.0.0.0') */
  host: string;
  /** Valid API keys for authentication */
  apiKeys: string[];
  /** Rate limit in requests per minute (optional) */
  rateLimit?: number;
  /** Enable CORS (default: false) */
  cors?: boolean;
  /** Allowed origins for CORS */
  corsOrigins?: string[];
}

/**
 * Docker/container runtime configuration
 */
export interface IDockerConfig {
  /** Docker network name (default: 'modelgrid') */
  networkName: string;
  /** Container runtime to use */
  runtime: 'docker' | 'podman';
  /** Path to docker/podman socket (optional) */
  socketPath?: string;
}

/**
 * GPU assignment configuration
 */
export interface IGpuAssignmentConfig {
  /** Whether to auto-detect GPUs */
  autoDetect: boolean;
  /** Manual GPU to container assignments (gpuId -> containerId) */
  assignments: Record<string, string>;
}

/**
 * Model management configuration
 */
export interface IModelConfig {
  /** URL to fetch greenlit models list */
  greenlistUrl: string;
  /** Whether to auto-pull models when requested */
  autoPull: boolean;
  /** Default container type for new models */
  defaultContainer: 'ollama' | 'vllm' | 'tgi';
  /** Models to auto-load on startup */
  autoLoad: string[];
}

/**
 * Main ModelGrid configuration interface
 */
export interface IModelGridConfig {
  /** Configuration format version */
  version: string;
  /** API server configuration */
  api: IApiConfig;
  /** Docker configuration */
  docker: IDockerConfig;
  /** GPU configuration */
  gpus: IGpuAssignmentConfig;
  /** Container configurations */
  containers: IContainerConfig[];
  /** Model management configuration */
  models: IModelConfig;
  /** Health check interval in milliseconds */
  checkInterval: number;
}
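
/*
 * Illustrative example of a complete configuration document (placeholder
 * values, not shipped defaults):
 *
 *   {
 *     "version": "1.0",
 *     "api": { "port": 8080, "host": "0.0.0.0", "apiKeys": ["changeme"] },
 *     "docker": { "networkName": "modelgrid", "runtime": "docker" },
 *     "gpus": { "autoDetect": true, "assignments": {} },
 *     "containers": [],
 *     "models": {
 *       "greenlistUrl": "https://example.com/greenlist.json",
 *       "autoPull": true,
 *       "defaultContainer": "ollama",
 *       "autoLoad": []
 *     },
 *     "checkInterval": 30000
 *   }
 */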

/**
 * Greenlit model entry from remote list
 */
export interface IGreenlitModel {
  /** Model name (e.g., "llama3:8b") */
  name: string;
  /** Preferred container type */
  container: 'ollama' | 'vllm' | 'tgi';
  /** Minimum VRAM required in GB */
  minVram: number;
  /** Optional tags for categorization */
  tags?: string[];
  /** Optional description */
  description?: string;
}

/**
 * Greenlit models list structure
 */
export interface IGreenlitModelsList {
  /** List version */
  version: string;
  /** Last updated timestamp */
  lastUpdated: string;
  /** List of greenlit models */
  models: IGreenlitModel[];
}
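
/*
 * Illustrative greenlist document (placeholder entries):
 *
 *   {
 *     "version": "1.0",
 *     "lastUpdated": "2024-01-01T00:00:00Z",
 *     "models": [
 *       {
 *         "name": "llama3:8b",
 *         "container": "ollama",
 *         "minVram": 8,
 *         "tags": ["chat"],
 *         "description": "General-purpose chat model"
 *       }
 *     ]
 *   }
 */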

/**
 * Update status information
 */
export interface IUpdateStatus {
  /** Current installed version */
  currentVersion: string;
  /** Latest available version */
  latestVersion: string;
  /** Whether an update is available */
  updateAvailable: boolean;
}
176
ts/interfaces/container.ts
Normal file
@@ -0,0 +1,176 @@
/**
 * ModelGrid Container Interfaces
 *
 * Defines types for container management (Ollama, vLLM, TGI).
 */

/**
 * Container type
 */
export type TContainerType = 'ollama' | 'vllm' | 'tgi' | 'custom';

/**
 * Container health status
 */
export type TContainerHealth = 'healthy' | 'unhealthy' | 'starting' | 'unknown';

/**
 * Container run status
 */
export type TContainerRunStatus = 'running' | 'stopped' | 'starting' | 'stopping' | 'error';

/**
 * Container configuration
 */
export interface IContainerConfig {
  /** Unique identifier for this container */
  id: string;
  /** Container type */
  type: TContainerType;
  /** Friendly name for the container */
  name: string;
  /** Docker image to use */
  image: string;
  /** GPU IDs to assign to this container */
  gpuIds: string[];
  /** Internal port the container listens on */
  port: number;
  /** External port to expose (optional, uses internal port if not specified) */
  externalPort?: number;
  /** Models to pre-load in this container */
  models: string[];
  /** Environment variables */
  env?: Record<string, string>;
  /** Volume mounts (host:container format) */
  volumes?: string[];
  /** Whether to auto-start this container */
  autoStart: boolean;
  /** Restart policy */
  restartPolicy: 'no' | 'always' | 'on-failure' | 'unless-stopped';
  /** Maximum restart attempts (for on-failure policy) */
  maxRestarts?: number;
  /** Memory limit (e.g., "16g") */
  memoryLimit?: string;
  /** CPU limit (e.g., "4") */
  cpuLimit?: string;
  /** Custom command arguments */
  command?: string[];
}
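
/*
 * Illustrative example (placeholder values): an Ollama container pinned to a
 * single GPU with one pre-loaded model.
 *
 *   const example: IContainerConfig = {
 *     id: 'ollama-0',
 *     type: 'ollama',
 *     name: 'ollama-primary',
 *     image: 'ollama/ollama:latest',
 *     gpuIds: ['gpu-0'],
 *     port: 11434,
 *     models: ['llama3:8b'],
 *     autoStart: true,
 *     restartPolicy: 'unless-stopped',
 *   };
 */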

/**
 * Container status information
 */
export interface IContainerStatus {
  /** Container ID */
  id: string;
  /** Docker container ID */
  dockerId?: string;
  /** Container name */
  name: string;
  /** Container type */
  type: TContainerType;
  /** Whether the container is running */
  running: boolean;
  /** Run status */
  runStatus: TContainerRunStatus;
  /** Health status */
  health: TContainerHealth;
  /** Health check message */
  healthMessage?: string;
  /** GPU utilization (if assigned) */
  gpuUtilization?: number;
  /** Memory usage in MB */
  memoryUsage?: number;
  /** CPU usage percentage */
  cpuUsage?: number;
  /** List of currently loaded models */
  loadedModels: string[];
  /** Container uptime in seconds */
  uptime?: number;
  /** Container start time */
  startTime?: number;
  /** Number of requests served */
  requestsServed?: number;
  /** Last error message (if any) */
  lastError?: string;
  /** Assigned GPU IDs */
  assignedGpus: string[];
  /** Internal endpoint URL */
  endpoint: string;
}

/**
 * Model loaded in a container
 */
export interface ILoadedModel {
  /** Model name */
  name: string;
  /** Model size in bytes */
  size: number;
  /** Model format/quantization */
  format?: string;
  /** Whether the model is currently loaded in memory */
  loaded: boolean;
  /** Last used timestamp */
  lastUsed?: number;
  /** Number of requests served by this model */
  requestCount: number;
}

/**
 * Container endpoint for API routing
 */
export interface IContainerEndpoint {
  /** Container ID */
  containerId: string;
  /** Container type */
  type: TContainerType;
  /** Endpoint URL */
  url: string;
  /** List of models available at this endpoint */
  models: string[];
  /** Whether the endpoint is healthy */
  healthy: boolean;
  /** Priority for load balancing (lower = higher priority) */
  priority: number;
}

/**
 * Container creation options
 */
export interface IContainerCreateOptions {
  /** Container type */
  type: TContainerType;
  /** Friendly name */
  name: string;
  /** GPU IDs to assign */
  gpuIds: string[];
  /** Models to pre-load */
  models?: string[];
  /** Custom image (optional, uses default for type) */
  image?: string;
  /** Custom port (optional, uses default for type) */
  port?: number;
  /** Environment variables */
  env?: Record<string, string>;
  /** Volume mounts */
  volumes?: string[];
  /** Auto-start on daemon startup */
  autoStart?: boolean;
}

/**
 * Container logs options
 */
export interface IContainerLogsOptions {
  /** Container ID */
  containerId: string;
  /** Number of lines to return (default: 100) */
  lines?: number;
  /** Follow logs in real-time */
  follow?: boolean;
  /** Include timestamps */
  timestamps?: boolean;
  /** Filter by log level */
  level?: 'all' | 'error' | 'warn' | 'info' | 'debug';
}
132
ts/interfaces/gpu.ts
Normal file
@@ -0,0 +1,132 @@
/**
 * ModelGrid GPU Interfaces
 *
 * Defines types for GPU detection and management.
 */

/**
 * GPU vendor type
 */
export type TGpuVendor = 'nvidia' | 'amd' | 'intel' | 'unknown';

/**
 * GPU information detected from the system
 */
export interface IGpuInfo {
  /** Unique identifier for this GPU */
  id: string;
  /** GPU vendor */
  vendor: TGpuVendor;
  /** GPU model name (e.g., "NVIDIA GeForce RTX 4090") */
  model: string;
  /** Total VRAM in MB */
  vram: number;
  /** Driver version (if available) */
  driverVersion?: string;
  /** CUDA version (NVIDIA only) */
  cudaVersion?: string;
  /** Compute capability (NVIDIA only, e.g., "8.9") */
  computeCapability?: string;
  /** ROCm version (AMD only) */
  rocmVersion?: string;
  /** oneAPI version (Intel only) */
  oneApiVersion?: string;
  /** PCI slot identifier */
  pciSlot: string;
  /** PCI bus ID (e.g., "0000:01:00.0") */
  pciBusId?: string;
  /** GPU index in the system */
  index: number;
}
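
/*
 * Illustrative example of a detected GPU (placeholder values):
 *
 *   const gpu: IGpuInfo = {
 *     id: 'gpu-0',
 *     vendor: 'nvidia',
 *     model: 'NVIDIA GeForce RTX 4090',
 *     vram: 24576, // MB
 *     driverVersion: '550.54',
 *     cudaVersion: '12.4',
 *     computeCapability: '8.9',
 *     pciSlot: '01:00.0',
 *     pciBusId: '0000:01:00.0',
 *     index: 0,
 *   };
 */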

/**
 * Real-time GPU status
 */
export interface IGpuStatus {
  /** GPU identifier */
  id: string;
  /** Current GPU utilization percentage (0-100) */
  utilization: number;
  /** Current memory usage in MB */
  memoryUsed: number;
  /** Total memory in MB */
  memoryTotal: number;
  /** Memory usage percentage */
  memoryPercent: number;
  /** Current temperature in Celsius */
  temperature: number;
  /** Current power usage in Watts */
  powerUsage: number;
  /** Power limit in Watts */
  powerLimit: number;
  /** Fan speed percentage (if available) */
  fanSpeed?: number;
  /** GPU clock speed in MHz */
  gpuClock?: number;
  /** Memory clock speed in MHz */
  memoryClock?: number;
  /** Last update timestamp */
  lastUpdate: number;
}

/**
 * Combined GPU information and status
 */
export interface IGpuFullStatus extends IGpuInfo {
  /** Real-time status */
  status: IGpuStatus;
  /** Container ID assigned to this GPU (if any) */
  assignedContainer?: string;
  /** Whether the GPU is available for use */
  available: boolean;
  /** Health status */
  health: 'healthy' | 'warning' | 'error' | 'unknown';
  /** Health message (if warning or error) */
  healthMessage?: string;
}

/**
 * System information including all GPUs
 */
export interface ISystemInfo {
  /** System hostname */
  hostname: string;
  /** CPU model name */
  cpuModel: string;
  /** Number of CPU cores */
  cpuCores: number;
  /** Total RAM in MB */
  ramTotal: number;
  /** Available RAM in MB */
  ramAvailable: number;
  /** Operating system */
  os: string;
  /** Kernel version */
  kernelVersion: string;
  /** List of detected GPUs */
  gpus: IGpuInfo[];
  /** Docker version (if installed) */
  dockerVersion?: string;
  /** NVIDIA Container Toolkit version (if installed) */
  nvidiaContainerVersion?: string;
}

/**
 * Driver status for a vendor
 */
export interface IDriverStatus {
  /** GPU vendor */
  vendor: TGpuVendor;
  /** Whether the driver is installed */
  installed: boolean;
  /** Driver version (if installed) */
  version?: string;
  /** CUDA/ROCm/oneAPI toolkit version (if installed) */
  toolkitVersion?: string;
  /** Container runtime support (e.g., nvidia-docker) */
  containerSupport: boolean;
  /** Container runtime version */
  containerRuntimeVersion?: string;
  /** List of detected issues */
  issues: string[];
}
11
ts/interfaces/index.ts
Normal file
@@ -0,0 +1,11 @@
/**
 * ModelGrid Interfaces
 *
 * Central export for all TypeScript interfaces used throughout ModelGrid.
 */

export * from './config.ts';
export * from './gpu.ts';
export * from './container.ts';
export * from './api.ts';
export * from './modelgrid-accessor.ts';
31
ts/interfaces/modelgrid-accessor.ts
Normal file
@@ -0,0 +1,31 @@
/**
 * ModelGrid Accessor Interface
 *
 * Interface to break circular dependencies between ModelGrid and its submodules.
 */

import type { IUpdateStatus } from './config.ts';

/**
 * Interface for accessing the ModelGrid instance from submodules.
 * This breaks the circular dependency between ModelGrid and its managers.
 */
export interface IModelGridAccessor {
  /**
   * Get the current version of ModelGrid
   * @returns The current version string
   */
  getVersion(): string;

  /**
   * Get the update status
   * @returns Object with current version, latest version, and update availability
   */
  getUpdateStatus(): IUpdateStatus;

  /**
   * Check for updates
   * @returns Promise resolving to true if an update is available
   */
  checkForUpdates(): Promise<boolean>;
}
334
ts/logger.ts
Normal file
@@ -0,0 +1,334 @@
import { symbols, theme } from './colors.ts';

/**
 * Table column alignment options
 */
export type TColumnAlign = 'left' | 'right' | 'center';

/**
 * Table column definition
 */
export interface ITableColumn {
  /** Column header text */
  header: string;
  /** Column key in data object */
  key: string;
  /** Column alignment (default: left) */
  align?: TColumnAlign;
  /** Column width (auto-calculated if not specified) */
  width?: number;
  /** Color function to apply to cell values */
  color?: (value: string) => string;
}

/**
 * Box style types with colors
 */
export type TBoxStyle = 'default' | 'success' | 'error' | 'warning' | 'info';

/**
 * A simple logger class that provides consistent formatting for log messages,
 * including support for logboxes with a title, content lines, and a closing border.
 */
export class Logger {
  private currentBoxWidth: number | null = null;
  private currentBoxStyle: TBoxStyle = 'default';
  private static instance: Logger;

  /** Default width to use when no width is specified */
  private readonly DEFAULT_WIDTH = 60;

  /**
   * Creates a new Logger instance
   */
  constructor() {
    this.currentBoxWidth = null;
  }

  /**
   * Get the singleton logger instance
   * @returns The singleton logger instance
   */
  public static getInstance(): Logger {
    if (!Logger.instance) {
      Logger.instance = new Logger();
    }
    return Logger.instance;
  }

  /**
   * Log a message
   * @param message Message to log
   */
  public log(message: string): void {
    console.log(message);
  }

  /**
   * Log an error message (red with X symbol)
   * @param message Error message to log
   */
  public error(message: string): void {
    console.error(`${symbols.error} ${theme.error(message)}`);
  }

  /**
   * Log a warning message (yellow with warning symbol)
   * @param message Warning message to log
   */
  public warn(message: string): void {
    console.warn(`${symbols.warning} ${theme.warning(message)}`);
  }

  /**
   * Log a success message (green with checkmark symbol)
   * @param message Success message to log
   */
  public success(message: string): void {
    console.log(`${symbols.success} ${theme.success(message)}`);
  }

  /**
   * Log an info message (cyan with info symbol)
   * @param message Info message to log
   */
  public info(message: string): void {
    console.log(`${symbols.info} ${theme.info(message)}`);
  }

  /**
   * Log a dim/secondary message
   * @param message Message to log in dim style
   */
  public dim(message: string): void {
    console.log(theme.dim(message));
  }

  /**
   * Log a highlighted/bold message
   * @param message Message to highlight
   */
  public highlight(message: string): void {
    console.log(theme.highlight(message));
  }

  /**
   * Get color function for box based on style
   */
  private getBoxColor(style: TBoxStyle): (text: string) => string {
    switch (style) {
      case 'success':
        return theme.borderSuccess;
      case 'error':
        return theme.borderError;
      case 'warning':
        return theme.borderWarning;
      case 'info':
        return theme.borderInfo;
      case 'default':
      default:
        return theme.borderDefault;
    }
  }

  /**
   * Log a logbox title and set the current box width
   * @param title Title of the logbox
   * @param width Width of the logbox (including borders), defaults to DEFAULT_WIDTH
   * @param style Box style for coloring (default, success, error, warning, info)
   */
  public logBoxTitle(title: string, width?: number, style?: TBoxStyle): void {
    this.currentBoxWidth = width || this.DEFAULT_WIDTH;
    this.currentBoxStyle = style || 'default';

    const colorFn = this.getBoxColor(this.currentBoxStyle);

    // Create the title line with appropriate padding
    const paddedTitle = ` ${title} `;
    const remainingSpace = this.currentBoxWidth - 3 - paddedTitle.length;

    // Title line: ┌─ Title ───┐
    const titleLine = `┌─${paddedTitle}${'─'.repeat(Math.max(0, remainingSpace))}┐`;

    console.log(colorFn(titleLine));
  }

  /**
   * Log a logbox line
   * @param content Content of the line
   * @param width Optional width override. If not provided, uses the current box width or DEFAULT_WIDTH.
   */
  public logBoxLine(content: string, width?: number): void {
    if (!this.currentBoxWidth && !width) {
      // No current width and no width provided, use default width
      this.logBoxTitle('', this.DEFAULT_WIDTH);
    }

    const boxWidth = width || this.currentBoxWidth || this.DEFAULT_WIDTH;
    const colorFn = this.getBoxColor(this.currentBoxStyle);

    // Calculate the available space for content (use visible length)
    const availableSpace = boxWidth - 2; // Account for left and right borders
    const visibleLen = this.visibleLength(content);

    if (visibleLen <= availableSpace - 1) {
      // Content fits; pad so the right border lines up
      const padding = availableSpace - visibleLen - 1;
      const line = `│ ${content}${' '.repeat(padding)}│`;
      console.log(colorFn(line));
    } else {
      // Content is too long, let it flow out of boundaries.
      const line = `│ ${content}`;
      console.log(colorFn(line));
    }
  }

  /**
   * Log a logbox end
   * @param width Optional width override. If not provided, uses the current box width or DEFAULT_WIDTH.
   */
  public logBoxEnd(width?: number): void {
    const boxWidth = width || this.currentBoxWidth || this.DEFAULT_WIDTH;
    const colorFn = this.getBoxColor(this.currentBoxStyle);

    // Create the bottom border
    const bottomLine = `└${'─'.repeat(boxWidth - 2)}┘`;
    console.log(colorFn(bottomLine));

    // Reset the current box width and style
    this.currentBoxWidth = null;
    this.currentBoxStyle = 'default';
  }

  /**
   * Log a complete logbox with title, content lines, and ending
   * @param title Title of the logbox
   * @param lines Array of content lines
   * @param width Width of the logbox, defaults to DEFAULT_WIDTH
   * @param style Box style for coloring
   */
  public logBox(title: string, lines: string[], width?: number, style?: TBoxStyle): void {
    this.logBoxTitle(title, width || this.DEFAULT_WIDTH, style);

    for (const line of lines) {
      this.logBoxLine(line);
    }

    this.logBoxEnd();
  }
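
  // Usage sketch (illustrative; exact border rendering depends on the
  // terminal font):
  //   logger.logBox('GPU Status', ['RTX 4090: 92% util'], 40, 'info');
  // prints something like:
  //   ┌─ GPU Status ─────────────────────────┐
  //   │ RTX 4090: 92% util                   │
  //   └──────────────────────────────────────┘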

  /**
   * Log a divider line
   * @param width Width of the divider, defaults to DEFAULT_WIDTH
   * @param character Character to use for the divider (default: ─)
   */
  public logDivider(width?: number, character: string = '─'): void {
    console.log(character.repeat(width || this.DEFAULT_WIDTH));
  }

  /**
   * Strip ANSI color codes from string for accurate length calculation
   */
  private stripAnsi(text: string): string {
    // Remove ANSI escape codes (intentional control character regex)
    // deno-lint-ignore no-control-regex
    return text.replace(/\x1b\[[0-9;]*m/g, '');
  }

  /**
   * Get visible length of string (excluding ANSI codes)
   */
  private visibleLength(text: string): number {
    return this.stripAnsi(text).length;
  }

  /**
   * Align text within a column (handles ANSI color codes correctly)
   */
  private alignText(text: string, width: number, align: TColumnAlign = 'left'): string {
    const visibleLen = this.visibleLength(text);

    if (visibleLen >= width) {
      // Text is too long, truncate the visible part
      const stripped = this.stripAnsi(text);
      return stripped.substring(0, width);
    }

    const padding = width - visibleLen;

    switch (align) {
      case 'right':
        return ' '.repeat(padding) + text;
      case 'center': {
        const leftPad = Math.floor(padding / 2);
        const rightPad = padding - leftPad;
        return ' '.repeat(leftPad) + text + ' '.repeat(rightPad);
      }
      case 'left':
      default:
        return text + ' '.repeat(padding);
    }
  }

  /**
   * Log a formatted table
   * @param columns Column definitions
   * @param rows Array of data objects
   * @param title Optional table title
   */
  public logTable(columns: ITableColumn[], rows: Record<string, string>[], title?: string): void {
    if (rows.length === 0) {
      this.dim('No data to display');
      return;
    }

    // Calculate column widths
    const columnWidths = columns.map((col) => {
      if (col.width) return col.width;

      // Auto-calculate width based on header and data (use visible length)
      let maxWidth = this.visibleLength(col.header);
      for (const row of rows) {
        const value = String(row[col.key] || '');
        maxWidth = Math.max(maxWidth, this.visibleLength(value));
      }
      return maxWidth;
    });

    // Calculate total table width
    const totalWidth = columnWidths.reduce((sum, w) => sum + w, 0) + (columns.length * 3) + 1;

    // Print title if provided
    if (title) {
      this.logBoxTitle(title, totalWidth);
    } else {
      // Print top border
      console.log('┌' + columnWidths.map((w) => '─'.repeat(w + 2)).join('┬') + '┐');
    }

    // Print header row
    const headerCells = columns.map((col, i) =>
      theme.highlight(this.alignText(col.header, columnWidths[i], col.align))
    );
    console.log('│ ' + headerCells.join(' │ ') + ' │');

    // Print separator
    console.log('├' + columnWidths.map((w) => '─'.repeat(w + 2)).join('┼') + '┤');

    // Print data rows
    for (const row of rows) {
      const cells = columns.map((col, i) => {
        const value = String(row[col.key] || '');
        const aligned = this.alignText(value, columnWidths[i], col.align);
        return col.color ? col.color(aligned) : aligned;
      });
      console.log('│ ' + cells.join(' │ ') + ' │');
    }

    // Print bottom border
    console.log('└' + columnWidths.map((w) => '─'.repeat(w + 2)).join('┴') + '┘');
  }
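
  // Usage sketch (illustrative):
  //   logger.logTable(
  //     [
  //       { header: 'GPU', key: 'gpu' },
  //       { header: 'Util', key: 'util', align: 'right' },
  //     ],
  //     [{ gpu: 'RTX 4090', util: '92%' }],
  //     'GPU Utilization',
  //   );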
}

// Export a singleton instance for easy use
export const logger = Logger.getInstance();
260
ts/modelgrid.ts
Normal file
@@ -0,0 +1,260 @@
/**
 * ModelGrid
 *
 * Main coordinator class for the ModelGrid system.
 */

import type { IModelGridConfig } from './interfaces/config.ts';
import { logger } from './logger.ts';
import { PATHS, VERSION } from './constants.ts';
import { Systemd } from './systemd.ts';
import { Daemon } from './daemon.ts';
import { GpuDetector } from './hardware/gpu-detector.ts';
import { SystemInfo } from './hardware/system-info.ts';
import { DriverManager } from './drivers/driver-manager.ts';
import { DockerManager } from './docker/docker-manager.ts';
import { ContainerManager } from './containers/container-manager.ts';
import { ModelRegistry } from './models/registry.ts';
import { ModelLoader } from './models/loader.ts';
import { GpuHandler } from './cli/gpu-handler.ts';
import { ContainerHandler } from './cli/container-handler.ts';
import { ModelHandler } from './cli/model-handler.ts';
import { ConfigHandler } from './cli/config-handler.ts';
import { ServiceHandler } from './cli/service-handler.ts';
import * as fs from 'node:fs/promises';

/**
 * ModelGrid - Main application coordinator
 */
export class ModelGrid {
  private config?: IModelGridConfig;
  private systemd: Systemd;
  private daemon: Daemon;
  private gpuDetector: GpuDetector;
  private systemInfo: SystemInfo;
  private driverManager: DriverManager;
  private dockerManager: DockerManager;
  private containerManager: ContainerManager;
  private modelRegistry: ModelRegistry;
  private modelLoader?: ModelLoader;

  // CLI Handlers
  private gpuHandler: GpuHandler;
  private containerHandler: ContainerHandler;
  private modelHandler: ModelHandler;
  private configHandler: ConfigHandler;
  private serviceHandler: ServiceHandler;

  constructor() {
    // Initialize core components
    this.gpuDetector = new GpuDetector();
    this.systemInfo = new SystemInfo();
    this.driverManager = new DriverManager();
    this.dockerManager = new DockerManager();
    this.containerManager = new ContainerManager();
    this.modelRegistry = new ModelRegistry();
    this.systemd = new Systemd();
    this.daemon = new Daemon(this);

    // Initialize CLI handlers
    this.gpuHandler = new GpuHandler();
    this.containerHandler = new ContainerHandler(this.containerManager);
    this.modelHandler = new ModelHandler(this.containerManager, this.modelRegistry);
    this.configHandler = new ConfigHandler();
    this.serviceHandler = new ServiceHandler(this);
  }

  /**
   * Load configuration from file
   */
  public async loadConfig(): Promise<void> {
    try {
      const configContent = await fs.readFile(PATHS.CONFIG_FILE, 'utf-8');
      this.config = JSON.parse(configContent) as IModelGridConfig;
      logger.dim(`Configuration loaded from ${PATHS.CONFIG_FILE}`);
    } catch (error) {
      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
        throw new Error(`Configuration file not found: ${PATHS.CONFIG_FILE}`);
      }
      throw error;
    }
  }

  /**
   * Save configuration to file
   */
  public async saveConfig(): Promise<void> {
    if (!this.config) {
      throw new Error('No configuration to save');
    }

    await fs.mkdir(PATHS.CONFIG_DIR, { recursive: true });
    await fs.writeFile(PATHS.CONFIG_FILE, JSON.stringify(this.config, null, 2));
    logger.dim(`Configuration saved to ${PATHS.CONFIG_FILE}`);
  }

  /**
   * Get current configuration
   */
  public getConfig(): IModelGridConfig | undefined {
    return this.config;
  }

  /**
   * Set configuration
   */
  public setConfig(config: IModelGridConfig): void {
    this.config = config;
  }

  /**
   * Get version string
   */
  public getVersion(): string {
    return VERSION;
  }

  /**
   * Get Systemd instance
   */
  public getSystemd(): Systemd {
    return this.systemd;
  }

  /**
   * Get Daemon instance
   */
  public getDaemon(): Daemon {
    return this.daemon;
  }

  /**
   * Get GPU Detector instance
   */
  public getGpuDetector(): GpuDetector {
    return this.gpuDetector;
  }

  /**
   * Get System Info instance
   */
  public getSystemInfo(): SystemInfo {
    return this.systemInfo;
  }

  /**
   * Get Driver Manager instance
   */
  public getDriverManager(): DriverManager {
    return this.driverManager;
  }

  /**
   * Get Docker Manager instance
   */
  public getDockerManager(): DockerManager {
    return this.dockerManager;
  }

  /**
   * Get Container Manager instance
   */
  public getContainerManager(): ContainerManager {
    return this.containerManager;
  }

  /**
   * Get Model Registry instance
   */
  public getModelRegistry(): ModelRegistry {
    return this.modelRegistry;
  }

  /**
   * Get Model Loader instance
   */
  public getModelLoader(): ModelLoader {
    if (!this.modelLoader) {
      this.modelLoader = new ModelLoader(this.modelRegistry, this.containerManager);
    }
    return this.modelLoader;
  }

  // CLI Handlers

  /**
   * Get GPU Handler
   */
  public getGpuHandler(): GpuHandler {
    return this.gpuHandler;
  }

  /**
   * Get Container Handler
   */
  public getContainerHandler(): ContainerHandler {
    return this.containerHandler;
  }

  /**
   * Get Model Handler
   */
  public getModelHandler(): ModelHandler {
    return this.modelHandler;
  }

  /**
   * Get Config Handler
   */
  public getConfigHandler(): ConfigHandler {
    return this.configHandler;
  }

  /**
   * Get Service Handler
   */
  public getServiceHandler(): ServiceHandler {
    return this.serviceHandler;
  }

  /**
   * Initialize the ModelGrid system
   */
  public async initialize(): Promise<void> {
    // Load configuration
    await this.loadConfig();

    if (!this.config) {
      throw new Error('Failed to load configuration');
    }

    // Initialize containers from config
    for (const containerConfig of this.config.containers) {
      await this.containerManager.addContainer(containerConfig);
    }

    // Initialize model registry
    this.modelRegistry.setGreenlistUrl(this.config.models.greenlistUrl);

    // Create model loader
    this.modelLoader = new ModelLoader(
      this.modelRegistry,
      this.containerManager,
      this.config.models.autoPull,
    );

    logger.success('ModelGrid initialized');
  }

  /**
   * Shutdown the ModelGrid system
   */
  public async shutdown(): Promise<void> {
    logger.info('Shutting down ModelGrid...');

    // Stop all containers
    await this.containerManager.stopAll();

    logger.success('ModelGrid shutdown complete');
  }
}
8
ts/models/index.ts
Normal file
@@ -0,0 +1,8 @@
/**
 * Model Management Module
 *
 * Exports model registry and loader functionality.
 */

export { ModelRegistry } from './registry.ts';
export { ModelLoader } from './loader.ts';
291
ts/models/loader.ts
Normal file
@@ -0,0 +1,291 @@
/**
 * Model Loader
 *
 * Handles automatic model loading with greenlist validation.
 */

import type { TContainerType } from '../interfaces/container.ts';
import { logger } from '../logger.ts';
import { ModelRegistry } from './registry.ts';
import { ContainerManager } from '../containers/container-manager.ts';
import { GpuDetector } from '../hardware/gpu-detector.ts';

/**
 * Model load result
 */
export interface IModelLoadResult {
  success: boolean;
  model: string;
  container?: string;
  error?: string;
  alreadyLoaded?: boolean;
}

/**
 * Model loader with greenlist validation
 */
export class ModelLoader {
  private registry: ModelRegistry;
  private containerManager: ContainerManager;
  private gpuDetector: GpuDetector;
  private autoPull: boolean;

  constructor(
    registry: ModelRegistry,
    containerManager: ContainerManager,
    autoPull: boolean = true,
  ) {
    this.registry = registry;
    this.containerManager = containerManager;
    this.gpuDetector = new GpuDetector();
    this.autoPull = autoPull;
  }

  /**
   * Load a model with greenlist validation
   */
  public async loadModel(modelName: string): Promise<IModelLoadResult> {
    logger.info(`Loading model: ${modelName}`);

    // Step 1: Check if model is already loaded in any container
    const container = await this.containerManager.findContainerForModel(modelName);
    if (container) {
      logger.dim(`Model ${modelName} is already available in container ${container.getConfig().id}`);
      return {
        success: true,
        model: modelName,
        container: container.getConfig().id,
        alreadyLoaded: true,
      };
    }

    // Step 2: Check if model is greenlit
    const isGreenlit = await this.registry.isModelGreenlit(modelName);
    if (!isGreenlit) {
      logger.error(`Model ${modelName} is not in the greenlit list`);
      logger.info('Only greenlit models can be auto-pulled for security reasons.');
      logger.info('Contact your administrator to add this model to the greenlist.');
      return {
        success: false,
        model: modelName,
        error: `Model "${modelName}" is not greenlit. Request via admin or add to greenlist.`,
      };
    }

    // Step 3: Get model info from greenlist
    const modelInfo = await this.registry.getGreenlitModel(modelName);
    if (!modelInfo) {
      return {
        success: false,
        model: modelName,
        error: 'Failed to get model info from greenlist',
      };
    }

    // Step 4: Check VRAM requirements
    const gpus = await this.gpuDetector.detectGpus();
    const totalVram = gpus.reduce((sum, gpu) => sum + gpu.vram, 0);
    const totalVramGb = Math.round(totalVram / 1024);

    if (modelInfo.minVram > totalVramGb) {
      logger.error(`Insufficient VRAM for model ${modelName}`);
      logger.info(`Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`);
      return {
        success: false,
        model: modelName,
        error: `Insufficient VRAM. Required: ${modelInfo.minVram}GB, Available: ${totalVramGb}GB`,
      };
    }

    // Step 5: Find or create appropriate container
    const containerType = modelInfo.container;
    const targetContainer = await this.findAvailableContainer(containerType);

    if (!targetContainer) {
      logger.warn(`No ${containerType} container available`);

      // Could auto-create a container here if desired
      return {
        success: false,
        model: modelName,
        error: `No ${containerType} container available to load model`,
      };
    }

    // Step 6: Pull the model if auto-pull is enabled
    if (this.autoPull) {
      logger.info(`Pulling model ${modelName} to ${containerType} container...`);

      const pullSuccess = await targetContainer.pullModel(modelName, (progress) => {
        const percent = progress.percent !== undefined ? ` (${progress.percent}%)` : '';
        logger.dim(`  ${progress.status}${percent}`);
      });

      if (!pullSuccess) {
        return {
          success: false,
          model: modelName,
          error: 'Failed to pull model',
        };
      }
    }

    logger.success(`Model ${modelName} loaded successfully`);
    return {
      success: true,
      model: modelName,
      container: targetContainer.getConfig().id,
    };
  }
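
  // Usage sketch (illustrative; assumes a configured registry and container
  // manager are available):
  //   const loader = new ModelLoader(registry, containerManager);
  //   const result = await loader.loadModel('llama3:8b');
  //   if (result.success) {
  //     console.log(`Served by container ${result.container}`);
  //   } else {
  //     console.error(result.error);
  //   }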

  /**
   * Find an available container of the specified type
   */
  private async findAvailableContainer(
    containerType: TContainerType,
  ): Promise<import('../containers/base-container.ts').BaseContainer | null> {
    const containers = this.containerManager.getAllContainers();

    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }

      const status = await container.getStatus();
      if (status.running) {
        return container;
      }
    }

    // No running container found, try to start one
    for (const container of containers) {
      if (container.type !== containerType) {
        continue;
      }

      logger.info(`Starting ${containerType} container: ${container.getConfig().name}`);
      const started = await container.start();
      if (started) {
        return container;
      }
    }

    return null;
  }

  /**
   * Preload a list of models
   */
  public async preloadModels(modelNames: string[]): Promise<Map<string, IModelLoadResult>> {
    const results = new Map<string, IModelLoadResult>();

    for (const modelName of modelNames) {
      const result = await this.loadModel(modelName);
      results.set(modelName, result);

      if (!result.success) {
        logger.warn(`Failed to preload model: ${modelName}`);
      }
    }

    return results;
  }

  /**
   * Unload a model from a container
   */
  public async unloadModel(modelName: string): Promise<boolean> {
    const container = await this.containerManager.findContainerForModel(modelName);
    if (!container) {
      logger.warn(`Model ${modelName} not found in any container`);
      return false;
    }

    return container.removeModel(modelName);
  }

  /**
   * Check if auto-pull is enabled
   */
  public isAutoPullEnabled(): boolean {
    return this.autoPull;
  }

  /**
   * Enable or disable auto-pull
   */
  public setAutoPull(enabled: boolean): void {
    this.autoPull = enabled;
  }

  /**
   * Get loading recommendations for available VRAM
   */
  public async getRecommendations(): Promise<{
    canLoad: string[];
    cannotLoad: string[];
    loaded: string[];
  }> {
    const gpus = await this.gpuDetector.detectGpus();
    const totalVramGb = Math.round(gpus.reduce((sum, gpu) => sum + gpu.vram, 0) / 1024);

    const allModels = await this.registry.getAllGreenlitModels();
    const availableModels = await this.containerManager.getAllAvailableModels();
    const loadedNames = new Set(availableModels.keys());

    const canLoad: string[] = [];
    const cannotLoad: string[] = [];
    const loaded: string[] = [];

    for (const model of allModels) {
      if (loadedNames.has(model.name)) {
        loaded.push(model.name);
      } else if (model.minVram <= totalVramGb) {
        canLoad.push(model.name);
      } else {
        cannotLoad.push(model.name);
      }
    }

    return { canLoad, cannotLoad, loaded };
  }

  /**
   * Print loading status
   */
  public async printStatus(): Promise<void> {
    const recommendations = await this.getRecommendations();

    logger.logBoxTitle('Model Loading Status', 60, 'info');

    logger.logBoxLine(`Loaded Models (${recommendations.loaded.length}):`);
    if (recommendations.loaded.length > 0) {
      for (const model of recommendations.loaded) {
        logger.logBoxLine(`  - ${model}`);
      }
    } else {
      logger.logBoxLine('  None');
    }

    logger.logBoxLine('');
    logger.logBoxLine(`Available to Load (${recommendations.canLoad.length}):`);
    for (const model of recommendations.canLoad.slice(0, 5)) {
      logger.logBoxLine(`  - ${model}`);
    }
    if (recommendations.canLoad.length > 5) {
      logger.logBoxLine(`  ... and ${recommendations.canLoad.length - 5} more`);
    }

    logger.logBoxLine('');
    logger.logBoxLine(`Insufficient VRAM (${recommendations.cannotLoad.length}):`);
    for (const model of recommendations.cannotLoad.slice(0, 3)) {
      const info = await this.registry.getGreenlitModel(model);
      logger.logBoxLine(`  - ${model} (needs ${info?.minVram || '?'}GB)`);
    }
    if (recommendations.cannotLoad.length > 3) {
      logger.logBoxLine(`  ... and ${recommendations.cannotLoad.length - 3} more`);
    }

    logger.logBoxEnd();
  }
}
252
ts/models/registry.ts
Normal file
@@ -0,0 +1,252 @@
/**
 * Model Registry
 *
 * Manages the greenlit model list and model availability.
 */

import type { IGreenlitModel, IGreenlitModelsList } from '../interfaces/config.ts';
import type { TContainerType } from '../interfaces/container.ts';
import { MODEL_REGISTRY, TIMING } from '../constants.ts';
import { logger } from '../logger.ts';

/**
 * Model registry for managing greenlit models
 */
export class ModelRegistry {
  private greenlistUrl: string;
  private cachedGreenlist: IGreenlitModelsList | null = null;
  private cacheTime: number = 0;

  constructor(greenlistUrl: string = MODEL_REGISTRY.DEFAULT_GREENLIST_URL) {
    this.greenlistUrl = greenlistUrl;
  }

  /**
   * Set the greenlist URL
   */
  public setGreenlistUrl(url: string): void {
    this.greenlistUrl = url;
    this.cachedGreenlist = null;
    this.cacheTime = 0;
  }

  /**
   * Fetch the greenlit model list from the remote URL
   */
  public async fetchGreenlist(forceRefresh: boolean = false): Promise<IGreenlitModelsList> {
    // Return cached data if still valid
    if (
      !forceRefresh &&
      this.cachedGreenlist &&
      Date.now() - this.cacheTime < TIMING.GREENLIST_CACHE_DURATION_MS
    ) {
      return this.cachedGreenlist;
    }

    try {
      logger.dim(`Fetching greenlit models from: ${this.greenlistUrl}`);

      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 30000);

      const response = await fetch(this.greenlistUrl, {
        signal: controller.signal,
        headers: {
          'Accept': 'application/json',
          'User-Agent': 'ModelGrid/1.0',
        },
      });

      clearTimeout(timeout);

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const greenlist = await response.json() as IGreenlitModelsList;

      // Validate structure
      if (!greenlist.models || !Array.isArray(greenlist.models)) {
        throw new Error('Invalid greenlist format: missing models array');
      }

      // Cache the result
      this.cachedGreenlist = greenlist;
      this.cacheTime = Date.now();

      logger.dim(`Loaded ${greenlist.models.length} greenlit models`);
      return greenlist;
    } catch (error) {
      logger.warn(`Failed to fetch greenlist: ${error instanceof Error ? error.message : String(error)}`);

      // Return fallback if we have no cache
      if (!this.cachedGreenlist) {
        logger.dim('Using fallback greenlist');
        return this.getFallbackGreenlist();
      }

      // Return stale cache
      return this.cachedGreenlist;
    }
  }

  /**
   * Get fallback greenlist
   */
  private getFallbackGreenlist(): IGreenlitModelsList {
    return {
      version: '1.0',
      lastUpdated: new Date().toISOString(),
      models: MODEL_REGISTRY.FALLBACK_GREENLIST as unknown as IGreenlitModel[],
    };
  }

  /**
   * Check if a model is greenlit
   */
  public async isModelGreenlit(modelName: string): Promise<boolean> {
    const greenlist = await this.fetchGreenlist();
    return greenlist.models.some((m) => this.normalizeModelName(m.name) === this.normalizeModelName(modelName));
  }

  /**
   * Get greenlit model info
   */
  public async getGreenlitModel(modelName: string): Promise<IGreenlitModel | null> {
    const greenlist = await this.fetchGreenlist();
    const normalized = this.normalizeModelName(modelName);
    return greenlist.models.find((m) => this.normalizeModelName(m.name) === normalized) || null;
  }

  /**
   * Get all greenlit models
   */
  public async getAllGreenlitModels(): Promise<IGreenlitModel[]> {
    const greenlist = await this.fetchGreenlist();
    return greenlist.models;
  }

  /**
   * Get greenlit models by container type
   */
  public async getModelsByContainer(containerType: TContainerType): Promise<IGreenlitModel[]> {
    const greenlist = await this.fetchGreenlist();
    return greenlist.models.filter((m) => m.container === containerType);
  }

  /**
   * Get greenlit models that fit within a VRAM limit
   */
  public async getModelsWithinVram(maxVramGb: number): Promise<IGreenlitModel[]> {
    const greenlist = await this.fetchGreenlist();
    return greenlist.models.filter((m) => m.minVram <= maxVramGb);
  }

  /**
   * Get recommended container type for a model
   */
  public async getRecommendedContainer(modelName: string): Promise<TContainerType | null> {
    const model = await this.getGreenlitModel(modelName);
    return model ? model.container : null;
  }

  /**
   * Get minimum VRAM required for a model
   */
  public async getMinVram(modelName: string): Promise<number | null> {
    const model = await this.getGreenlitModel(modelName);
    return model ? model.minVram : null;
  }

  /**
   * Check if a model fits in available VRAM
   */
  public async modelFitsInVram(modelName: string, availableVramGb: number): Promise<boolean> {
    const minVram = await this.getMinVram(modelName);
    if (minVram === null) {
      // Model not in greenlist, assume it might fit
      return true;
    }
    return availableVramGb >= minVram;
  }

  /**
   * Normalize model name for comparison
   * Handles variations like "llama3:8b" vs "llama3:8B" vs "meta-llama/llama-3-8b"
   */
  private normalizeModelName(name: string): string {
    return name
      .toLowerCase()
      .replace(/[^a-z0-9:.-]/g, '')
      .trim();
  }
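
  // Examples of the normalization above (illustrative):
  //   'llama3:8B'             -> 'llama3:8b'            (case folded)
  //   'Llama 3 8B'            -> 'llama38b'             (spaces stripped)
  //   'meta-llama/llama-3-8b' -> 'meta-llamallama-3-8b' (slash stripped)
  // Colon, dot, and dash survive the character class, so tag syntax is kept.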

  /**
   * Search models by name pattern
   */
  public async searchModels(pattern: string): Promise<IGreenlitModel[]> {
    const greenlist = await this.fetchGreenlist();
    const normalizedPattern = pattern.toLowerCase();

    return greenlist.models.filter((m) =>
      m.name.toLowerCase().includes(normalizedPattern) ||
      m.description?.toLowerCase().includes(normalizedPattern) ||
      m.tags?.some((t) => t.toLowerCase().includes(normalizedPattern))
    );
  }

  /**
   * Get models by tags
   */
  public async getModelsByTags(tags: string[]): Promise<IGreenlitModel[]> {
    const greenlist = await this.fetchGreenlist();
    const normalizedTags = tags.map((t) => t.toLowerCase());

    return greenlist.models.filter((m) =>
      m.tags?.some((t) => normalizedTags.includes(t.toLowerCase()))
    );
  }

  /**
   * Clear the cached greenlist
   */
  public clearCache(): void {
    this.cachedGreenlist = null;
    this.cacheTime = 0;
  }

  /**
   * Print greenlist summary
   */
  public async printSummary(): Promise<void> {
    const greenlist = await this.fetchGreenlist();

    // Group by container type
    const byContainer = new Map<string, IGreenlitModel[]>();
    for (const model of greenlist.models) {
      if (!byContainer.has(model.container)) {
        byContainer.set(model.container, []);
      }
      byContainer.get(model.container)!.push(model);
    }

    logger.logBoxTitle('Greenlit Models', 60, 'info');
    logger.logBoxLine(`Version: ${greenlist.version}`);
    logger.logBoxLine(`Last Updated: ${greenlist.lastUpdated}`);
    logger.logBoxLine(`Total Models: ${greenlist.models.length}`);
    logger.logBoxLine('');

    for (const [container, models] of byContainer) {
      logger.logBoxLine(`${container.toUpperCase()} (${models.length}):`);
      for (const model of models.slice(0, 5)) {
        logger.logBoxLine(`  - ${model.name} (${model.minVram}GB VRAM)`);
      }
      if (models.length > 5) {
        logger.logBoxLine(`  ... and ${models.length - 5} more`);
      }
      logger.logBoxLine('');
    }

    logger.logBoxEnd();
  }
}
283
ts/systemd.ts
Normal file
@@ -0,0 +1,283 @@
/**
 * ModelGrid Systemd Service Manager
 *
 * Handles installation, removal, and control of the ModelGrid systemd service.
 */

import process from 'node:process';
import { promises as fs } from 'node:fs';
import { execSync } from 'node:child_process';
import { logger } from './logger.ts';
import { theme, symbols } from './colors.ts';
import { PATHS, VERSION } from './constants.ts';

/**
 * Systemd service manager for ModelGrid
 */
export class Systemd {
  private readonly serviceFilePath = '/etc/systemd/system/modelgrid.service';

  private readonly serviceTemplate = `[Unit]
Description=ModelGrid - AI Infrastructure Management
After=network.target docker.service
Requires=docker.service

[Service]
ExecStart=/usr/local/bin/modelgrid service start-daemon
Restart=always
RestartSec=10
User=root
Group=root
Environment=PATH=/usr/bin:/usr/local/bin
WorkingDirectory=/opt/modelgrid

[Install]
WantedBy=multi-user.target
`;
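
  // The unit file above can be sanity-checked after installation with
  // `systemd-analyze verify /etc/systemd/system/modelgrid.service`
  // (a manual step; this class does not invoke it).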

  /**
   * Install the systemd service
   */
  public async install(): Promise<void> {
    try {
      // Check if configuration exists
      await this.checkConfigExists();

      // Write service file
      await fs.writeFile(this.serviceFilePath, this.serviceTemplate);
      logger.logBoxTitle('Service Installation', 50);
      logger.logBoxLine(`Service file created at ${this.serviceFilePath}`);

      // Reload systemd
      execSync('systemctl daemon-reload');
      logger.logBoxLine('Systemd daemon reloaded');

      // Enable service
      execSync('systemctl enable modelgrid.service');
      logger.logBoxLine('Service enabled to start on boot');
      logger.logBoxEnd();
    } catch (error) {
      if (error instanceof Error && error.message === 'Configuration not found') {
        throw error;
      }
      logger.error(`Failed to install systemd service: ${error}`);
      throw error;
    }
  }

  /**
   * Start the systemd service
   */
  public async start(): Promise<void> {
    try {
      await this.checkConfigExists();
      execSync('systemctl start modelgrid.service');
      logger.logBoxTitle('Service Status', 45);
      logger.logBoxLine('ModelGrid service started successfully');
      logger.logBoxEnd();
    } catch (error) {
      if (error instanceof Error && error.message === 'Configuration not found') {
        process.exit(1);
      }
      logger.error(`Failed to start service: ${error}`);
      throw error;
    }
  }

  /**
   * Stop the systemd service
   */
  public stop(): void {
    try {
      execSync('systemctl stop modelgrid.service');
      logger.success('ModelGrid service stopped');
    } catch (error) {
      logger.error(`Failed to stop service: ${error}`);
      throw error;
    }
  }

  /**
   * Get status of the systemd service
   */
  public async getStatus(): Promise<void> {
    try {
      // Display version
      logger.log('');
      logger.log(`${theme.dim('ModelGrid')} ${theme.dim('v' + VERSION)}`);

      // Check if config exists
      try {
        await this.checkConfigExists();
      } catch (_error) {
        return;
      }

      // Display service status
      await this.displayServiceStatus();

      // Display container status
      await this.displayContainerStatus();

      // Display GPU status
      await this.displayGpuStatus();
    } catch (error) {
      logger.error(`Failed to get status: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Display systemd service status
   */
  private async displayServiceStatus(): Promise<void> {
    let serviceStatus = '';
    try {
      serviceStatus = execSync('systemctl status modelgrid.service').toString();
    } catch (error) {
      // systemctl status exits non-zero for inactive or missing units, which
      // makes execSync throw even though the status text was printed; the
      // captured stdout (when present) still holds that text.
      const stdout = (error as { stdout?: Buffer }).stdout;
      serviceStatus = stdout?.toString() ?? '';
    }

    // An installed-but-stopped unit still reports "Loaded: loaded"; only a
    // missing unit produces no loadable status at all.
    if (!serviceStatus.includes('Loaded: loaded')) {
      logger.log('');
      logger.log(`${symbols.stopped} ${theme.dim('Service:')} ${theme.statusInactive('not installed')}`);
      logger.log('');
      return;
    }

    const lines = serviceStatus.split('\n');

    let isActive = false;
    let pid = '';
    let memory = '';

    for (const line of lines) {
      if (line.includes('Active:')) {
        isActive = line.includes('active (running)');
      } else if (line.includes('Main PID:')) {
        const match = line.match(/Main PID:\s+(\d+)/);
        if (match) pid = match[1];
      } else if (line.includes('Memory:')) {
        const match = line.match(/Memory:\s+([\d.]+[A-Z])/);
        if (match) memory = match[1];
      }
    }

    logger.log('');
    if (isActive) {
      logger.log(`${symbols.running} ${theme.success('Service:')} ${theme.statusActive('active (running)')}`);
    } else {
      logger.log(`${symbols.stopped} ${theme.dim('Service:')} ${theme.statusInactive('inactive')}`);
    }

    if (pid || memory) {
      const details: string[] = [];
      if (pid) details.push(`PID: ${theme.dim(pid)}`);
      if (memory) details.push(`Memory: ${theme.dim(memory)}`);
      logger.log(` ${details.join(' ')}`);
    }
    logger.log('');
  }
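
  // Note: an alternative to scraping the human-readable `systemctl status`
  // output above is the machine-readable form, e.g.
  //   systemctl show modelgrid.service -p ActiveState,MainPID,MemoryCurrent
  // which prints stable key=value pairs. This class keeps the scraping
  // approach as written.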

  /**
   * Display container status
   */
  private async displayContainerStatus(): Promise<void> {
    try {
      // Try to get container info from docker
      const output = execSync('docker ps --filter "name=modelgrid" --format "{{.Names}}\\t{{.Status}}"', { encoding: 'utf-8' });
      const lines = output.trim().split('\n').filter((l) => l.trim());

      if (lines.length === 0) {
        logger.info('Containers: None running');
        return;
      }

      logger.info(`Containers (${lines.length}):`);

      for (const line of lines) {
        const [name, status] = line.split('\t');
        const isUp = status?.toLowerCase().includes('up');

        logger.log(` ${isUp ? symbols.running : symbols.stopped} ${theme.highlight(name)} - ${isUp ? theme.success(status) : theme.dim(status)}`);
      }
      logger.log('');
    } catch (_error) {
      // Docker might not be running
    }
  }

  /**
   * Display GPU status
   */
  private async displayGpuStatus(): Promise<void> {
    try {
      // Try nvidia-smi
      const output = execSync('nvidia-smi --query-gpu=name,utilization.gpu,memory.used,memory.total --format=csv,noheader,nounits', { encoding: 'utf-8' });
      // Filter out empty lines; splitting an empty string yields [''] and
      // would otherwise be parsed as a phantom GPU.
      const lines = output.trim().split('\n').filter((l) => l.trim());

      if (lines.length === 0) {
        return;
      }

      logger.info(`GPUs (${lines.length}):`);

      for (const line of lines) {
        const [name, util, memUsed, memTotal] = line.split(',').map((s) => s.trim());
        const memPercent = Math.round((parseInt(memUsed) / parseInt(memTotal)) * 100);

        logger.log(` ${symbols.info} ${theme.gpuNvidia(name)}`);
        logger.log(` Utilization: ${theme.highlight(util + '%')} Memory: ${theme.info(memUsed)}/${memTotal} MB (${memPercent}%)`);
      }
      logger.log('');
    } catch (_error) {
      // nvidia-smi might not be available
    }
  }

  /**
   * Disable and uninstall the service
   */
  public async disable(): Promise<void> {
    try {
      // Stop if running
      try {
        execSync('systemctl stop modelgrid.service');
        logger.log('Service stopped');
      } catch (_error) {
        // Might not be running
      }

      // Disable
      try {
        execSync('systemctl disable modelgrid.service');
        logger.log('Service disabled');
      } catch (_error) {
        // Might not be enabled
      }

      // Remove service file
      try {
        await fs.unlink(this.serviceFilePath);
        logger.log('Service file removed');
      } catch (_error) {
        // Might not exist
      }

      // Reload systemd
      execSync('systemctl daemon-reload');
      logger.success('ModelGrid service uninstalled');
    } catch (error) {
      logger.error(`Failed to disable service: ${error}`);
      throw error;
    }
  }

  /**
   * Check if configuration file exists
   */
  private async checkConfigExists(): Promise<void> {
    try {
      await fs.access(PATHS.CONFIG_FILE);
    } catch (_error) {
      logger.log('');
      logger.error('No configuration found');
      logger.log(` ${theme.dim('Config file:')} ${PATHS.CONFIG_FILE}`);
      logger.log(` ${theme.dim('Run')} ${theme.command('modelgrid config init')} ${theme.dim('to create one')}`);
      logger.log('');
      throw new Error('Configuration not found');
    }
  }
}
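
A minimal usage sketch of the class above, assuming a CLI layer that wires the
calls together (the CLI entry point is not part of this file):

  const systemd = new Systemd();
  await systemd.install();   // write unit file, daemon-reload, enable on boot
  await systemd.start();     // refuses to start without a config file
  await systemd.getStatus(); // service, container, and GPU overview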