2026-01-30 03:16:57 +00:00
|
|
|
/**
 * ModelGrid Constants
 *
 * Central location for all timeout, interval, and configuration values.
 * This makes configuration easier and code more self-documenting.
 */
|
|
|
|
|
|
2026-04-20 23:00:50 +00:00
|
|
|
/**
 * ModelGrid version string.
 *
 * NOTE(review): presumably mirrors the released package version — confirm
 * where it is bumped so the two cannot drift.
 */
export const VERSION = '1.0.1';
|
|
|
|
|
|
2026-01-30 03:16:57 +00:00
|
|
|
/**
 * Default timing values.
 *
 * Every entry is expressed in milliseconds (see the `_MS` suffix); the
 * human-readable duration is given in each field's comment.
 */
export const TIMING = {
  /** Default interval between container health checks (30 seconds) */
  CHECK_INTERVAL_MS: 30000,

  /** Interval for idle monitoring mode (60 seconds) */
  IDLE_CHECK_INTERVAL_MS: 60000,

  /** Interval for checking config file changes (60 seconds) */
  CONFIG_CHECK_INTERVAL_MS: 60000,

  /** Interval for logging periodic status updates (5 minutes) */
  LOG_INTERVAL_MS: 5 * 60 * 1000,

  /** Timeout for GPU driver detection (10 seconds) */
  GPU_DETECTION_TIMEOUT_MS: 10000,

  /** Timeout for Docker commands (30 seconds) */
  DOCKER_COMMAND_TIMEOUT_MS: 30000,

  /** Timeout for container startup (2 minutes) */
  CONTAINER_STARTUP_TIMEOUT_MS: 2 * 60 * 1000,

  /** Timeout for model loading (10 minutes) */
  MODEL_LOAD_TIMEOUT_MS: 10 * 60 * 1000,

  /** Greenlit model list cache duration (1 hour) */
  GREENLIST_CACHE_DURATION_MS: 60 * 60 * 1000,
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * API Server constants
 *
 * Defaults for the API server's listen address, rate limiting, and
 * request/stream timing.
 */
export const API_SERVER = {
  /**
   * Default API server port.
   *
   * NOTE(review): same number as `CONTAINER_PORTS.TGI` (8080) — confirm the
   * two are never published on the same host interface.
   */
  DEFAULT_PORT: 8080,

  /** Default API server host (`0.0.0.0` is the IPv4 wildcard address) */
  DEFAULT_HOST: '0.0.0.0',

  /** Default rate limit (requests per minute) */
  DEFAULT_RATE_LIMIT: 60,

  /** Request timeout (30 seconds) */
  REQUEST_TIMEOUT_MS: 30000,

  /** Stream keep-alive interval (15 seconds) */
  STREAM_KEEPALIVE_MS: 15000,
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Docker/Container constants
 *
 * Defaults for the Docker network name and container health-check/restart
 * handling.
 */
export const DOCKER = {
  /** Default Docker network name */
  DEFAULT_NETWORK: 'modelgrid',

  /** Container health check interval (10 seconds) */
  HEALTH_CHECK_INTERVAL_MS: 10000,

  /** Container restart delay (5 seconds) */
  RESTART_DELAY_MS: 5000,

  /** Maximum container restart attempts */
  MAX_RESTART_ATTEMPTS: 3,
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * GPU-related constants
 *
 * VRAM sizes are in gigabytes; the utilization threshold is a percentage.
 */
export const GPU = {
  /** Minimum VRAM for most models (8GB) */
  MIN_VRAM_GB: 8,

  /** Recommended VRAM for larger models (24GB) */
  RECOMMENDED_VRAM_GB: 24,

  /** GPU utilization threshold for load balancing (80%) */
  UTILIZATION_THRESHOLD_PERCENT: 80,
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Container port mapping defaults
 *
 * One default port per supported inference engine.
 */
export const CONTAINER_PORTS = {
  /** Ollama default port */
  OLLAMA: 11434,

  /** vLLM default port */
  VLLM: 8000,

  /** TGI (Text Generation Inference) default port */
  TGI: 8080,
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Container image defaults
 *
 * NOTE(review): both references use the mutable `:latest` tag, so the image
 * actually pulled can change between deployments — consider pinning if
 * reproducibility matters.
 */
export const CONTAINER_IMAGES = {
  /** vLLM official image */
  VLLM: 'vllm/vllm-openai:latest',

  /** TGI official image */
  TGI: 'ghcr.io/huggingface/text-generation-inference:latest',
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Model registry constants
 *
 * Holds the default remote catalog location and a small built-in catalog
 * used when the remote one cannot be fetched.
 */
export const MODEL_REGISTRY = {
  /** Default public catalog URL */
  DEFAULT_CATALOG_URL: 'https://list.modelgrid.com/catalog/models.json',

  /**
   * Fallback catalog if remote fetch fails.
   *
   * Each entry describes one model: its id and aliases, the serving engine,
   * the source repo and license, supported capabilities, VRAM/GPU
   * requirements, and descriptive metadata.
   */
  FALLBACK_CATALOG: [
    {
      id: 'Qwen/Qwen2.5-7B-Instruct',
      aliases: ['qwen2.5-7b-instruct'],
      engine: 'vllm',
      source: {
        repo: 'Qwen/Qwen2.5-7B-Instruct',
        license: 'apache-2.0',
      },
      capabilities: {
        chat: true,
        completions: true,
        tools: true,
      },
      requirements: {
        minVramGb: 16,
        recommendedVramGb: 24,
        minGpuCount: 1,
      },
      metadata: {
        family: 'Qwen2.5',
        parameterCount: '7B',
        contextWindow: 131072,
        summary: 'General purpose instruct model for chat and tool use.',
        tags: ['chat', 'tool-use', 'instruct'],
      },
    },
    {
      id: 'meta-llama/Llama-3.1-8B-Instruct',
      aliases: ['llama-3.1-8b-instruct'],
      engine: 'vllm',
      source: {
        repo: 'meta-llama/Llama-3.1-8B-Instruct',
        license: 'llama3.1',
      },
      capabilities: {
        chat: true,
        completions: true,
        tools: true,
      },
      requirements: {
        minVramGb: 18,
        recommendedVramGb: 24,
        minGpuCount: 1,
      },
      metadata: {
        family: 'Llama 3.1',
        parameterCount: '8B',
        contextWindow: 131072,
        summary: 'High quality instruct model with good ecosystem support.',
        tags: ['chat', 'tool-use', 'instruct'],
      },
    },
    {
      // Embedding-only entry: no chat/completions/tools capabilities and no
      // parameterCount/contextWindow metadata are declared for it.
      id: 'BAAI/bge-m3',
      aliases: ['bge-m3'],
      engine: 'vllm',
      source: {
        repo: 'BAAI/bge-m3',
        license: 'mit',
      },
      capabilities: {
        embeddings: true,
      },
      requirements: {
        minVramGb: 8,
        recommendedVramGb: 12,
        minGpuCount: 1,
      },
      metadata: {
        family: 'BGE',
        summary: 'Multilingual embedding model for retrieval workloads.',
        tags: ['embeddings', 'retrieval', 'multilingual'],
      },
    },
  ],
} as const;
|
|
|
|
|
|
2026-04-20 23:00:50 +00:00
|
|
|
/**
 * Cluster constants
 *
 * Defaults for cluster networking and node liveness timing.
 */
export const CLUSTER = {
  /** Default bind host (`0.0.0.0` is the IPv4 wildcard address) */
  DEFAULT_BIND_HOST: '0.0.0.0',

  /** Default gossip port */
  DEFAULT_GOSSIP_PORT: 7946,

  /** Default heartbeat interval (5 seconds) */
  DEFAULT_HEARTBEAT_INTERVAL_MS: 5000,

  /**
   * Staleness threshold for a node (20 seconds).
   *
   * NOTE(review): presumably measured from the node's last heartbeat, i.e.
   * four missed heartbeats at the default interval — confirm against the
   * cluster code that reads this value.
   */
  NODE_STALE_AFTER_MS: 20000,

  /**
   * HTTP header name used for cluster authentication.
   *
   * NOTE(review): presumably carries a shared cluster secret — confirm
   * against the code that sets and validates it.
   */
  AUTH_HEADER_NAME: 'x-modelgrid-cluster-secret',
} as const;
|
|
|
|
|
|
2026-01-30 03:16:57 +00:00
|
|
|
/**
 * Configuration paths
 *
 * Absolute filesystem locations used by default for configuration, data,
 * logs, the systemd unit, and the installed binary.
 */
export const PATHS = {
  /** Default configuration directory */
  CONFIG_DIR: '/etc/modelgrid',

  /** Default configuration file */
  CONFIG_FILE: '/etc/modelgrid/config.json',

  /** Default data directory */
  DATA_DIR: '/var/lib/modelgrid',

  /** Default log directory */
  LOG_DIR: '/var/log/modelgrid',

  /** Systemd service file path */
  SYSTEMD_SERVICE: '/etc/systemd/system/modelgrid.service',

  /** Binary installation path */
  BINARY_PATH: '/usr/local/bin/modelgrid',

  /** Working directory */
  WORK_DIR: '/opt/modelgrid',
} as const;
|
|
|
|
|
|
|
|
|
|
/**
 * UI/Display constants
 *
 * Box widths for the three display sizes used in output.
 * NOTE(review): units are presumably terminal character columns — confirm
 * against the rendering code.
 */
export const UI = {
  /** Default width for log boxes */
  DEFAULT_BOX_WIDTH: 50,

  /** Wide box width for status displays */
  WIDE_BOX_WIDTH: 65,

  /** Extra wide box width for detailed info */
  EXTRA_WIDE_BOX_WIDTH: 80,
} as const;
|