// modelgrid/ts/constants.ts
/**
* ModelGrid Constants
*
* Central location for all timeout, interval, and configuration values.
* This makes configuration easier and code more self-documenting.
*/
/** Current ModelGrid release version. NOTE(review): presumably mirrors package.json — confirm they are kept in sync. */
export const VERSION = '1.0.1';
/**
 * Default timing values, all in milliseconds.
 *
 * Raw millisecond literals use numeric separators (e.g. `30_000`) so the
 * magnitude reads at a glance; longer durations keep explicit unit
 * arithmetic (`5 * 60 * 1000`). Numeric separators do not change the
 * values or their literal types under `as const`.
 */
export const TIMING = {
  /** Default interval between container health checks (30 seconds) */
  CHECK_INTERVAL_MS: 30_000,
  /** Interval for idle monitoring mode (60 seconds) */
  IDLE_CHECK_INTERVAL_MS: 60_000,
  /** Interval for checking config file changes (60 seconds) */
  CONFIG_CHECK_INTERVAL_MS: 60_000,
  /** Interval for logging periodic status updates (5 minutes) */
  LOG_INTERVAL_MS: 5 * 60 * 1000,
  /** Timeout for GPU driver detection (10 seconds) */
  GPU_DETECTION_TIMEOUT_MS: 10_000,
  /** Timeout for Docker commands (30 seconds) */
  DOCKER_COMMAND_TIMEOUT_MS: 30_000,
  /** Timeout for container startup (2 minutes) */
  CONTAINER_STARTUP_TIMEOUT_MS: 2 * 60 * 1000,
  /** Timeout for model loading (10 minutes) */
  MODEL_LOAD_TIMEOUT_MS: 10 * 60 * 1000,
  /** Greenlit model list cache duration (1 hour) */
  GREENLIST_CACHE_DURATION_MS: 60 * 60 * 1000,
} as const;
/**
 * API server defaults.
 *
 * Millisecond literals use numeric separators for readability; values and
 * their literal types are unchanged.
 */
export const API_SERVER = {
  /** Default API server port */
  DEFAULT_PORT: 8080,
  /** Default API server host (binds all interfaces) */
  DEFAULT_HOST: '0.0.0.0',
  /** Default rate limit (requests per minute) */
  DEFAULT_RATE_LIMIT: 60,
  /** Request timeout (30 seconds) */
  REQUEST_TIMEOUT_MS: 30_000,
  /** Stream keep-alive interval (15 seconds) */
  STREAM_KEEPALIVE_MS: 15_000,
} as const;
/**
 * Docker/Container constants
 *
 * Values governing how managed engine containers are attached, probed,
 * and restarted. See also TIMING.DOCKER_COMMAND_TIMEOUT_MS for the
 * per-command timeout.
 */
export const DOCKER = {
/** Default Docker network name */
DEFAULT_NETWORK: 'modelgrid',
/** Container health check interval (10 seconds) */
HEALTH_CHECK_INTERVAL_MS: 10000,
/** Container restart delay (5 seconds) */
RESTART_DELAY_MS: 5000,
/** Maximum container restart attempts */
MAX_RESTART_ATTEMPTS: 3,
} as const;
/**
 * GPU-related constants
 *
 * NOTE(review): presumably consulted when placing models onto GPUs and
 * balancing load — confirm against the scheduler's usage sites.
 */
export const GPU = {
/** Minimum VRAM for most models (8GB) */
MIN_VRAM_GB: 8,
/** Recommended VRAM for larger models (24GB) */
RECOMMENDED_VRAM_GB: 24,
/** GPU utilization threshold for load balancing (80%) */
UTILIZATION_THRESHOLD_PERCENT: 80,
} as const;
/**
 * Container port mapping defaults
 *
 * Default listen port for each supported inference engine.
 * NOTE(review): TGI's 8080 is the same value as API_SERVER.DEFAULT_PORT —
 * harmless inside a container's own network namespace, but confirm there
 * is no collision when the port is published on the host.
 */
export const CONTAINER_PORTS = {
/** Ollama default port */
OLLAMA: 11434,
/** vLLM default port */
VLLM: 8000,
/** TGI (Text Generation Inference) default port */
TGI: 8080,
} as const;
/**
 * Container image defaults
 *
 * NOTE(review): both images are tagged `:latest`, so deployments are not
 * reproducible and may change under you on pull — consider pinning to a
 * specific version or digest.
 */
export const CONTAINER_IMAGES = {
/** vLLM official image */
VLLM: 'vllm/vllm-openai:latest',
/** TGI official image */
TGI: 'ghcr.io/huggingface/text-generation-inference:latest',
} as const;
/**
 * Model registry constants
 *
 * NOTE(review): FALLBACK_CATALOG appears to mirror the schema of the
 * remote catalog served at DEFAULT_CATALOG_URL — confirm the two are
 * kept in sync when the remote schema changes.
 */
export const MODEL_REGISTRY = {
/** Default public catalog URL */
DEFAULT_CATALOG_URL: 'https://list.modelgrid.com/catalog/models.json',
/** Fallback catalog if remote fetch fails */
FALLBACK_CATALOG: [
// General-purpose chat/tool-use instruct model (vLLM engine).
{
id: 'Qwen/Qwen2.5-7B-Instruct',
aliases: ['qwen2.5-7b-instruct'],
engine: 'vllm',
source: {
repo: 'Qwen/Qwen2.5-7B-Instruct',
license: 'apache-2.0',
},
capabilities: {
chat: true,
completions: true,
tools: true,
},
requirements: {
minVramGb: 16,
recommendedVramGb: 24,
minGpuCount: 1,
},
metadata: {
family: 'Qwen2.5',
parameterCount: '7B',
contextWindow: 131072,
summary: 'General purpose instruct model for chat and tool use.',
tags: ['chat', 'tool-use', 'instruct'],
},
},
// Chat/tool-use instruct model; note the restrictive llama3.1 license.
{
id: 'meta-llama/Llama-3.1-8B-Instruct',
aliases: ['llama-3.1-8b-instruct'],
engine: 'vllm',
source: {
repo: 'meta-llama/Llama-3.1-8B-Instruct',
license: 'llama3.1',
},
capabilities: {
chat: true,
completions: true,
tools: true,
},
requirements: {
minVramGb: 18,
recommendedVramGb: 24,
minGpuCount: 1,
},
metadata: {
family: 'Llama 3.1',
parameterCount: '8B',
contextWindow: 131072,
summary: 'High quality instruct model with good ecosystem support.',
tags: ['chat', 'tool-use', 'instruct'],
},
},
// Embeddings-only model (no chat/completions capabilities declared).
{
id: 'BAAI/bge-m3',
aliases: ['bge-m3'],
engine: 'vllm',
source: {
repo: 'BAAI/bge-m3',
license: 'mit',
},
capabilities: {
embeddings: true,
},
requirements: {
minVramGb: 8,
recommendedVramGb: 12,
minGpuCount: 1,
},
metadata: {
family: 'BGE',
summary: 'Multilingual embedding model for retrieval workloads.',
tags: ['embeddings', 'retrieval', 'multilingual'],
},
},
],
} as const;
/**
 * Cluster membership constants
 */
export const CLUSTER = {
/** Default bind address for cluster traffic (all interfaces) */
DEFAULT_BIND_HOST: '0.0.0.0',
/** Default gossip port */
DEFAULT_GOSSIP_PORT: 7946,
/** Default interval between heartbeats (5 seconds) */
DEFAULT_HEARTBEAT_INTERVAL_MS: 5000,
/** Age after which a node is treated as stale (20 seconds — 4 default heartbeat intervals) */
NODE_STALE_AFTER_MS: 20000,
/** HTTP header carrying the shared cluster secret */
AUTH_HEADER_NAME: 'x-modelgrid-cluster-secret',
} as const;
/**
 * Configuration paths
 *
 * NOTE(review): absolute Linux (FHS/systemd) paths — these assume a
 * root-installed Linux host; confirm this is intended before supporting
 * other platforms or user-local installs.
 */
export const PATHS = {
/** Default configuration directory */
CONFIG_DIR: '/etc/modelgrid',
/** Default configuration file */
CONFIG_FILE: '/etc/modelgrid/config.json',
/** Default data directory */
DATA_DIR: '/var/lib/modelgrid',
/** Default log directory */
LOG_DIR: '/var/log/modelgrid',
/** Systemd service file path */
SYSTEMD_SERVICE: '/etc/systemd/system/modelgrid.service',
/** Binary installation path */
BINARY_PATH: '/usr/local/bin/modelgrid',
/** Working directory */
WORK_DIR: '/opt/modelgrid',
} as const;
/**
 * UI/Display constants
 *
 * Box widths for rendered output — presumably measured in terminal
 * columns/characters; confirm against the renderer.
 */
export const UI = {
/** Default width for log boxes */
DEFAULT_BOX_WIDTH: 50,
/** Wide box width for status displays */
WIDE_BOX_WIDTH: 65,
/** Extra wide box width for detailed info */
EXTRA_WIDE_BOX_WIDTH: 80,
} as const;