initial
Some checks failed
CI / Type Check & Lint (push) Failing after 5s
CI / Build Test (Current Platform) (push) Failing after 5s
CI / Build All Platforms (push) Successful in 49s

This commit is contained in:
2026-01-30 03:16:57 +00:00
commit daaf6559e3
80 changed files with 14430 additions and 0 deletions

329
ts/interfaces/api.ts Normal file
View File

@@ -0,0 +1,329 @@
/**
* ModelGrid API Interfaces
*
* OpenAI-compatible API types for the ModelGrid gateway.
*/
/**
 * Chat message role (mirrors the OpenAI chat schema).
 */
export type TChatRole = 'system' | 'user' | 'assistant' | 'tool';
/**
 * A single message in a chat conversation.
 */
export interface IChatMessage {
/** Message role */
role: TChatRole;
/** Message content. NOTE(review): OpenAI permits null content on assistant messages that only carry tool_calls — confirm plain `string` is intentional here. */
content: string;
/** Name of the participant (optional) */
name?: string;
/** Tool calls made by the assistant (optional) */
tool_calls?: IToolCall[];
/** ID of the IToolCall this message answers (set on tool response messages) */
tool_call_id?: string;
}
/**
 * A tool (function) invocation emitted by the assistant.
 */
export interface IToolCall {
/** Unique ID for this tool call; echoed back via IChatMessage.tool_call_id */
id: string;
/** Type of tool call ('function' is the only defined variant) */
type: 'function';
/** Function call details */
function: {
/** Function name */
name: string;
/** Function arguments as a JSON-encoded string — callers must parse it */
arguments: string;
};
}
/**
 * Tool definition offered to the model for function calling.
 */
export interface ITool {
/** Tool type ('function' is the only defined variant) */
type: 'function';
/** Function definition */
function: {
/** Function name */
name: string;
/** Function description shown to the model */
description: string;
/** Function parameters (JSON Schema) */
parameters: Record<string, unknown>;
};
}
/**
 * Chat completion request (OpenAI-compatible).
 */
export interface IChatCompletionRequest {
/** Model to use */
model: string;
/** Messages in the conversation, oldest first */
messages: IChatMessage[];
/** Maximum tokens to generate */
max_tokens?: number;
/** Sampling temperature (0-2) */
temperature?: number;
/** Top-p (nucleus) sampling */
top_p?: number;
/** Number of completions to generate */
n?: number;
/** Whether to stream the response (delivered as IChatCompletionChunk events) */
stream?: boolean;
/** Stop sequences */
stop?: string | string[];
/** Presence penalty (-2 to 2) */
presence_penalty?: number;
/** Frequency penalty (-2 to 2) */
frequency_penalty?: number;
/** End-user identifier (for abuse tracking) */
user?: string;
/** Tools available for function calling */
tools?: ITool[];
/** Tool choice preference: disable, let the model decide, or force a named function */
tool_choice?: 'none' | 'auto' | { type: 'function'; function: { name: string } };
}
/**
 * One generated alternative within a chat completion response.
 */
export interface IChatCompletionChoice {
/** Choice index (0-based, matches request `n`) */
index: number;
/** Generated message */
message: IChatMessage;
/** Why generation stopped; null while still in progress */
finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
}
/**
 * Token usage accounting for a request.
 */
export interface IUsage {
/** Number of tokens in the prompt */
prompt_tokens: number;
/** Number of tokens in the completion */
completion_tokens: number;
/** Total tokens used (prompt + completion) */
total_tokens: number;
}
/**
 * Chat completion response (OpenAI-compatible).
 */
export interface IChatCompletionResponse {
/** Unique ID for this completion */
id: string;
/** Object type discriminator */
object: 'chat.completion';
/** Creation time — presumably a Unix timestamp in seconds, per OpenAI convention; confirm */
created: number;
/** Model used */
model: string;
/** System fingerprint */
system_fingerprint?: string;
/** Generated choices (one per requested `n`) */
choices: IChatCompletionChoice[];
/** Token usage */
usage: IUsage;
}
/**
 * One server-sent chunk of a streamed chat completion.
 */
export interface IChatCompletionChunk {
/** Unique ID for this completion (stable across all chunks of one stream) */
id: string;
/** Object type discriminator */
object: 'chat.completion.chunk';
/** Creation time — presumably a Unix timestamp in seconds; confirm */
created: number;
/** Model used */
model: string;
/** System fingerprint */
system_fingerprint?: string;
/** Incremental delta choices */
choices: IChatCompletionChunkChoice[];
}
/**
 * Incremental delta for one choice in a streamed completion.
 */
export interface IChatCompletionChunkChoice {
/** Choice index */
index: number;
/** Partial message fields added by this chunk */
delta: Partial<IChatMessage>;
/** Why generation stopped; null until the final chunk of the choice */
finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
}
/**
 * Text completion request for the legacy completions endpoint
 * (OpenAI-compatible).
 */
export interface ICompletionRequest {
  /** Model to use */
  model: string;
  /** Prompt text, or several prompts to complete independently */
  prompt: string | string[];
  /** Maximum tokens to generate */
  max_tokens?: number;
  /** Sampling temperature (0-2, per OpenAI convention) */
  temperature?: number;
  /** Top-p (nucleus) sampling */
  top_p?: number;
  /** Number of completions to generate per prompt */
  n?: number;
  /** Whether to stream the response */
  stream?: boolean;
  /** Stop sequences */
  stop?: string | string[];
  /** Echo the prompt back in the response */
  echo?: boolean;
  /**
   * End-user identifier for abuse tracking.
   * Added for parity with IChatCompletionRequest.user — the OpenAI legacy
   * completions endpoint accepts it too. Optional, so existing callers are
   * unaffected.
   */
  user?: string;
}
/**
 * Text completion response for the legacy endpoint.
 */
export interface ICompletionResponse {
/** Unique ID */
id: string;
/** Object type discriminator */
object: 'text_completion';
/** Creation time — presumably a Unix timestamp in seconds; confirm */
created: number;
/** Model used */
model: string;
/** Generated choices */
choices: ICompletionChoice[];
/** Token usage */
usage: IUsage;
}
/**
 * One generated alternative within a text completion response.
 */
export interface ICompletionChoice {
/** Generated text */
text: string;
/** Choice index (0-based) */
index: number;
/** Why generation stopped; null while still in progress */
finish_reason: 'stop' | 'length' | null;
}
/**
 * Embeddings request (OpenAI-compatible).
 */
export interface IEmbeddingsRequest {
/** Model to use */
model: string;
/** Input text(s) — a batch when an array is given */
input: string | string[];
/** End-user identifier */
user?: string;
/** Encoding format for the returned vectors */
encoding_format?: 'float' | 'base64';
}
/**
 * Embeddings response.
 */
export interface IEmbeddingsResponse {
/** Object type discriminator */
object: 'list';
/** One entry per input, in input order */
data: IEmbeddingData[];
/** Model used */
model: string;
/** Token usage — embeddings report prompt tokens only (no completion tokens) */
usage: {
prompt_tokens: number;
total_tokens: number;
};
}
/**
 * A single embedding vector.
 */
export interface IEmbeddingData {
/** Object type discriminator */
object: 'embedding';
/** Embedding vector. NOTE(review): with encoding_format 'base64' OpenAI returns a base64 string, not number[] — confirm this type covers that case. */
embedding: number[];
/** Index of the corresponding entry in the request input array */
index: number;
}
/**
 * Model information (OpenAI-compatible).
 */
export interface IModelInfo {
/** Model ID (the name clients pass as `model` in requests) */
id: string;
/** Object type discriminator */
object: 'model';
/** Creation time — presumably a Unix timestamp in seconds; confirm */
created: number;
/** Model owner/organization */
owned_by: string;
}
/**
 * Response listing all available models.
 */
export interface IListModelsResponse {
/** Object type discriminator */
object: 'list';
/** Available models */
data: IModelInfo[];
}
/**
 * API error response envelope (matches the OpenAI error shape).
 */
export interface IApiError {
/** Error details */
error: {
/** Human-readable error message */
message: string;
/** Error type/category */
type: string;
/** Request parameter that caused the error, if attributable */
param?: string;
/** Machine-readable error code */
code?: string;
};
}
/**
 * Health check response for the gateway.
 */
export interface IHealthResponse {
/** Overall status */
status: 'ok' | 'degraded' | 'error';
/** Gateway version */
version: string;
/** Uptime in seconds */
uptime: number;
/** Number of active containers */
containers: number;
/** Number of available models */
models: number;
/** Number of available GPUs */
gpus: number;
/** Detailed per-resource status (optional) */
details?: {
/** Container health keyed by container ID */
containers: Record<string, 'healthy' | 'unhealthy'>;
/** GPU status keyed by GPU ID */
gpus: Record<string, 'available' | 'in_use' | 'error'>;
};
}

121
ts/interfaces/config.ts Normal file
View File

@@ -0,0 +1,121 @@
/**
* ModelGrid Configuration Interfaces
*
* Defines the configuration structure for the ModelGrid daemon.
*/
import type { IContainerConfig } from './container.ts';
/**
 * API server configuration.
 */
export interface IApiConfig {
/** Port to listen on (default: 8080) */
port: number;
/** Host to bind to (default: '0.0.0.0') */
host: string;
/** Valid API keys for authentication */
apiKeys: string[];
/** Rate limit in requests per minute; omit for unlimited */
rateLimit?: number;
/** Enable CORS (default: false) */
cors?: boolean;
/** Allowed origins for CORS (only meaningful when cors is true) */
corsOrigins?: string[];
}
/**
 * Docker/container runtime configuration.
 */
export interface IDockerConfig {
/** Docker network name (default: 'modelgrid') */
networkName: string;
/** Container runtime to use */
runtime: 'docker' | 'podman';
/** Path to the docker/podman socket; omit to use the runtime default */
socketPath?: string;
}
/**
 * GPU assignment configuration.
 */
export interface IGpuAssignmentConfig {
/** Whether to auto-detect GPUs */
autoDetect: boolean;
/** Manual GPU to container assignments (gpuId -> containerId) */
assignments: Record<string, string>;
}
/**
 * Model management configuration.
 */
export interface IModelConfig {
/** URL to fetch the greenlit models list from */
greenlistUrl: string;
/** Whether to auto-pull models when requested */
autoPull: boolean;
/** Default container type for new models */
defaultContainer: 'ollama' | 'vllm' | 'tgi';
/** Model names to auto-load on startup */
autoLoad: string[];
}
/**
 * Main ModelGrid daemon configuration.
 */
export interface IModelGridConfig {
/** Configuration format version */
version: string;
/** API server configuration */
api: IApiConfig;
/** Docker configuration */
docker: IDockerConfig;
/** GPU configuration */
gpus: IGpuAssignmentConfig;
/** Container configurations */
containers: IContainerConfig[];
/** Model management configuration */
models: IModelConfig;
/** Health check interval in milliseconds */
checkInterval: number;
}
/**
 * Greenlit (approved) model entry from the remote list.
 */
export interface IGreenlitModel {
/** Model name (e.g., "llama3:8b") */
name: string;
/** Preferred container type */
container: 'ollama' | 'vllm' | 'tgi';
/** Minimum VRAM required in GB. NOTE(review): IGpuInfo.vram is tracked in MB — take care with units when comparing. */
minVram: number;
/** Optional tags for categorization */
tags?: string[];
/** Optional description */
description?: string;
}
/**
 * Greenlit models list structure as fetched from greenlistUrl.
 */
export interface IGreenlitModelsList {
/** List version */
version: string;
/** Last updated timestamp — presumably an ISO-8601 string; confirm against the published list */
lastUpdated: string;
/** List of greenlit models */
models: IGreenlitModel[];
}
/**
 * Daemon update status information.
 */
export interface IUpdateStatus {
/** Current installed version */
currentVersion: string;
/** Latest available version */
latestVersion: string;
/** Whether an update is available */
updateAvailable: boolean;
}

176
ts/interfaces/container.ts Normal file
View File

@@ -0,0 +1,176 @@
/**
* ModelGrid Container Interfaces
*
* Defines types for container management (Ollama, vLLM, TGI).
*/
/**
 * Supported container flavors.
 */
export type TContainerType = 'ollama' | 'vllm' | 'tgi' | 'custom';
/**
 * Container health as reported by health checks.
 */
export type TContainerHealth = 'healthy' | 'unhealthy' | 'starting' | 'unknown';
/**
 * Container lifecycle/run status.
 */
export type TContainerRunStatus = 'running' | 'stopped' | 'starting' | 'stopping' | 'error';
/**
 * Configuration for one managed container.
 */
export interface IContainerConfig {
/** Unique identifier for this container (ModelGrid's ID, not Docker's) */
id: string;
/** Container type */
type: TContainerType;
/** Friendly name for the container */
name: string;
/** Docker image to use */
image: string;
/** GPU IDs to assign to this container */
gpuIds: string[];
/** Internal port the container listens on */
port: number;
/** External port to expose (optional, uses internal port if not specified) */
externalPort?: number;
/** Models to pre-load in this container */
models: string[];
/** Environment variables */
env?: Record<string, string>;
/** Volume mounts ("host:container" format) */
volumes?: string[];
/** Whether to auto-start this container with the daemon */
autoStart: boolean;
/** Restart policy (values match Docker's restart policies) */
restartPolicy: 'no' | 'always' | 'on-failure' | 'unless-stopped';
/** Maximum restart attempts (only meaningful for the 'on-failure' policy) */
maxRestarts?: number;
/** Memory limit in Docker syntax (e.g., "16g") */
memoryLimit?: string;
/** CPU limit in Docker syntax (e.g., "4") */
cpuLimit?: string;
/** Custom command arguments, overriding the image default */
command?: string[];
}
/**
 * Runtime status snapshot for one container.
 */
export interface IContainerStatus {
/** ModelGrid container ID (matches IContainerConfig.id) */
id: string;
/** Docker container ID, once created */
dockerId?: string;
/** Container name */
name: string;
/** Container type */
type: TContainerType;
/** Whether the container is running */
running: boolean;
/** Run status */
runStatus: TContainerRunStatus;
/** Health status */
health: TContainerHealth;
/** Health check message */
healthMessage?: string;
/** GPU utilization (if a GPU is assigned) */
gpuUtilization?: number;
/** Memory usage in MB */
memoryUsage?: number;
/** CPU usage percentage */
cpuUsage?: number;
/** List of currently loaded models */
loadedModels: string[];
/** Container uptime in seconds */
uptime?: number;
/** Container start time — presumably an epoch timestamp; confirm units (s vs ms) */
startTime?: number;
/** Number of requests served */
requestsServed?: number;
/** Last error message (if any) */
lastError?: string;
/** Assigned GPU IDs */
assignedGpus: string[];
/** Internal endpoint URL used for routing */
endpoint: string;
}
/**
 * A model hosted inside a container.
 */
export interface ILoadedModel {
/** Model name */
name: string;
/** Model size in bytes */
size: number;
/** Model format/quantization */
format?: string;
/** Whether the model is currently loaded in memory (vs. on disk only) */
loaded: boolean;
/** Last used time — presumably an epoch timestamp; confirm units (s vs ms) */
lastUsed?: number;
/** Number of requests served by this model */
requestCount: number;
}
/**
 * A routable container endpoint for API request dispatch.
 */
export interface IContainerEndpoint {
/** Container ID */
containerId: string;
/** Container type */
type: TContainerType;
/** Endpoint URL */
url: string;
/** List of models available at this endpoint */
models: string[];
/** Whether the endpoint is currently healthy */
healthy: boolean;
/** Priority for load balancing (lower = higher priority) */
priority: number;
}
/**
 * Options for creating a new container.
 */
export interface IContainerCreateOptions {
/** Container type */
type: TContainerType;
/** Friendly name */
name: string;
/** GPU IDs to assign */
gpuIds: string[];
/** Models to pre-load */
models?: string[];
/** Custom image (optional, uses the default for the chosen type) */
image?: string;
/** Custom port (optional, uses the default for the chosen type) */
port?: number;
/** Environment variables */
env?: Record<string, string>;
/** Volume mounts ("host:container" format) */
volumes?: string[];
/** Auto-start on daemon startup */
autoStart?: boolean;
}
/**
 * Options for retrieving container logs.
 */
export interface IContainerLogsOptions {
/** Container ID */
containerId: string;
/** Number of trailing lines to return (default: 100) */
lines?: number;
/** Follow logs in real-time */
follow?: boolean;
/** Include timestamps on each line */
timestamps?: boolean;
/** Filter by log level */
level?: 'all' | 'error' | 'warn' | 'info' | 'debug';
}

132
ts/interfaces/gpu.ts Normal file
View File

@@ -0,0 +1,132 @@
/**
* ModelGrid GPU Interfaces
*
* Defines types for GPU detection and management.
*/
/**
 * GPU vendor type.
 */
export type TGpuVendor = 'nvidia' | 'amd' | 'intel' | 'unknown';
/**
 * Static GPU information detected from the system.
 */
export interface IGpuInfo {
/** Unique identifier for this GPU */
id: string;
/** GPU vendor */
vendor: TGpuVendor;
/** GPU model name (e.g., "NVIDIA GeForce RTX 4090") */
model: string;
/** Total VRAM in MB */
vram: number;
/** Driver version (if available) */
driverVersion?: string;
/** CUDA version (NVIDIA only) */
cudaVersion?: string;
/** Compute capability (NVIDIA only, e.g., "8.9") */
computeCapability?: string;
/** ROCm version (AMD only) */
rocmVersion?: string;
/** oneAPI version (Intel only) */
oneApiVersion?: string;
/** PCI slot identifier */
pciSlot: string;
/** PCI bus ID (e.g., "0000:01:00.0") */
pciBusId?: string;
/** GPU index in the system (as enumerated by the driver) */
index: number;
}
/**
 * Real-time GPU telemetry.
 */
export interface IGpuStatus {
/** GPU identifier (matches IGpuInfo.id) */
id: string;
/** Current GPU utilization percentage (0-100) */
utilization: number;
/** Current memory usage in MB */
memoryUsed: number;
/** Total memory in MB */
memoryTotal: number;
/** Memory usage percentage (0-100) */
memoryPercent: number;
/** Current temperature in Celsius */
temperature: number;
/** Current power usage in Watts */
powerUsage: number;
/** Power limit in Watts */
powerLimit: number;
/** Fan speed percentage (if available) */
fanSpeed?: number;
/** GPU clock speed in MHz */
gpuClock?: number;
/** Memory clock speed in MHz */
memoryClock?: number;
/** Last update time — presumably an epoch timestamp; confirm units (s vs ms) */
lastUpdate: number;
}
/**
 * Combined static GPU information plus live status and assignment state.
 */
export interface IGpuFullStatus extends IGpuInfo {
/** Real-time telemetry */
status: IGpuStatus;
/** Container ID currently assigned to this GPU (if any) */
assignedContainer?: string;
/** Whether the GPU is available for new assignments */
available: boolean;
/** Health status */
health: 'healthy' | 'warning' | 'error' | 'unknown';
/** Health message (set when health is 'warning' or 'error') */
healthMessage?: string;
}
/**
 * Host system information including all detected GPUs.
 */
export interface ISystemInfo {
/** System hostname */
hostname: string;
/** CPU model name */
cpuModel: string;
/** Number of CPU cores */
cpuCores: number;
/** Total RAM in MB */
ramTotal: number;
/** Available RAM in MB */
ramAvailable: number;
/** Operating system */
os: string;
/** Kernel version */
kernelVersion: string;
/** List of detected GPUs */
gpus: IGpuInfo[];
/** Docker version (if installed) */
dockerVersion?: string;
/** NVIDIA Container Toolkit version (if installed) */
nvidiaContainerVersion?: string;
}
/**
 * Driver/toolkit installation status for one GPU vendor.
 */
export interface IDriverStatus {
/** GPU vendor */
vendor: TGpuVendor;
/** Whether the driver is installed */
installed: boolean;
/** Driver version (if installed) */
version?: string;
/** CUDA/ROCm/oneAPI toolkit version (if installed) */
toolkitVersion?: string;
/** Whether container runtime GPU support is present (e.g., nvidia-docker) */
containerSupport: boolean;
/** Container runtime version */
containerRuntimeVersion?: string;
/** List of detected issues, empty when everything looks good */
issues: string[];
}

11
ts/interfaces/index.ts Normal file
View File

@@ -0,0 +1,11 @@
/**
 * ModelGrid Interfaces
 *
 * Central barrel export for all TypeScript interfaces used throughout ModelGrid.
 * NOTE(review): the explicit '.ts' specifiers require Deno or TypeScript's
 * `allowImportingTsExtensions` — confirm the build setup supports them.
 */
export * from './config.ts';
export * from './gpu.ts';
export * from './container.ts';
export * from './api.ts';
export * from './modelgrid-accessor.ts';

ts/interfaces/modelgrid-accessor.ts Normal file
View File

@@ -0,0 +1,31 @@
/**
* ModelGrid Accessor Interface
*
* Interface to break circular dependencies between ModelGrid and its submodules.
*/
import type { IUpdateStatus } from './config.ts';
/**
 * Interface for accessing the ModelGrid instance from submodules.
 * Submodules depend on this narrow contract instead of the concrete ModelGrid
 * class, breaking the circular dependency between ModelGrid and its managers.
 */
export interface IModelGridAccessor {
/**
 * Get the current version of ModelGrid
 * @returns The current version string
 */
getVersion(): string;
/**
 * Get the update status
 * @returns Object with current version, latest version, and update availability
 */
getUpdateStatus(): IUpdateStatus;
/**
 * Check for updates
 * @returns Promise resolving to true if an update is available
 */
checkForUpdates(): Promise<boolean>;
}