initial
387 ts/containers/ollama.ts Normal file
@@ -0,0 +1,387 @@
/**
 * Ollama Container
 *
 * Manages Ollama containers for running local LLMs.
 */

import type {
  IContainerConfig,
  ILoadedModel,
  TContainerType,
} from '../interfaces/container.ts';
import type {
  IChatCompletionRequest,
  IChatCompletionResponse,
  IChatCompletionChoice,
  IChatMessage,
} from '../interfaces/api.ts';
import { CONTAINER_IMAGES, CONTAINER_PORTS } from '../constants.ts';
import { logger } from '../logger.ts';
import { BaseContainer, type TModelPullProgress } from './base-container.ts';

/**
 * Ollama API response types
 */
interface IOllamaTagsResponse {
  models: Array<{
    name: string;
    size: number;
    digest: string;
    modified_at: string;
  }>;
}

interface IOllamaChatRequest {
  model: string;
  messages: Array<{
    role: string;
    content: string;
  }>;
  stream?: boolean;
  options?: {
    temperature?: number;
    top_p?: number;
    num_predict?: number;
    stop?: string[];
  };
}

interface IOllamaChatResponse {
  model: string;
  created_at: string;
  message: {
    role: string;
    content: string;
  };
  done: boolean;
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  eval_count?: number;
}

interface IOllamaPullResponse {
  status: string;
  digest?: string;
  total?: number;
  completed?: number;
}

/**
 * Ollama container implementation
 */
export class OllamaContainer extends BaseContainer {
  public readonly type: TContainerType = 'ollama';
  public readonly displayName = 'Ollama';
  public readonly defaultImage = CONTAINER_IMAGES.OLLAMA;
  public readonly defaultPort = CONTAINER_PORTS.OLLAMA;

  constructor(config: IContainerConfig) {
    super(config);

    // Set defaults if not provided
    if (!config.image) {
      config.image = this.defaultImage;
    }
    if (!config.port) {
      config.port = this.defaultPort;
    }

    // Add default volume for model storage
    if (!config.volumes || config.volumes.length === 0) {
      config.volumes = [`modelgrid-ollama-${config.id}:/root/.ollama`];
    }
  }

  /**
   * Create Ollama container configuration
   */
  public static createConfig(
    id: string,
    name: string,
    gpuIds: string[],
    options: Partial<IContainerConfig> = {},
  ): IContainerConfig {
    return {
      id,
      name,
      type: 'ollama',
      image: options.image || CONTAINER_IMAGES.OLLAMA,
      gpuIds,
      port: options.port || CONTAINER_PORTS.OLLAMA,
      externalPort: options.externalPort,
      models: options.models || [],
      env: options.env,
      volumes: options.volumes || [`modelgrid-ollama-${id}:/root/.ollama`],
      autoStart: options.autoStart ?? true,
      restartPolicy: options.restartPolicy || 'unless-stopped',
      memoryLimit: options.memoryLimit,
      cpuLimit: options.cpuLimit,
      command: options.command,
    };
  }
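
  // Usage sketch: createConfig returns a plain IContainerConfig that can be passed
  // straight to the constructor (ids, ports, and model names below are illustrative):
  //
  //   const config = OllamaContainer.createConfig('gpu0', 'ollama-gpu0', ['0'], {
  //     externalPort: 11435,
  //     models: ['llama3.2'],
  //   });
  //   const container = new OllamaContainer(config);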

  /**
   * Check if Ollama is healthy
   */
  public async isHealthy(): Promise<boolean> {
    try {
      const response = await this.fetch('/api/tags', { timeout: 5000 });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * List available models
   */
  public async listModels(): Promise<string[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => m.name);
    } catch (error) {
      logger.warn(`Failed to list Ollama models: ${error instanceof Error ? error.message : String(error)}`);
      return [];
    }
  }

  /**
   * Get loaded models with details
   */
  public async getLoadedModels(): Promise<ILoadedModel[]> {
    try {
      const data = await this.fetchJson<IOllamaTagsResponse>('/api/tags');
      return (data.models || []).map((m) => ({
        name: m.name,
        size: m.size,
        format: m.digest.substring(0, 12),
        loaded: true, // Ollama doesn't distinguish loaded vs available
        requestCount: 0,
      }));
    } catch {
      return [];
    }
  }
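
  // Note: /api/tags reports every model stored in the container's volume, so
  // getLoadedModels marks each entry as loaded; example results are illustrative:
  //
  //   const names = await container.listModels();        // e.g. ['llama3.2:latest']
  //   const details = await container.getLoadedModels(); // name, size, digest prefix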

  /**
   * Pull a model
   */
  public async pullModel(modelName: string, onProgress?: TModelPullProgress): Promise<boolean> {
    try {
      logger.info(`Pulling model: ${modelName}`);

      const response = await this.fetch('/api/pull', {
        method: 'POST',
        body: { name: modelName },
        timeout: 3600000, // 1 hour for large models
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      // Read streaming response
      const reader = response.body?.getReader();
      if (!reader) {
        throw new Error('No response body');
      }

      const decoder = new TextDecoder();
      let lastStatus = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        const text = decoder.decode(value);
        const lines = text.split('\n').filter((l) => l.trim());

        for (const line of lines) {
          try {
            const data = JSON.parse(line) as IOllamaPullResponse;
            const status = data.status;

            if (status !== lastStatus) {
              lastStatus = status;
              let percent: number | undefined;

              if (data.total && data.completed) {
                percent = Math.round((data.completed / data.total) * 100);
              }

              if (onProgress) {
                onProgress({ model: modelName, status, percent });
              } else {
                const progressStr = percent !== undefined ? ` (${percent}%)` : '';
                logger.dim(`  ${status}${progressStr}`);
              }
            }
          } catch {
            // Invalid JSON line, skip
          }
        }
      }

      logger.success(`Model ${modelName} pulled successfully`);
      return true;
    } catch (error) {
      logger.error(`Failed to pull model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }
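
  // Usage sketch: pullModel streams newline-delimited JSON progress from /api/pull and
  // reports each status change through the optional callback (model name is illustrative):
  //
  //   const ok = await container.pullModel('llama3.2', ({ model, status, percent }) => {
  //     logger.dim(`${model}: ${status}${percent !== undefined ? ` ${percent}%` : ''}`);
  //   });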

  /**
   * Remove a model
   */
  public async removeModel(modelName: string): Promise<boolean> {
    try {
      const response = await this.fetch('/api/delete', {
        method: 'DELETE',
        body: { name: modelName },
      });

      if (response.ok) {
        logger.success(`Model ${modelName} removed`);
        return true;
      }

      throw new Error(`HTTP ${response.status}`);
    } catch (error) {
      logger.error(`Failed to remove model ${modelName}: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  /**
   * Send a chat completion request
   */
  public async chatCompletion(request: IChatCompletionRequest): Promise<IChatCompletionResponse> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: false,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetchJson<IOllamaChatResponse>('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000, // 5 minutes
    });

    // Convert to OpenAI format
    const created = Math.floor(Date.now() / 1000);

    const choice: IChatCompletionChoice = {
      index: 0,
      message: {
        role: 'assistant',
        content: response.message.content,
      },
      finish_reason: response.done ? 'stop' : null,
    };

    return {
      id: this.generateRequestId(),
      object: 'chat.completion',
      created,
      model: request.model,
      choices: [choice],
      usage: {
        prompt_tokens: response.prompt_eval_count || 0,
        completion_tokens: response.eval_count || 0,
        total_tokens: (response.prompt_eval_count || 0) + (response.eval_count || 0),
      },
    };
  }
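
  // Usage sketch: chatCompletion takes an OpenAI-style request (full field set is defined
  // in ../interfaces/api.ts), forwards it to Ollama's /api/chat, and maps the reply back
  // into a chat.completion response; the model name below is illustrative:
  //
  //   const reply = await container.chatCompletion({
  //     model: 'llama3.2',
  //     messages: [{ role: 'user', content: 'Hello' }],
  //     max_tokens: 128,
  //   });
  //   logger.info(reply.choices[0].message.content);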

  /**
   * Stream a chat completion request
   */
  public async chatCompletionStream(
    request: IChatCompletionRequest,
    onChunk: (chunk: string) => void,
  ): Promise<void> {
    const ollamaRequest: IOllamaChatRequest = {
      model: request.model,
      messages: request.messages.map((m) => ({
        role: m.role,
        content: m.content,
      })),
      stream: true,
      options: {
        temperature: request.temperature,
        top_p: request.top_p,
        num_predict: request.max_tokens,
        stop: Array.isArray(request.stop) ? request.stop : request.stop ? [request.stop] : undefined,
      },
    };

    const response = await this.fetch('/api/chat', {
      method: 'POST',
      body: ollamaRequest,
      timeout: 300000,
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    const requestId = this.generateRequestId();
    const created = Math.floor(Date.now() / 1000);

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      const text = decoder.decode(value);
      const lines = text.split('\n').filter((l) => l.trim());

      for (const line of lines) {
        try {
          const data = JSON.parse(line) as IOllamaChatResponse;

          // Convert to OpenAI streaming format
          const chunk = {
            id: requestId,
            object: 'chat.completion.chunk',
            created,
            model: request.model,
            choices: [
              {
                index: 0,
                delta: {
                  content: data.message.content,
                } as Partial<IChatMessage>,
                finish_reason: data.done ? 'stop' : null,
              },
            ],
          };

          onChunk(`data: ${JSON.stringify(chunk)}\n\n`);

          if (data.done) {
            onChunk('data: [DONE]\n\n');
          }
        } catch {
          // Invalid JSON, skip
        }
      }
    }
  }
}
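
// Usage sketch: chatCompletionStream emits Server-Sent-Events-style chunks
// ("data: {...}\n\n", terminated by "data: [DONE]\n\n"), so callers can forward them
// directly to an HTTP response; the request values below are illustrative:
//
//   let body = '';
//   await container.chatCompletionStream(
//     { model: 'llama3.2', messages: [{ role: 'user', content: 'Hi' }] },
//     (chunk) => { body += chunk; },
//   );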