feat(OllamaProvider): add model options, streaming support, and thinking tokens
- Add IOllamaModelOptions interface for runtime options (num_ctx, temperature, etc.)
- Extend IOllamaProviderOptions with defaultOptions and defaultTimeout
- Add IOllamaChatOptions for per-request overrides
- Add IOllamaStreamChunk and IOllamaChatResponse interfaces
- Add chatStreamResponse() for async iteration with options
- Add collectStreamResponse() for streaming with progress callback
- Add chatWithOptions() for non-streaming with full options
- Update chat() to use defaultOptions and defaultTimeout
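A minimal usage sketch of the new non-streaming surface. The prompts, option values, and timeouts below are illustrative; the field names (systemMessage, userMessage, messageHistory) follow how IOllamaChatOptions is consumed in the diff:

  const provider = new OllamaProvider({
    baseUrl: 'http://localhost:11434',
    defaultOptions: { num_ctx: 8192, temperature: 0.2 }, // applied to every request
    defaultTimeout: 300000,                              // 5 minutes
  });

  // Per-request options are merged over defaultOptions; model and timeout can be overridden too.
  const response = await provider.chatWithOptions({
    systemMessage: 'You are a concise assistant.',
    userMessage: 'Summarize the changelog in one sentence.',
    messageHistory: [],
    model: 'llama2',             // per-request model override
    options: { temperature: 0 }, // overrides defaultOptions.temperature for this call
    timeout: 60000,
  });
  console.log(response.message, response.stats?.evalCount);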
@@ -12,10 +12,60 @@ import type {
   ImageResponse
 } from './abstract.classes.multimodal.js';
 
+/**
+ * Ollama model runtime options
+ * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
+ */
+export interface IOllamaModelOptions {
+  num_ctx?: number;        // Context window (default: 2048)
+  temperature?: number;    // 0 = deterministic (default: 0.8)
+  top_k?: number;          // Top-k sampling (default: 40)
+  top_p?: number;          // Nucleus sampling (default: 0.9)
+  repeat_penalty?: number; // Repeat penalty (default: 1.1)
+  num_predict?: number;    // Max tokens to predict
+  stop?: string[];         // Stop sequences
+  seed?: number;           // Random seed for reproducibility
+}
+
 export interface IOllamaProviderOptions {
   baseUrl?: string;
   model?: string;
   visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
+  defaultOptions?: IOllamaModelOptions; // Default model options
+  defaultTimeout?: number; // Default timeout in ms (default: 120000)
 }
+
+/**
+ * Extended chat options with Ollama-specific settings
+ */
+export interface IOllamaChatOptions extends ChatOptions {
+  options?: IOllamaModelOptions; // Per-request model options
+  timeout?: number; // Per-request timeout in ms
+  model?: string; // Per-request model override
+}
+
+/**
+ * Chunk emitted during streaming
+ */
+export interface IOllamaStreamChunk {
+  content: string;
+  thinking?: string; // For models with extended thinking
+  done: boolean;
+  stats?: {
+    totalDuration?: number;
+    evalCount?: number;
+  };
+}
+
+/**
+ * Extended chat response with Ollama-specific fields
+ */
+export interface IOllamaChatResponse extends ChatResponse {
+  thinking?: string;
+  stats?: {
+    totalDuration?: number;
+    evalCount?: number;
+  };
+}
 
 export class OllamaProvider extends MultiModalModel {
@@ -23,6 +73,8 @@ export class OllamaProvider extends MultiModalModel {
   private baseUrl: string;
   private model: string;
   private visionModel: string;
+  private defaultOptions: IOllamaModelOptions;
+  private defaultTimeout: number;
 
   constructor(optionsArg: IOllamaProviderOptions = {}) {
     super();
@@ -30,6 +82,8 @@ export class OllamaProvider extends MultiModalModel {
     this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
     this.model = optionsArg.model || 'llama2';
     this.visionModel = optionsArg.visionModel || 'llava';
+    this.defaultOptions = optionsArg.defaultOptions || {};
+    this.defaultTimeout = optionsArg.defaultTimeout || 120000;
   }
 
   async start() {
@@ -154,7 +208,7 @@ export class OllamaProvider extends MultiModalModel {
       { role: 'user', content: optionsArg.userMessage }
     ];
 
-    // Make API call to Ollama
+    // Make API call to Ollama with defaultOptions and timeout
     const response = await fetch(`${this.baseUrl}/api/chat`, {
       method: 'POST',
       headers: {
@@ -163,8 +217,10 @@ export class OllamaProvider extends MultiModalModel {
       body: JSON.stringify({
         model: this.model,
         messages: messages,
-        stream: false
+        stream: false,
+        options: this.defaultOptions,
       }),
+      signal: AbortSignal.timeout(this.defaultTimeout),
     });
 
     if (!response.ok) {
@@ -172,13 +228,150 @@ export class OllamaProvider extends MultiModalModel {
     }
 
     const result = await response.json();
 
     return {
       role: 'assistant' as const,
       message: result.message.content,
     };
   }
+
+  /**
+   * Streaming chat with async iteration and options support
+   */
+  public async chatStreamResponse(
+    optionsArg: IOllamaChatOptions
+  ): Promise<AsyncIterable<IOllamaStreamChunk>> {
+    const model = optionsArg.model || this.model;
+    const timeout = optionsArg.timeout || this.defaultTimeout;
+    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
+
+    const messages = [
+      { role: 'system', content: optionsArg.systemMessage },
+      ...optionsArg.messageHistory,
+      { role: 'user', content: optionsArg.userMessage }
+    ];
+
+    const response = await fetch(`${this.baseUrl}/api/chat`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model,
+        messages,
+        stream: true,
+        options: modelOptions,
+      }),
+      signal: AbortSignal.timeout(timeout),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Ollama API error: ${response.status}`);
+    }
+
+    const reader = response.body!.getReader();
+    const decoder = new TextDecoder();
+
+    return {
+      [Symbol.asyncIterator]: async function* () {
+        let buffer = '';
+        try {
+          while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+            buffer += decoder.decode(value, { stream: true });
+            const lines = buffer.split('\n');
+            buffer = lines.pop() || '';
+            for (const line of lines) {
+              if (!line.trim()) continue;
+              try {
+                const json = JSON.parse(line);
+                yield {
+                  content: json.message?.content || '',
+                  thinking: json.message?.thinking,
+                  done: json.done || false,
+                  stats: json.done ? {
+                    totalDuration: json.total_duration,
+                    evalCount: json.eval_count,
+                  } : undefined,
+                } as IOllamaStreamChunk;
+              } catch { /* skip malformed */ }
+            }
+          }
+        } finally {
+          reader.releaseLock();
+        }
+      }
+    };
+  }
+
+  /**
+   * Stream and collect full response with optional progress callback
+   */
+  public async collectStreamResponse(
+    optionsArg: IOllamaChatOptions,
+    onChunk?: (chunk: IOllamaStreamChunk) => void
+  ): Promise<IOllamaChatResponse> {
+    const stream = await this.chatStreamResponse(optionsArg);
+    let content = '';
+    let thinking = '';
+    let stats: IOllamaChatResponse['stats'];
+
+    for await (const chunk of stream) {
+      if (chunk.content) content += chunk.content;
+      if (chunk.thinking) thinking += chunk.thinking;
+      if (chunk.stats) stats = chunk.stats;
+      if (onChunk) onChunk(chunk);
+    }
+
+    return {
+      role: 'assistant' as const,
+      message: content,
+      thinking: thinking || undefined,
+      stats,
+    };
+  }
+
+  /**
+   * Non-streaming chat with full options support
+   */
+  public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
+    const model = optionsArg.model || this.model;
+    const timeout = optionsArg.timeout || this.defaultTimeout;
+    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
+
+    const messages = [
+      { role: 'system', content: optionsArg.systemMessage },
+      ...optionsArg.messageHistory,
+      { role: 'user', content: optionsArg.userMessage }
+    ];
+
+    const response = await fetch(`${this.baseUrl}/api/chat`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model,
+        messages,
+        stream: false,
+        options: modelOptions,
+      }),
+      signal: AbortSignal.timeout(timeout),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Ollama API error: ${response.statusText}`);
+    }
+
+    const result = await response.json();
+    return {
+      role: 'assistant' as const,
+      message: result.message.content,
+      thinking: result.message.thinking,
+      stats: {
+        totalDuration: result.total_duration,
+        evalCount: result.eval_count,
+      },
+    };
+  }
 
   public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
     throw new Error('Audio generation is not supported by Ollama.');
   }
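A short sketch of consuming the streaming API added above, reusing the provider from the earlier sketch. The prompts are illustrative; chunk handling follows the IOllamaStreamChunk shape introduced in this commit:

  // Async iteration over raw chunks, including thinking tokens when the model emits them.
  const stream = await provider.chatStreamResponse({
    systemMessage: 'You are a helpful assistant.',
    userMessage: 'Explain context windows briefly.',
    messageHistory: [],
  });
  for await (const chunk of stream) {
    if (chunk.thinking) process.stderr.write(chunk.thinking);
    if (chunk.content) process.stdout.write(chunk.content);
    if (chunk.done) console.log('\nstats:', chunk.stats); // stats only arrive on the final chunk
  }

  // Or collect the full response while reporting progress per chunk.
  const full = await provider.collectStreamResponse(
    {
      systemMessage: 'You are a helpful assistant.',
      userMessage: 'Explain context windows briefly.',
      messageHistory: [],
    },
    (chunk) => process.stdout.write(chunk.content),
  );
  console.log(full.message, full.stats?.totalDuration);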