smartai/ts/provider.ollama.ts
Juergen Kunz 126e9b239b feat(OllamaProvider): add model options, streaming support, and thinking tokens
- Add IOllamaModelOptions interface for runtime options (num_ctx, temperature, etc.)
- Extend IOllamaProviderOptions with defaultOptions and defaultTimeout
- Add IOllamaChatOptions for per-request overrides
- Add IOllamaStreamChunk and IOllamaChatResponse interfaces
- Add chatStreamResponse() for async iteration with options
- Add collectStreamResponse() for streaming with progress callback
- Add chatWithOptions() for non-streaming with full options
- Update chat() to use defaultOptions and defaultTimeout
2026-01-20 00:02:45 +00:00


import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
/**
* Ollama model runtime options
* @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
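*
* @example
* // A minimal sketch of deterministic, long-context generation
* // (values are illustrative, not recommendations):
* const opts: IOllamaModelOptions = {
*   num_ctx: 8192,
*   temperature: 0,
*   seed: 42,
* };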
*/
export interface IOllamaModelOptions {
num_ctx?: number; // Context window (default: 2048)
temperature?: number; // 0 = deterministic (default: 0.8)
top_k?: number; // Top-k sampling (default: 40)
top_p?: number; // Nucleus sampling (default: 0.9)
repeat_penalty?: number; // Repeat penalty (default: 1.1)
num_predict?: number; // Max tokens to predict
stop?: string[]; // Stop sequences
seed?: number; // Random seed for reproducibility
}
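/**
* Provider construction options
*
* @example
* // A minimal sketch; 'llama3' is an illustrative model name —
* // substitute any model pulled locally via `ollama pull`:
* const provider = new OllamaProvider({
*   model: 'llama3',
*   defaultOptions: { temperature: 0.2 },
*   defaultTimeout: 60000,
* });
*/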
export interface IOllamaProviderOptions {
baseUrl?: string;
model?: string;
visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
defaultOptions?: IOllamaModelOptions; // Default model options
defaultTimeout?: number; // Default timeout in ms (default: 120000)
}
/**
* Extended chat options with Ollama-specific settings
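*
* @example
* // Per-request overrides win over provider defaults (sketch):
* const chatOpts: IOllamaChatOptions = {
*   systemMessage: 'You are a helpful assistant.',
*   userMessage: 'Summarize this repository.',
*   messageHistory: [],
*   options: { temperature: 0 },
*   timeout: 30000,
* };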
*/
export interface IOllamaChatOptions extends ChatOptions {
options?: IOllamaModelOptions; // Per-request model options
timeout?: number; // Per-request timeout in ms
model?: string; // Per-request model override
}
/**
* Chunk emitted during streaming
*/
export interface IOllamaStreamChunk {
content: string;
thinking?: string; // For models with extended thinking
done: boolean;
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
/**
* Extended chat response with Ollama-specific fields
*/
export interface IOllamaChatResponse extends ChatResponse {
thinking?: string;
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
export class OllamaProvider extends MultiModalModel {
private options: IOllamaProviderOptions;
private baseUrl: string;
private model: string;
private visionModel: string;
private defaultOptions: IOllamaModelOptions;
private defaultTimeout: number;
constructor(optionsArg: IOllamaProviderOptions = {}) {
super();
this.options = optionsArg;
this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
this.model = optionsArg.model || 'llama2';
this.visionModel = optionsArg.visionModel || 'llava';
this.defaultOptions = optionsArg.defaultOptions || {};
this.defaultTimeout = optionsArg.defaultTimeout || 120000;
}
async start() {
await super.start();
// Verify Ollama is running
try {
const response = await fetch(`${this.baseUrl}/api/tags`);
if (!response.ok) {
throw new Error('Failed to connect to Ollama server');
}
} catch (error) {
throw new Error(`Failed to connect to Ollama server at ${this.baseUrl}: ${(error as Error).message}`);
}
}
async stop() {
await super.stop();
}
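/**
* Bridges a stream of newline-delimited JSON chat messages
* ({ role, content } per line) to Ollama and emits the assistant's
* reply as text chunks.
*
* @example
* // Sketch: wrap a single message as an NDJSON byte stream
* // (assumes a started OllamaProvider instance `provider`):
* const input = new ReadableStream<Uint8Array>({
*   start(controller) {
*     controller.enqueue(new TextEncoder().encode(
*       JSON.stringify({ role: 'user', content: 'Hello!' }) + '\n'
*     ));
*     controller.close();
*   },
* });
* const output = await provider.chatStream(input);
*/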
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to Ollama
if (currentMessage) {
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.model,
messages: [{ role: currentMessage.role, content: currentMessage.content }],
stream: true,
}),
});
// Process each chunk from Ollama, buffering partial lines across reads
// so multi-byte characters and split JSON lines are not lost
const reader = response.body?.getReader();
if (reader) {
const ollamaDecoder = new TextDecoder();
let ollamaBuffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
ollamaBuffer += ollamaDecoder.decode(value, { stream: true });
const lines = ollamaBuffer.split('\n');
ollamaBuffer = lines.pop() || '';
for (const line of lines) {
if (line.trim()) {
try {
const parsed = JSON.parse(line);
const content = parsed.message?.content;
if (content) {
controller.enqueue(content);
}
} catch (e) {
console.error('Failed to parse Ollama response:', e);
}
}
}
}
} finally {
reader.releaseLock();
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Non-streaming chat interaction using the provider-level defaults
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Format messages for Ollama
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
];
// Make API call to Ollama with defaultOptions and timeout
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.model,
messages: messages,
stream: false,
options: this.defaultOptions,
}),
signal: AbortSignal.timeout(this.defaultTimeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant' as const,
message: result.message.content,
};
}
/**
* Streaming chat with async iteration and options support
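*
* @example
* // Sketch: print tokens as they arrive (Node).
* const stream = await provider.chatStreamResponse({
*   systemMessage: 'You are terse.',
*   userMessage: 'Explain streams in one line.',
*   messageHistory: [],
* });
* for await (const chunk of stream) {
*   process.stdout.write(chunk.content);
*   if (chunk.done && chunk.stats) console.log('\n', chunk.stats);
* }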
*/
public async chatStreamResponse(
optionsArg: IOllamaChatOptions
): Promise<AsyncIterable<IOllamaStreamChunk>> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
];
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages,
stream: true,
options: modelOptions,
}),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status}`);
}
const reader = response.body!.getReader();
const decoder = new TextDecoder();
return {
[Symbol.asyncIterator]: async function* () {
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
yield {
content: json.message?.content || '',
thinking: json.message?.thinking,
done: json.done || false,
stats: json.done ? {
totalDuration: json.total_duration,
evalCount: json.eval_count,
} : undefined,
} as IOllamaStreamChunk;
} catch { /* skip malformed */ }
}
}
} finally {
reader.releaseLock();
}
}
};
}
/**
* Stream and collect full response with optional progress callback
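*
* @example
* // Sketch: stream with a progress callback, then use the full text.
* const res = await provider.collectStreamResponse(
*   { systemMessage: 'You are terse.', userMessage: 'Hi', messageHistory: [] },
*   (chunk) => process.stdout.write(chunk.content)
* );
* console.log('\nTotal:', res.message.length, 'chars');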
*/
public async collectStreamResponse(
optionsArg: IOllamaChatOptions,
onChunk?: (chunk: IOllamaStreamChunk) => void
): Promise<IOllamaChatResponse> {
const stream = await this.chatStreamResponse(optionsArg);
let content = '';
let thinking = '';
let stats: IOllamaChatResponse['stats'];
for await (const chunk of stream) {
if (chunk.content) content += chunk.content;
if (chunk.thinking) thinking += chunk.thinking;
if (chunk.stats) stats = chunk.stats;
if (onChunk) onChunk(chunk);
}
return {
role: 'assistant' as const,
message: content,
thinking: thinking || undefined,
stats,
};
}
/**
* Non-streaming chat with full options support
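*
* @example
* // Sketch: one-shot reproducible completion with stats.
* const res = await provider.chatWithOptions({
*   systemMessage: 'You are terse.',
*   userMessage: 'Name one prime number.',
*   messageHistory: [],
*   options: { temperature: 0, seed: 7 },
* });
* console.log(res.message, res.stats?.evalCount);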
*/
public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
];
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages,
stream: false,
options: modelOptions,
}),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant' as const,
message: result.message.content,
thinking: result.message.thinking,
stats: {
totalDuration: result.total_duration,
evalCount: result.eval_count,
},
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Ollama.');
}
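/**
* Describe an image using the configured vision model.
*
* @example
* // Sketch (Node): load an image from disk and ask about it.
* // import { readFile } from 'node:fs/promises';
* const image = await readFile('./photo.jpg');
* const answer = await provider.vision({ image, prompt: 'What is shown here?' });
*/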
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const base64Image = optionsArg.image.toString('base64');
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.visionModel,
messages: [{
role: 'user',
content: optionsArg.prompt,
images: [base64Image]
}],
stream: false
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return result.message.content;
}
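/**
* Answer questions about PDF documents by rendering each page to PNG
* and passing the images to the vision model.
*
* @example
* // Sketch (Node): analyze a local PDF.
* // import { readFile } from 'node:fs/promises';
* const pdf = new Uint8Array(await readFile('./contract.pdf'));
* const { message } = await provider.document({
*   systemMessage: 'Extract key clauses.',
*   userMessage: 'List all deadlines.',
*   pdfDocuments: [pdf],
*   messageHistory: [],
* });
*/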
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
}
// Convert images to base64
const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));
// Send request to Ollama with images
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.visionModel,
messages: [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{
role: 'user',
content: optionsArg.userMessage,
images: base64Images
}
],
stream: false
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
message: {
role: 'assistant',
content: result.message.content
}
};
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research capabilities are not yet supported by Ollama provider.');
}
/**
* Image generation is not supported by Ollama
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Ollama. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Ollama
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Ollama. Please use OpenAI provider for image editing.');
}
}