smartai/ts/provider.ollama.ts
Juergen Kunz 126e9b239b feat(OllamaProvider): add model options, streaming support, and thinking tokens
- Add IOllamaModelOptions interface for runtime options (num_ctx, temperature, etc.)
- Extend IOllamaProviderOptions with defaultOptions and defaultTimeout
- Add IOllamaChatOptions for per-request overrides
- Add IOllamaStreamChunk and IOllamaChatResponse interfaces
- Add chatStreamResponse() for async iteration with options
- Add collectStreamResponse() for streaming with progress callback
- Add chatWithOptions() for non-streaming with full options
- Update chat() to use defaultOptions and defaultTimeout
2026-01-20 00:02:45 +00:00


import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
/**
* Ollama model runtime options
* @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
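*
* @example
* // A minimal sketch of deterministic, long-context generation
* // (values are illustrative, not recommendations):
* const opts: IOllamaModelOptions = {
*   num_ctx: 8192,
*   temperature: 0,
*   seed: 42,
* };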
*/
export interface IOllamaModelOptions {
num_ctx?: number; // Context window (default: 2048)
temperature?: number; // 0 = deterministic (default: 0.8)
top_k?: number; // Top-k sampling (default: 40)
top_p?: number; // Nucleus sampling (default: 0.9)
repeat_penalty?: number; // Repeat penalty (default: 1.1)
num_predict?: number; // Max tokens to predict
stop?: string[]; // Stop sequences
seed?: number; // Random seed for reproducibility
}
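/**
* Provider construction options
*
* @example
* // A minimal sketch; 'llama3' is an illustrative model name —
* // substitute any model pulled locally via `ollama pull`:
* const provider = new OllamaProvider({
*   model: 'llama3',
*   defaultOptions: { temperature: 0.2 },
*   defaultTimeout: 60000,
* });
*/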
export interface IOllamaProviderOptions {
baseUrl?: string;
model?: string;
visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
defaultOptions?: IOllamaModelOptions; // Default model options
defaultTimeout?: number; // Default timeout in ms (default: 120000)
}
/**
* Extended chat options with Ollama-specific settings
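*
* @example
* // Per-request overrides win over provider defaults (sketch):
* const chatOpts: IOllamaChatOptions = {
*   systemMessage: 'You are a helpful assistant.',
*   userMessage: 'Summarize this repository.',
*   messageHistory: [],
*   options: { temperature: 0 },
*   timeout: 30000,
* };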
*/
export interface IOllamaChatOptions extends ChatOptions {
options?: IOllamaModelOptions; // Per-request model options
timeout?: number; // Per-request timeout in ms
model?: string; // Per-request model override
}
/**
* Chunk emitted during streaming
*/
export interface IOllamaStreamChunk {
content: string;
thinking?: string; // For models with extended thinking
done: boolean;
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
/**
* Extended chat response with Ollama-specific fields
*/
export interface IOllamaChatResponse extends ChatResponse {
thinking?: string;
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
export class OllamaProvider extends MultiModalModel {
private options: IOllamaProviderOptions;
private baseUrl: string;
private model: string;
private visionModel: string;
private defaultOptions: IOllamaModelOptions;
private defaultTimeout: number;
constructor(optionsArg: IOllamaProviderOptions = {}) {
super();
this.options = optionsArg;
this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
this.model = optionsArg.model || 'llama2';
this.visionModel = optionsArg.visionModel || 'llava';
this.defaultOptions = optionsArg.defaultOptions || {};
this.defaultTimeout = optionsArg.defaultTimeout || 120000;
}
async start() {
await super.start();
// Verify Ollama is running
try {
const response = await fetch(`${this.baseUrl}/api/tags`);
if (!response.ok) {
throw new Error('Failed to connect to Ollama server');
}
} catch (error) {
throw new Error(`Failed to connect to Ollama server at ${this.baseUrl}: ${(error as Error).message}`);
}
}
async stop() {
await super.stop();
}
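/**
* Bridges a stream of newline-delimited JSON chat messages
* ({ role, content } per line) to Ollama and emits the assistant's
* reply as text chunks.
*
* @example
* // Sketch: wrap a single message as an NDJSON byte stream
* // (assumes a started OllamaProvider instance `provider`):
* const input = new ReadableStream<Uint8Array>({
*   start(controller) {
*     controller.enqueue(new TextEncoder().encode(
*       JSON.stringify({ role: 'user', content: 'Hello!' }) + '\n'
*     ));
*     controller.close();
*   },
* });
* const output = await provider.chatStream(input);
*/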
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to Ollama
if (currentMessage) {
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.model,
messages: [{ role: currentMessage.role, content: currentMessage.content }],
stream: true,
}),
});
// Process each chunk from Ollama, buffering partial lines across reads
// so multi-byte characters and split JSON lines are not lost
const reader = response.body?.getReader();
if (reader) {
const ollamaDecoder = new TextDecoder();
let ollamaBuffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
ollamaBuffer += ollamaDecoder.decode(value, { stream: true });
const lines = ollamaBuffer.split('\n');
ollamaBuffer = lines.pop() || '';
for (const line of lines) {
if (line.trim()) {
try {
const parsed = JSON.parse(line);
const content = parsed.message?.content;
if (content) {
controller.enqueue(content);
}
} catch (e) {
console.error('Failed to parse Ollama response:', e);
}
}
}
}
} finally {
reader.releaseLock();
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Non-streaming chat interaction using the provider-level defaults
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Format messages for Ollama
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
];
// Make API call to Ollama with defaultOptions and timeout
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.model,
messages: messages,
stream: false,
options: this.defaultOptions,
}),
signal: AbortSignal.timeout(this.defaultTimeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant' as const,
message: result.message.content,
};
}
/**
* Streaming chat with async iteration and options support
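*
* @example
* // Sketch: print tokens as they arrive (Node).
* const stream = await provider.chatStreamResponse({
*   systemMessage: 'You are terse.',
*   userMessage: 'Explain streams in one line.',
*   messageHistory: [],
* });
* for await (const chunk of stream) {
*   process.stdout.write(chunk.content);
*   if (chunk.done && chunk.stats) console.log('\n', chunk.stats);
* }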
*/
public async chatStreamResponse(
optionsArg: IOllamaChatOptions
): Promise<AsyncIterable<IOllamaStreamChunk>> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
];
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages,
stream: true,
options: modelOptions,
}),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status}`);
}
const reader = response.body!.getReader();
const decoder = new TextDecoder();
return {
[Symbol.asyncIterator]: async function* () {
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
yield {
content: json.message?.content || '',
thinking: json.message?.thinking,
done: json.done || false,
stats: json.done ? {
totalDuration: json.total_duration,
evalCount: json.eval_count,
} : undefined,
} as IOllamaStreamChunk;
} catch { /* skip malformed */ }
}
}
} finally {
reader.releaseLock();
}
}
};
}
/**
* Stream and collect full response with optional progress callback
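*
* @example
* // Sketch: stream with a progress callback, then use the full text.
* const res = await provider.collectStreamResponse(
*   { systemMessage: 'You are terse.', userMessage: 'Hi', messageHistory: [] },
*   (chunk) => process.stdout.write(chunk.content)
* );
* console.log('\nTotal:', res.message.length, 'chars');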
*/
public async collectStreamResponse(
optionsArg: IOllamaChatOptions,
onChunk?: (chunk: IOllamaStreamChunk) => void
): Promise<IOllamaChatResponse> {
const stream = await this.chatStreamResponse(optionsArg);
let content = '';
let thinking = '';
let stats: IOllamaChatResponse['stats'];
for await (const chunk of stream) {
if (chunk.content) content += chunk.content;
if (chunk.thinking) thinking += chunk.thinking;
if (chunk.stats) stats = chunk.stats;
if (onChunk) onChunk(chunk);
}
return {
role: 'assistant' as const,
message: content,
thinking: thinking || undefined,
stats,
};
}
/**
* Non-streaming chat with full options support
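*
* @example
* // Sketch: one-shot reproducible completion with stats.
* const res = await provider.chatWithOptions({
*   systemMessage: 'You are terse.',
*   userMessage: 'Name one prime number.',
*   messageHistory: [],
*   options: { temperature: 0, seed: 7 },
* });
* console.log(res.message, res.stats?.evalCount);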
*/
public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
];
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages,
stream: false,
options: modelOptions,
}),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant' as const,
message: result.message.content,
thinking: result.message.thinking,
stats: {
totalDuration: result.total_duration,
evalCount: result.eval_count,
},
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Ollama.');
}
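/**
* Describe an image using the configured vision model.
*
* @example
* // Sketch (Node): load an image from disk and ask about it.
* // import { readFile } from 'node:fs/promises';
* const image = await readFile('./photo.jpg');
* const answer = await provider.vision({ image, prompt: 'What is shown here?' });
*/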
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const base64Image = optionsArg.image.toString('base64');
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.visionModel,
messages: [{
role: 'user',
content: optionsArg.prompt,
images: [base64Image]
}],
stream: false
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return result.message.content;
}
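/**
* Answer questions about PDF documents by rendering each page to PNG
* and passing the images to the vision model.
*
* @example
* // Sketch (Node): analyze a local PDF.
* // import { readFile } from 'node:fs/promises';
* const pdf = new Uint8Array(await readFile('./contract.pdf'));
* const { message } = await provider.document({
*   systemMessage: 'Extract key clauses.',
*   userMessage: 'List all deadlines.',
*   pdfDocuments: [pdf],
*   messageHistory: [],
* });
*/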
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
}
// Convert images to base64
const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));
// Send request to Ollama with images
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.visionModel,
messages: [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{
role: 'user',
content: optionsArg.userMessage,
images: base64Images
}
],
stream: false
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
message: {
role: 'assistant',
content: result.message.content
}
};
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research capabilities are not yet supported by Ollama provider.');
}
/**
* Image generation is not supported by Ollama
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Ollama. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Ollama
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Ollama. Please use OpenAI provider for image editing.');
}
}