feat(OllamaProvider): add model options, streaming support, and thinking tokens
- Add IOllamaModelOptions interface for runtime options (num_ctx, temperature, etc.)
- Extend IOllamaProviderOptions with defaultOptions and defaultTimeout
- Add IOllamaChatOptions for per-request overrides
- Add IOllamaStreamChunk and IOllamaChatResponse interfaces
- Add chatStreamResponse() for async iteration with options
- Add collectStreamResponse() for streaming with progress callback
- Add chatWithOptions() for non-streaming with full options
- Update chat() to use defaultOptions and defaultTimeout
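A minimal usage sketch of the new non-streaming surface. The prompts, option values, and timeouts below are illustrative; the field names (systemMessage, userMessage, messageHistory) follow how IOllamaChatOptions is consumed in the diff:

  const provider = new OllamaProvider({
    baseUrl: 'http://localhost:11434',
    defaultOptions: { num_ctx: 8192, temperature: 0.2 }, // applied to every request
    defaultTimeout: 300000,                              // 5 minutes
  });

  // Per-request options are merged over defaultOptions; model and timeout can be overridden too.
  const response = await provider.chatWithOptions({
    systemMessage: 'You are a concise assistant.',
    userMessage: 'Summarize the changelog in one sentence.',
    messageHistory: [],
    model: 'llama2',             // per-request model override
    options: { temperature: 0 }, // overrides defaultOptions.temperature for this call
    timeout: 60000,
  });
  console.log(response.message, response.stats?.evalCount);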
@@ -12,10 +12,60 @@ import type {
   ImageResponse
 } from './abstract.classes.multimodal.js';
 
+/**
+ * Ollama model runtime options
+ * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
+ */
+export interface IOllamaModelOptions {
+  num_ctx?: number;        // Context window (default: 2048)
+  temperature?: number;    // 0 = deterministic (default: 0.8)
+  top_k?: number;          // Top-k sampling (default: 40)
+  top_p?: number;          // Nucleus sampling (default: 0.9)
+  repeat_penalty?: number; // Repeat penalty (default: 1.1)
+  num_predict?: number;    // Max tokens to predict
+  stop?: string[];         // Stop sequences
+  seed?: number;           // Random seed for reproducibility
+}
+
 export interface IOllamaProviderOptions {
   baseUrl?: string;
   model?: string;
   visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
+  defaultOptions?: IOllamaModelOptions; // Default model options
+  defaultTimeout?: number; // Default timeout in ms (default: 120000)
 }
+
+/**
+ * Extended chat options with Ollama-specific settings
+ */
+export interface IOllamaChatOptions extends ChatOptions {
+  options?: IOllamaModelOptions; // Per-request model options
+  timeout?: number; // Per-request timeout in ms
+  model?: string; // Per-request model override
+}
+
+/**
+ * Chunk emitted during streaming
+ */
+export interface IOllamaStreamChunk {
+  content: string;
+  thinking?: string; // For models with extended thinking
+  done: boolean;
+  stats?: {
+    totalDuration?: number;
+    evalCount?: number;
+  };
+}
+
+/**
+ * Extended chat response with Ollama-specific fields
+ */
+export interface IOllamaChatResponse extends ChatResponse {
+  thinking?: string;
+  stats?: {
+    totalDuration?: number;
+    evalCount?: number;
+  };
+}
 
 export class OllamaProvider extends MultiModalModel {
@@ -23,6 +73,8 @@ export class OllamaProvider extends MultiModalModel {
   private baseUrl: string;
   private model: string;
   private visionModel: string;
+  private defaultOptions: IOllamaModelOptions;
+  private defaultTimeout: number;
 
   constructor(optionsArg: IOllamaProviderOptions = {}) {
     super();
@@ -30,6 +82,8 @@ export class OllamaProvider extends MultiModalModel {
     this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
     this.model = optionsArg.model || 'llama2';
     this.visionModel = optionsArg.visionModel || 'llava';
+    this.defaultOptions = optionsArg.defaultOptions || {};
+    this.defaultTimeout = optionsArg.defaultTimeout || 120000;
   }
 
   async start() {
@@ -154,7 +208,7 @@ export class OllamaProvider extends MultiModalModel {
       { role: 'user', content: optionsArg.userMessage }
     ];
 
-    // Make API call to Ollama
+    // Make API call to Ollama with defaultOptions and timeout
     const response = await fetch(`${this.baseUrl}/api/chat`, {
       method: 'POST',
       headers: {
@@ -163,8 +217,10 @@ export class OllamaProvider extends MultiModalModel {
       body: JSON.stringify({
         model: this.model,
         messages: messages,
-        stream: false
+        stream: false,
+        options: this.defaultOptions,
       }),
+      signal: AbortSignal.timeout(this.defaultTimeout),
     });
 
     if (!response.ok) {
@@ -172,13 +228,150 @@ export class OllamaProvider extends MultiModalModel {
     }
 
     const result = await response.json();
 
     return {
       role: 'assistant' as const,
       message: result.message.content,
     };
   }
+
+  /**
+   * Streaming chat with async iteration and options support
+   */
+  public async chatStreamResponse(
+    optionsArg: IOllamaChatOptions
+  ): Promise<AsyncIterable<IOllamaStreamChunk>> {
+    const model = optionsArg.model || this.model;
+    const timeout = optionsArg.timeout || this.defaultTimeout;
+    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
+
+    const messages = [
+      { role: 'system', content: optionsArg.systemMessage },
+      ...optionsArg.messageHistory,
+      { role: 'user', content: optionsArg.userMessage }
+    ];
+
+    const response = await fetch(`${this.baseUrl}/api/chat`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model,
+        messages,
+        stream: true,
+        options: modelOptions,
+      }),
+      signal: AbortSignal.timeout(timeout),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Ollama API error: ${response.status}`);
+    }
+
+    const reader = response.body!.getReader();
+    const decoder = new TextDecoder();
+
+    return {
+      [Symbol.asyncIterator]: async function* () {
+        let buffer = '';
+        try {
+          while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+            buffer += decoder.decode(value, { stream: true });
+            const lines = buffer.split('\n');
+            buffer = lines.pop() || '';
+            for (const line of lines) {
+              if (!line.trim()) continue;
+              try {
+                const json = JSON.parse(line);
+                yield {
+                  content: json.message?.content || '',
+                  thinking: json.message?.thinking,
+                  done: json.done || false,
+                  stats: json.done ? {
+                    totalDuration: json.total_duration,
+                    evalCount: json.eval_count,
+                  } : undefined,
+                } as IOllamaStreamChunk;
+              } catch { /* skip malformed */ }
+            }
+          }
+        } finally {
+          reader.releaseLock();
+        }
+      }
+    };
+  }
+
+  /**
+   * Stream and collect full response with optional progress callback
+   */
+  public async collectStreamResponse(
+    optionsArg: IOllamaChatOptions,
+    onChunk?: (chunk: IOllamaStreamChunk) => void
+  ): Promise<IOllamaChatResponse> {
+    const stream = await this.chatStreamResponse(optionsArg);
+    let content = '';
+    let thinking = '';
+    let stats: IOllamaChatResponse['stats'];
+
+    for await (const chunk of stream) {
+      if (chunk.content) content += chunk.content;
+      if (chunk.thinking) thinking += chunk.thinking;
+      if (chunk.stats) stats = chunk.stats;
+      if (onChunk) onChunk(chunk);
+    }
+
+    return {
+      role: 'assistant' as const,
+      message: content,
+      thinking: thinking || undefined,
+      stats,
+    };
+  }
+
+  /**
+   * Non-streaming chat with full options support
+   */
+  public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
+    const model = optionsArg.model || this.model;
+    const timeout = optionsArg.timeout || this.defaultTimeout;
+    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
+
+    const messages = [
+      { role: 'system', content: optionsArg.systemMessage },
+      ...optionsArg.messageHistory,
+      { role: 'user', content: optionsArg.userMessage }
+    ];
+
+    const response = await fetch(`${this.baseUrl}/api/chat`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model,
+        messages,
+        stream: false,
+        options: modelOptions,
+      }),
+      signal: AbortSignal.timeout(timeout),
+    });
+
+    if (!response.ok) {
+      throw new Error(`Ollama API error: ${response.statusText}`);
+    }
+
+    const result = await response.json();
+    return {
+      role: 'assistant' as const,
+      message: result.message.content,
+      thinking: result.message.thinking,
+      stats: {
+        totalDuration: result.total_duration,
+        evalCount: result.eval_count,
+      },
+    };
+  }
 
   public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
     throw new Error('Audio generation is not supported by Ollama.');
   }
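A short sketch of consuming the streaming API added above, reusing the provider from the earlier sketch. The prompts are illustrative; chunk handling follows the IOllamaStreamChunk shape introduced in this commit:

  // Async iteration over raw chunks, including thinking tokens when the model emits them.
  const stream = await provider.chatStreamResponse({
    systemMessage: 'You are a helpful assistant.',
    userMessage: 'Explain context windows briefly.',
    messageHistory: [],
  });
  for await (const chunk of stream) {
    if (chunk.thinking) process.stderr.write(chunk.thinking);
    if (chunk.content) process.stdout.write(chunk.content);
    if (chunk.done) console.log('\nstats:', chunk.stats); // stats only arrive on the final chunk
  }

  // Or collect the full response while reporting progress per chunk.
  const full = await provider.collectStreamResponse(
    {
      systemMessage: 'You are a helpful assistant.',
      userMessage: 'Explain context windows briefly.',
      messageHistory: [],
    },
    (chunk) => process.stdout.write(chunk.content),
  );
  console.log(full.message, full.stats?.totalDuration);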