- Add IOllamaModelOptions interface for runtime options (num_ctx, temperature, etc.)
- Extend IOllamaProviderOptions with defaultOptions and defaultTimeout
- Add IOllamaChatOptions for per-request overrides
- Add IOllamaStreamChunk and IOllamaChatResponse interfaces
- Add chatStreamResponse() for async iteration with options
- Add collectStreamResponse() for streaming with progress callback
- Add chatWithOptions() for non-streaming with full options
- Update chat() to use defaultOptions and defaultTimeout
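
A minimal usage sketch (illustrative only; the model name and option values are placeholders): once defaultOptions and defaultTimeout are set, plain chat() applies them automatically.

    const provider = new OllamaProvider({
      model: 'llama3',
      defaultOptions: { num_ctx: 8192, temperature: 0 },
      defaultTimeout: 300_000,
    });
    await provider.start();
    const reply = await provider.chat({
      systemMessage: 'You are a helpful assistant.',
      userMessage: 'Say hello.',
      messageHistory: [],
    });
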
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
  ChatOptions,
  ChatResponse,
  ChatMessage,
  ResearchOptions,
  ResearchResponse,
  ImageGenerateOptions,
  ImageEditOptions,
  ImageResponse
} from './abstract.classes.multimodal.js';

/**
 * Ollama model runtime options
 * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
 */
export interface IOllamaModelOptions {
  num_ctx?: number;        // Context window (default: 2048)
  temperature?: number;    // 0 = deterministic (default: 0.8)
  top_k?: number;          // Top-k sampling (default: 40)
  top_p?: number;          // Nucleus sampling (default: 0.9)
  repeat_penalty?: number; // Repeat penalty (default: 1.1)
  num_predict?: number;    // Max tokens to predict
  stop?: string[];         // Stop sequences
  seed?: number;           // Random seed for reproducibility
}

export interface IOllamaProviderOptions {
  baseUrl?: string;
  model?: string;
  visionModel?: string;                 // Model to use for vision tasks (e.g. 'llava')
  defaultOptions?: IOllamaModelOptions; // Default model options
  defaultTimeout?: number;              // Default timeout in ms (default: 120000)
}
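
// Illustrative default configuration (placeholder values). Per-request `options`
// passed to chatWithOptions()/chatStreamResponse() are shallow-merged over
// defaultOptions, so a single call can override one knob and keep the rest:
//
//   const providerOptions: IOllamaProviderOptions = {
//     visionModel: 'llava',
//     defaultOptions: { num_ctx: 8192, temperature: 0, repeat_penalty: 1.1 },
//     defaultTimeout: 300_000,
//   };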

/**
 * Extended chat options with Ollama-specific settings
 */
export interface IOllamaChatOptions extends ChatOptions {
  options?: IOllamaModelOptions; // Per-request model options
  timeout?: number;              // Per-request timeout in ms
  model?: string;                // Per-request model override
}

/**
 * Chunk emitted during streaming
 */
export interface IOllamaStreamChunk {
  content: string;
  thinking?: string; // For models with extended thinking
  done: boolean;
  stats?: {
    totalDuration?: number;
    evalCount?: number;
  };
}

/**
 * Extended chat response with Ollama-specific fields
 */
export interface IOllamaChatResponse extends ChatResponse {
  thinking?: string;
  stats?: {
    totalDuration?: number;
    evalCount?: number;
  };
}

export class OllamaProvider extends MultiModalModel {
  private options: IOllamaProviderOptions;
  private baseUrl: string;
  private model: string;
  private visionModel: string;
  private defaultOptions: IOllamaModelOptions;
  private defaultTimeout: number;

  constructor(optionsArg: IOllamaProviderOptions = {}) {
    super();
    this.options = optionsArg;
    this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
    this.model = optionsArg.model || 'llama2';
    this.visionModel = optionsArg.visionModel || 'llava';
    this.defaultOptions = optionsArg.defaultOptions || {};
    this.defaultTimeout = optionsArg.defaultTimeout || 120000;
  }

  async start() {
    await super.start();
    // Verify Ollama is running
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`);
      if (!response.ok) {
        throw new Error('Failed to connect to Ollama server');
      }
    } catch (error) {
      throw new Error(`Failed to connect to Ollama server at ${this.baseUrl}: ${error.message}`);
    }
  }

  async stop() {
    await super.stop();
  }

  /**
   * Streaming chat over ReadableStreams: the input stream carries newline-delimited
   * JSON chat messages; the output stream emits the assistant's content as it arrives.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: { role: string; content: string; } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: message.role || 'user',
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to Ollama
        if (currentMessage) {
          const response = await fetch(`${this.baseUrl}/api/chat`, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
            },
            body: JSON.stringify({
              model: this.model,
              messages: [{ role: currentMessage.role, content: currentMessage.content }],
              stream: true,
            }),
          });

          // Process each chunk from Ollama, buffering so NDJSON lines that are
          // split across network chunks are still parsed correctly
          const reader = response.body?.getReader();
          if (reader) {
            const responseDecoder = new TextDecoder();
            let responseBuffer = '';
            try {
              while (true) {
                const { done, value } = await reader.read();
                if (done) break;

                responseBuffer += responseDecoder.decode(value, { stream: true });
                const lines = responseBuffer.split('\n');
                responseBuffer = lines.pop() || '';

                for (const line of lines) {
                  if (line.trim()) {
                    try {
                      const parsed = JSON.parse(line);
                      const content = parsed.message?.content;
                      if (content) {
                        controller.enqueue(content);
                      }
                    } catch (e) {
                      console.error('Failed to parse Ollama response:', e);
                    }
                  }
                }
              }
            } finally {
              reader.releaseLock();
            }
          }

          currentMessage = null;
        }
      },

      flush(controller) {
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }

  /**
   * Non-streaming chat that applies the provider's defaultOptions and defaultTimeout
   */
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    // Format messages for Ollama
    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory,
      { role: 'user', content: optionsArg.userMessage }
    ];

    // Make API call to Ollama with defaultOptions and timeout
    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: this.model,
        messages: messages,
        stream: false,
        options: this.defaultOptions,
      }),
      signal: AbortSignal.timeout(this.defaultTimeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();

    return {
      role: 'assistant' as const,
      message: result.message.content,
    };
  }

  /**
   * Streaming chat with async iteration and options support
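   *
   * Illustrative example (assumes a constructed provider; option values are placeholders):
   *   for await (const chunk of await provider.chatStreamResponse({
   *     systemMessage: 'You are concise.',
   *     userMessage: 'Stream a short greeting.',
   *     messageHistory: [],
   *     options: { temperature: 0.7 },
   *   })) {
   *     process.stdout.write(chunk.content);
   *   }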
   */
  public async chatStreamResponse(
    optionsArg: IOllamaChatOptions
  ): Promise<AsyncIterable<IOllamaStreamChunk>> {
    const model = optionsArg.model || this.model;
    const timeout = optionsArg.timeout || this.defaultTimeout;
    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory,
      { role: 'user', content: optionsArg.userMessage }
    ];

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages,
        stream: true,
        options: modelOptions,
      }),
      signal: AbortSignal.timeout(timeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }

    const reader = response.body!.getReader();
    const decoder = new TextDecoder();

    return {
      [Symbol.asyncIterator]: async function* () {
        let buffer = '';
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split('\n');
            buffer = lines.pop() || '';
            for (const line of lines) {
              if (!line.trim()) continue;
              try {
                const json = JSON.parse(line);
                yield {
                  content: json.message?.content || '',
                  thinking: json.message?.thinking,
                  done: json.done || false,
                  stats: json.done ? {
                    totalDuration: json.total_duration,
                    evalCount: json.eval_count,
                  } : undefined,
                } as IOllamaStreamChunk;
              } catch { /* skip malformed */ }
            }
          }
        } finally {
          reader.releaseLock();
        }
      }
    };
  }

  /**
   * Stream and collect full response with optional progress callback
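   *
   * Illustrative example (assumes a constructed provider; the callback just echoes progress):
   *   const res = await provider.collectStreamResponse(
   *     {
   *       systemMessage: 'You are helpful.',
   *       userMessage: 'Summarize NDJSON in one sentence.',
   *       messageHistory: [],
   *     },
   *     (chunk) => process.stdout.write(chunk.content),
   *   );
   *   console.log('\n', res.stats);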
   */
  public async collectStreamResponse(
    optionsArg: IOllamaChatOptions,
    onChunk?: (chunk: IOllamaStreamChunk) => void
  ): Promise<IOllamaChatResponse> {
    const stream = await this.chatStreamResponse(optionsArg);
    let content = '';
    let thinking = '';
    let stats: IOllamaChatResponse['stats'];

    for await (const chunk of stream) {
      if (chunk.content) content += chunk.content;
      if (chunk.thinking) thinking += chunk.thinking;
      if (chunk.stats) stats = chunk.stats;
      if (onChunk) onChunk(chunk);
    }

    return {
      role: 'assistant' as const,
      message: content,
      thinking: thinking || undefined,
      stats,
    };
  }

  /**
   * Non-streaming chat with full options support
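   *
   * Illustrative example of per-request overrides (model name and values are placeholders):
   *   const res = await provider.chatWithOptions({
   *     systemMessage: 'You are terse.',
   *     userMessage: 'Name one prime number.',
   *     messageHistory: [],
   *     model: 'mistral',
   *     timeout: 30_000,
   *     options: { num_predict: 16, temperature: 0 },
   *   });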
   */
  public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
    const model = optionsArg.model || this.model;
    const timeout = optionsArg.timeout || this.defaultTimeout;
    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...optionsArg.messageHistory,
      { role: 'user', content: optionsArg.userMessage }
    ];

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages,
        stream: false,
        options: modelOptions,
      }),
      signal: AbortSignal.timeout(timeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();
    return {
      role: 'assistant' as const,
      message: result.message.content,
      thinking: result.message.thinking,
      stats: {
        totalDuration: result.total_duration,
        evalCount: result.eval_count,
      },
    };
  }

  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by Ollama.');
  }

  /**
   * Vision chat using the configured vision model (e.g. 'llava')
   */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    const base64Image = optionsArg.image.toString('base64');

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [{
          role: 'user',
          content: optionsArg.prompt,
          images: [base64Image]
        }],
        stream: false
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();
    return result.message.content;
  }

  /**
   * Document Q&A: PDFs are rasterized to PNG via SmartPdf and sent to the vision model
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    // Ensure SmartPdf is initialized before processing documents
    await this.ensureSmartpdfReady();

    // Convert PDF documents to images using SmartPDF
    let documentImageBytesArray: Uint8Array[] = [];

    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
      documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
    }

    // Convert images to base64
    const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));

    // Send request to Ollama with images
    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [
          { role: 'system', content: optionsArg.systemMessage },
          ...optionsArg.messageHistory,
          {
            role: 'user',
            content: optionsArg.userMessage,
            images: base64Images
          }
        ],
        stream: false
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();
    return {
      message: {
        role: 'assistant',
        content: result.message.content
      }
    };
  }

  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('Research capabilities are not yet supported by Ollama provider.');
  }

  /**
   * Image generation is not supported by Ollama
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('Image generation is not supported by Ollama. Please use OpenAI provider for image generation.');
  }

  /**
   * Image editing is not supported by Ollama
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('Image editing is not supported by Ollama. Please use OpenAI provider for image editing.');
  }
}