BREAKING CHANGE(vercel-ai-sdk): migrate to Vercel AI SDK v6 and introduce provider registry (getModel) returning LanguageModelV3

2026-03-05 19:37:29 +00:00
parent 27cef60900
commit c24010c9bc
61 changed files with 4789 additions and 9083 deletions

View File

@@ -3,6 +3,6 @@
  */
 export const commitinfo = {
   name: '@push.rocks/smartai',
-  version: '0.13.3',
-  description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.'
+  version: '2.0.0',
+  description: 'Provider registry and capability utilities for ai-sdk (Vercel AI SDK). Core export returns LanguageModel; subpath exports provide vision, audio, image, document and research capabilities.'
 }

View File

@@ -1,240 +0,0 @@
import * as plugins from './plugins.js';
/**
* Message format for chat interactions
*/
export interface ChatMessage {
role: 'assistant' | 'user' | 'system';
content: string;
/** Base64-encoded images for vision-capable models */
images?: string[];
/** Chain-of-thought reasoning for GPT-OSS models (e.g., Ollama) */
reasoning?: string;
}
/**
* Options for chat interactions
*/
export interface ChatOptions {
systemMessage: string;
userMessage: string;
messageHistory: ChatMessage[];
/** Base64-encoded images for the current message (vision-capable models) */
images?: string[];
}
/**
* Options for streaming chat interactions
*/
export interface StreamingChatOptions extends ChatOptions {
/** Callback fired for each token during generation */
onToken?: (token: string) => void;
}
/**
* Response format for chat interactions
*/
export interface ChatResponse {
role: 'assistant';
message: string;
/** Chain-of-thought reasoning from reasoning models */
reasoning?: string;
}
/**
* Options for research interactions
*/
export interface ResearchOptions {
query: string;
searchDepth?: 'basic' | 'advanced' | 'deep';
maxSources?: number;
includeWebSearch?: boolean;
background?: boolean;
}
/**
* Response format for research interactions
*/
export interface ResearchResponse {
answer: string;
sources: Array<{
url: string;
title: string;
snippet: string;
}>;
searchQueries?: string[];
metadata?: any;
}
/**
* Options for image generation
*/
export interface ImageGenerateOptions {
prompt: string;
model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
style?: 'vivid' | 'natural';
background?: 'transparent' | 'opaque' | 'auto';
outputFormat?: 'png' | 'jpeg' | 'webp';
outputCompression?: number; // 0-100 for webp/jpeg
moderation?: 'low' | 'auto';
n?: number; // Number of images to generate
stream?: boolean;
partialImages?: number; // 0-3 for streaming
}
/**
* Options for image editing
*/
export interface ImageEditOptions {
image: Buffer;
prompt: string;
mask?: Buffer;
model?: 'gpt-image-1' | 'dall-e-2';
quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
background?: 'transparent' | 'opaque' | 'auto';
outputFormat?: 'png' | 'jpeg' | 'webp';
outputCompression?: number;
n?: number;
stream?: boolean;
partialImages?: number;
}
/**
* Response format for image operations
*/
export interface ImageResponse {
images: Array<{
b64_json?: string;
url?: string;
revisedPrompt?: string;
}>;
metadata?: {
model: string;
quality?: string;
size?: string;
outputFormat?: string;
tokensUsed?: number;
};
}
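For orientation, a call against these shapes might look like the following sketch (the option values are illustrative picks from the unions above):

// Sketch: exercising the ImageGenerateOptions / ImageResponse shapes above.
const generateOptions: ImageGenerateOptions = {
  prompt: 'A lighthouse at dawn, watercolor style',
  model: 'gpt-image-1',
  quality: 'high',
  size: '1024x1024',
  outputFormat: 'png',
  n: 1,
};
// Any provider implementing imageGenerate() resolves to an ImageResponse:
// const response = await provider.imageGenerate(generateOptions);
// const firstImage = response.images[0]?.b64_json;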
/**
* Abstract base class for multi-modal AI models.
* Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama)
*/
export abstract class MultiModalModel {
/**
* SmartPdf instance for document processing
* Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily
*/
protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;
/**
* Ensures SmartPdf instance is initialized and ready
* Call this before using smartpdfInstance in document processing methods
*/
protected async ensureSmartpdfReady(): Promise<void> {
if (!this.smartpdfInstance) {
this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
await this.smartpdfInstance.start();
}
}
/**
* Initializes the model and any necessary resources
* Should be called before using any other methods
*/
public async start(): Promise<void> {
// SmartPdf is now lazy-loaded only when needed for PDF processing
// This avoids starting a browser unless document() method is actually used
}
/**
* Cleans up any resources used by the model
* Should be called when the model is no longer needed
*/
public async stop(): Promise<void> {
if (this.smartpdfInstance) {
await this.smartpdfInstance.stop();
this.smartpdfInstance = null;
}
}
/**
* Synchronous chat interaction with the model
* @param optionsArg Options containing system message, user message, and message history
* @returns Promise resolving to the assistant's response
*/
public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;
/**
* Streaming interface for chat interactions
* Allows for real-time responses from the model
* @param input Stream of user messages
* @returns Stream of model responses
*/
public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;
/**
* Streaming chat with token callback
* Calls onToken for each token generated, returns final response
* @param optionsArg Options containing system message, user message, message history, and onToken callback
* @returns Promise resolving to the assistant's response
*/
public chatStreaming?(optionsArg: StreamingChatOptions): Promise<ChatResponse>;
/**
* Text-to-speech conversion
* @param optionsArg Options containing the message to convert to speech
* @returns Promise resolving to a readable stream of audio data
* @throws Error if the provider doesn't support audio generation
*/
public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;
/**
* Vision-language processing
* @param optionsArg Options containing the image and prompt for analysis
* @returns Promise resolving to the model's description or analysis of the image
* @throws Error if the provider doesn't support vision tasks
*/
public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;
/**
* Document analysis and processing
* @param optionsArg Options containing system message, user message, PDF documents, and message history
* @returns Promise resolving to the model's analysis of the documents
* @throws Error if the provider doesn't support document processing
*/
public abstract document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }>;
/**
* Research and web search capabilities
* @param optionsArg Options containing the research query and configuration
* @returns Promise resolving to the research results with sources
* @throws Error if the provider doesn't support research capabilities
*/
public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;
/**
* Image generation from text prompts
* @param optionsArg Options containing the prompt and generation parameters
* @returns Promise resolving to the generated image(s)
* @throws Error if the provider doesn't support image generation
*/
public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;
/**
* Image editing and inpainting
* @param optionsArg Options containing the image, prompt, and editing parameters
* @returns Promise resolving to the edited image(s)
* @throws Error if the provider doesn't support image editing
*/
public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
}
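A minimal concrete provider only has to supply the abstract members; a sketch (EchoProvider is hypothetical and implements just chat, stubbing the rest):

class EchoProvider extends MultiModalModel {
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    return { role: 'assistant', message: `echo: ${optionsArg.userMessage}` };
  }
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    throw new Error('Streaming is not supported by EchoProvider.');
  }
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio is not supported by EchoProvider.');
  }
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    throw new Error('Vision is not supported by EchoProvider.');
  }
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    throw new Error('Documents are not supported by EchoProvider.');
  }
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('Research is not supported by EchoProvider.');
  }
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('Image generation is not supported by EchoProvider.');
  }
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('Image editing is not supported by EchoProvider.');
  }
}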

View File

@@ -1,176 +0,0 @@
import type { SmartAi } from "./classes.smartai.js";
import { OpenAiProvider } from "./provider.openai.js";
type TProcessFunction = (input: string) => Promise<string>;
export interface IConversationOptions {
processFunction: TProcessFunction;
}
/**
* a conversation
*/
export class Conversation {
// STATIC
public static async createWithOpenAi(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.openaiProvider) {
throw new Error('OpenAI provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithAnthropic(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.anthropicProvider) {
throw new Error('Anthropic provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithPerplexity(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.perplexityProvider) {
throw new Error('Perplexity provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithExo(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.exoProvider) {
throw new Error('Exo provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithOllama(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.ollamaProvider) {
throw new Error('Ollama provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithGroq(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.groqProvider) {
throw new Error('Groq provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithMistral(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.mistralProvider) {
throw new Error('Mistral provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithXai(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.xaiProvider) {
throw new Error('XAI provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
public static async createWithElevenlabs(smartaiRefArg: SmartAi) {
if (!smartaiRefArg.elevenlabsProvider) {
throw new Error('ElevenLabs provider not available');
}
const conversation = new Conversation(smartaiRefArg, {
processFunction: async (input) => {
return '' // TODO implement proper streaming
}
});
return conversation;
}
// INSTANCE
smartaiRef: SmartAi
private systemMessage: string;
private processFunction: TProcessFunction;
private inputStreamWriter: WritableStreamDefaultWriter<string> | null = null;
private outputStreamController: ReadableStreamDefaultController<string> | null = null;
constructor(smartairefArg: SmartAi, options: IConversationOptions) {
this.smartaiRef = smartairefArg;
this.processFunction = options.processFunction;
}
public async setSystemMessage(systemMessageArg: string) {
this.systemMessage = systemMessageArg;
}
private setupOutputStream(): ReadableStream<string> {
return new ReadableStream<string>({
start: (controller) => {
this.outputStreamController = controller;
}
});
}
private setupInputStream(): WritableStream<string> {
const writableStream = new WritableStream<string>({
write: async (chunk) => {
const processedData = await this.processFunction(chunk);
if (this.outputStreamController) {
this.outputStreamController.enqueue(processedData);
}
},
close: () => {
this.outputStreamController?.close();
},
abort: (err) => {
console.error('Stream aborted', err);
this.outputStreamController?.error(err);
}
});
return writableStream;
}
public getInputStreamWriter(): WritableStreamDefaultWriter<string> {
if (!this.inputStreamWriter) {
const inputStream = this.setupInputStream();
this.inputStreamWriter = inputStream.getWriter();
}
return this.inputStreamWriter;
}
public getOutputStream(): ReadableStream<string> {
return this.setupOutputStream();
}
}
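The intended wiring of the two streams, from the caller's side (a sketch; smartAiInstance is assumed to exist, and the uppercasing processFunction is a stand-in):

const conversation = new Conversation(smartAiInstance, {
  processFunction: async (input) => input.toUpperCase(), // stand-in processor
});
// Note: getOutputStream() creates a fresh stream and controller on every call,
// so grab it once, before writing any input.
const reader = conversation.getOutputStream().getReader();
const writer = conversation.getInputStreamWriter();
await writer.write('hello');
const { value } = await reader.read(); // 'HELLO'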

View File

@@ -1,187 +0,0 @@
import { Conversation } from './classes.conversation.js';
import * as plugins from './plugins.js';
import { AnthropicProvider } from './provider.anthropic.js';
import { ElevenLabsProvider } from './provider.elevenlabs.js';
import { MistralProvider } from './provider.mistral.js';
import { OllamaProvider, type IOllamaModelOptions } from './provider.ollama.js';
import { OpenAiProvider } from './provider.openai.js';
import { PerplexityProvider } from './provider.perplexity.js';
import { ExoProvider } from './provider.exo.js';
import { GroqProvider } from './provider.groq.js';
import { XAIProvider } from './provider.xai.js';
export interface ISmartAiOptions {
openaiToken?: string;
anthropicToken?: string;
perplexityToken?: string;
groqToken?: string;
mistralToken?: string;
xaiToken?: string;
elevenlabsToken?: string;
exo?: {
baseUrl?: string;
apiKey?: string;
};
mistral?: {
chatModel?: string;
ocrModel?: string;
tableFormat?: 'markdown' | 'html';
};
ollama?: {
baseUrl?: string;
model?: string;
visionModel?: string;
defaultOptions?: IOllamaModelOptions;
defaultTimeout?: number;
};
elevenlabs?: {
defaultVoiceId?: string;
defaultModelId?: string;
};
}
export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'mistral' | 'xai' | 'elevenlabs';
export class SmartAi {
public options: ISmartAiOptions;
public openaiProvider: OpenAiProvider;
public anthropicProvider: AnthropicProvider;
public perplexityProvider: PerplexityProvider;
public ollamaProvider: OllamaProvider;
public exoProvider: ExoProvider;
public groqProvider: GroqProvider;
public mistralProvider: MistralProvider;
public xaiProvider: XAIProvider;
public elevenlabsProvider: ElevenLabsProvider;
constructor(optionsArg: ISmartAiOptions) {
this.options = optionsArg;
}
public async start() {
if (this.options.openaiToken) {
this.openaiProvider = new OpenAiProvider({
openaiToken: this.options.openaiToken,
});
await this.openaiProvider.start();
}
if (this.options.anthropicToken) {
this.anthropicProvider = new AnthropicProvider({
anthropicToken: this.options.anthropicToken,
});
await this.anthropicProvider.start();
}
if (this.options.perplexityToken) {
this.perplexityProvider = new PerplexityProvider({
perplexityToken: this.options.perplexityToken,
});
await this.perplexityProvider.start();
}
if (this.options.groqToken) {
this.groqProvider = new GroqProvider({
groqToken: this.options.groqToken,
});
await this.groqProvider.start();
}
if (this.options.mistralToken) {
this.mistralProvider = new MistralProvider({
mistralToken: this.options.mistralToken,
chatModel: this.options.mistral?.chatModel,
ocrModel: this.options.mistral?.ocrModel,
tableFormat: this.options.mistral?.tableFormat,
});
await this.mistralProvider.start();
}
if (this.options.xaiToken) {
this.xaiProvider = new XAIProvider({
xaiToken: this.options.xaiToken,
});
await this.xaiProvider.start();
}
if (this.options.elevenlabsToken) {
this.elevenlabsProvider = new ElevenLabsProvider({
elevenlabsToken: this.options.elevenlabsToken,
defaultVoiceId: this.options.elevenlabs?.defaultVoiceId,
defaultModelId: this.options.elevenlabs?.defaultModelId,
});
await this.elevenlabsProvider.start();
}
if (this.options.ollama) {
this.ollamaProvider = new OllamaProvider({
baseUrl: this.options.ollama.baseUrl,
model: this.options.ollama.model,
visionModel: this.options.ollama.visionModel,
defaultOptions: this.options.ollama.defaultOptions,
defaultTimeout: this.options.ollama.defaultTimeout,
});
await this.ollamaProvider.start();
}
if (this.options.exo) {
this.exoProvider = new ExoProvider({
exoBaseUrl: this.options.exo.baseUrl,
apiKey: this.options.exo.apiKey,
});
await this.exoProvider.start();
}
}
public async stop() {
if (this.openaiProvider) {
await this.openaiProvider.stop();
}
if (this.anthropicProvider) {
await this.anthropicProvider.stop();
}
if (this.perplexityProvider) {
await this.perplexityProvider.stop();
}
if (this.groqProvider) {
await this.groqProvider.stop();
}
if (this.mistralProvider) {
await this.mistralProvider.stop();
}
if (this.xaiProvider) {
await this.xaiProvider.stop();
}
if (this.elevenlabsProvider) {
await this.elevenlabsProvider.stop();
}
if (this.ollamaProvider) {
await this.ollamaProvider.stop();
}
if (this.exoProvider) {
await this.exoProvider.stop();
}
}
/**
* create a new conversation
*/
createConversation(provider: TProvider) {
switch (provider) {
case 'exo':
return Conversation.createWithExo(this);
case 'openai':
return Conversation.createWithOpenAi(this);
case 'anthropic':
return Conversation.createWithAnthropic(this);
case 'perplexity':
return Conversation.createWithPerplexity(this);
case 'ollama':
return Conversation.createWithOllama(this);
case 'groq':
return Conversation.createWithGroq(this);
case 'mistral':
return Conversation.createWithMistral(this);
case 'xai':
return Conversation.createWithXai(this);
case 'elevenlabs':
return Conversation.createWithElevenlabs(this);
default:
throw new Error('Provider not available');
}
}
}
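The lifecycle this class expects (a sketch; the tokens are placeholders):

const smartAi = new SmartAi({
  openaiToken: process.env.OPENAI_TOKEN, // placeholder
  ollama: { baseUrl: 'http://localhost:11434', model: 'llama3' },
});
await smartAi.start(); // instantiates one provider per configured token
const response = await smartAi.openaiProvider.chat({
  systemMessage: 'You are a helpful assistant.',
  userMessage: 'Hello!',
  messageHistory: [],
});
await smartAi.stop();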

View File

@@ -1,15 +0,0 @@
import type { SmartAi } from './classes.smartai.js';
import * as plugins from './plugins.js';
export class TTS {
public static async createWithOpenAi(smartaiRef: SmartAi): Promise<TTS> {
return new TTS(smartaiRef);
}
// INSTANCE
smartaiRef: SmartAi;
constructor(smartairefArg: SmartAi) {
this.smartaiRef = smartairefArg;
}
}

View File

@@ -1,11 +1,8 @@
-export * from './classes.smartai.js';
-export * from './abstract.classes.multimodal.js';
-export * from './provider.openai.js';
-export * from './provider.anthropic.js';
-export * from './provider.perplexity.js';
-export * from './provider.groq.js';
-export * from './provider.mistral.js';
-export * from './provider.ollama.js';
-export * from './provider.xai.js';
-export * from './provider.exo.js';
-export * from './provider.elevenlabs.js';
+export { getModel } from './smartai.classes.smartai.js';
+export type { ISmartAiOptions, TProvider, IOllamaModelOptions, LanguageModelV3 } from './smartai.interfaces.js';
+export { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';
+export { createOllamaModel } from './smartai.provider.ollama.js';
+// Re-export commonly used ai-sdk functions for consumer convenience
+export { generateText, streamText, tool, jsonSchema } from 'ai';
+export type { ModelMessage, ToolSet, StreamTextResult } from 'ai';
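For consumers, the new surface reduces to resolving a model and handing it to the re-exported ai-sdk functions. A sketch, assuming getModel accepts a provider-prefixed specifier (its exact signature lives in smartai.classes.smartai.ts, which is not part of this hunk):

import { getModel, generateText, streamText } from '@push.rocks/smartai';

// Assumption: a 'provider:model' style specifier; adjust to the real signature.
const model = await getModel('anthropic:claude-sonnet-4-5');

const { text } = await generateText({
  model,
  system: 'You are a helpful assistant.',
  prompt: 'Hello!',
});

const result = streamText({ model, prompt: 'Stream me a haiku.' });
for await (const chunk of result.textStream) process.stdout.write(chunk);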

View File

@@ -1,4 +0,0 @@
import * as plugins from './plugins.js';
export const packageDir = plugins.path.join(plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url), '../');
export const nogitDir = plugins.path.join(packageDir, './.nogit');

View File

@@ -1,38 +1,22 @@
-// node native
-import * as path from 'path';
+// ai sdk core
+import { generateText, streamText, wrapLanguageModel, tool, jsonSchema } from 'ai';
+export { generateText, streamText, wrapLanguageModel, tool, jsonSchema };
+// ai sdk providers
+import { createAnthropic } from '@ai-sdk/anthropic';
+import { createOpenAI } from '@ai-sdk/openai';
+import { createGoogleGenerativeAI } from '@ai-sdk/google';
+import { createGroq } from '@ai-sdk/groq';
+import { createMistral } from '@ai-sdk/mistral';
+import { createXai } from '@ai-sdk/xai';
+import { createPerplexity } from '@ai-sdk/perplexity';
-export {
-  path,
-}
-// @push.rocks scope
-import * as qenv from '@push.rocks/qenv';
-import * as smartarray from '@push.rocks/smartarray';
-import * as smartfs from '@push.rocks/smartfs';
-import * as smartpath from '@push.rocks/smartpath';
-import * as smartpdf from '@push.rocks/smartpdf';
-import * as smartpromise from '@push.rocks/smartpromise';
-import * as smartrequest from '@push.rocks/smartrequest';
-import * as webstream from '@push.rocks/webstream';
-export {
-  smartarray,
-  qenv,
-  smartfs,
-  smartpath,
-  smartpdf,
-  smartpromise,
-  smartrequest,
-  webstream,
-}
-// third party
-import * as anthropic from '@anthropic-ai/sdk';
-import * as mistralai from '@mistralai/mistralai';
-import * as openai from 'openai';
 export {
-  anthropic,
-  mistralai,
-  openai,
-}
+  createAnthropic,
+  createOpenAI,
+  createGoogleGenerativeAI,
+  createGroq,
+  createMistral,
+  createXai,
+  createPerplexity,
+};
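wrapLanguageModel is re-exported so capability code can layer middleware over a base model. A sketch combining it with the caching middleware named in index.ts (calling the factory without options is an assumption; its real options are not shown in this diff):

import { createAnthropic, wrapLanguageModel } from './plugins.js';
import { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';

const anthropic = createAnthropic({ apiKey: process.env.ANTHROPIC_TOKEN! });
const baseModel = anthropic('claude-sonnet-4-5');

// Assumption: no required options on the middleware factory.
const model = wrapLanguageModel({
  model: baseModel,
  middleware: createAnthropicCachingMiddleware(),
});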

View File

@@ -1,446 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
import type { ImageBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources/messages';
type ContentBlock = ImageBlockParam | TextBlockParam;
export interface IAnthropicProviderOptions {
anthropicToken: string;
enableWebSearch?: boolean;
searchDomainAllowList?: string[];
searchDomainBlockList?: string[];
extendedThinking?: 'quick' | 'normal' | 'deep' | 'off';
}
export class AnthropicProvider extends MultiModalModel {
private options: IAnthropicProviderOptions;
public anthropicApiClient: plugins.anthropic.default;
constructor(optionsArg: IAnthropicProviderOptions) {
super();
this.options = optionsArg; // ensure the token is stored
}
async start() {
await super.start();
this.anthropicApiClient = new plugins.anthropic.default({
apiKey: this.options.anthropicToken,
});
}
async stop() {
await super.stop();
}
/**
* Returns the thinking configuration based on provider options.
* Defaults to 'normal' mode (8000 tokens) if not specified.
*/
private getThinkingConfig(): { type: 'enabled'; budget_tokens: number } | undefined {
const mode = this.options.extendedThinking ?? 'normal';
const budgetMap = {
quick: 2048,
normal: 8000,
deep: 16000,
off: 0,
};
const budget = budgetMap[mode];
return budget > 0 ? { type: 'enabled', budget_tokens: budget } : undefined;
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
async transform(chunk, controller) {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to Anthropic
if (currentMessage) {
const thinkingConfig = this.getThinkingConfig();
const stream = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
messages: [{ role: currentMessage.role, content: currentMessage.content }],
system: '',
stream: true,
max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Process each chunk from Anthropic
for await (const chunk of stream) {
const content = chunk.delta?.text;
if (content) {
controller.enqueue(content);
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Implementing the synchronous chat interaction
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Convert message history to Anthropic format
const messages = optionsArg.messageHistory.map(msg => ({
role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
content: msg.content
}));
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
system: optionsArg.systemMessage,
messages: [
...messages,
{ role: 'user' as const, content: optionsArg.userMessage }
],
max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Extract text content from the response
let message = '';
for (const block of result.content) {
if ('text' in block) {
message += block.text;
}
}
return {
role: 'assistant' as const,
message,
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
// Anthropic does not provide an audio API, so this method is not implemented.
throw new Error('Audio generation is not yet supported by Anthropic.');
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const base64Image = optionsArg.image.toString('base64');
const content: ContentBlock[] = [
{
type: 'text',
text: optionsArg.prompt
},
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/jpeg',
data: base64Image
}
}
];
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
messages: [{
role: 'user',
content
}],
max_tokens: 10000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Extract text content from the response
let message = '';
for (const block of result.content) {
if ('text' in block) {
message += block.text;
}
}
return message;
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
}
// Convert message history to Anthropic format
const messages = optionsArg.messageHistory.map(msg => ({
role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
content: msg.content
}));
// Create content array with text and images
const content: ContentBlock[] = [
{
type: 'text',
text: optionsArg.userMessage
}
];
// Add each document page as an image
for (const imageBytes of documentImageBytesArray) {
content.push({
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: Buffer.from(imageBytes).toString('base64')
}
});
}
const thinkingConfig = this.getThinkingConfig();
const result = await this.anthropicApiClient.messages.create({
model: 'claude-sonnet-4-5-20250929',
system: optionsArg.systemMessage,
messages: [
...messages,
{ role: 'user', content }
],
max_tokens: 20000,
...(thinkingConfig && { thinking: thinkingConfig }),
});
// Extract text content from the response
let message = '';
for (const block of result.content) {
if ('text' in block) {
message += block.text;
}
}
return {
message: {
role: 'assistant',
content: message
}
};
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
// Prepare the messages for the research request
const systemMessage = `You are a research assistant with web search capabilities.
Provide comprehensive, well-researched answers with citations and sources.
When searching the web, be thorough and cite your sources accurately.`;
try {
// Build the tool configuration for web search
const tools: any[] = [];
if (this.options.enableWebSearch) {
const webSearchTool: any = {
type: 'web_search_20250305',
name: 'web_search'
};
// Add optional parameters
if (optionsArg.maxSources) {
webSearchTool.max_uses = optionsArg.maxSources;
}
if (this.options.searchDomainAllowList?.length) {
webSearchTool.allowed_domains = this.options.searchDomainAllowList;
} else if (this.options.searchDomainBlockList?.length) {
webSearchTool.blocked_domains = this.options.searchDomainBlockList;
}
tools.push(webSearchTool);
}
// Token budget for the response (currently the same for every search depth)
const maxTokens = 20000;
// Add thinking configuration if enabled
const thinkingConfig = this.getThinkingConfig();
// Create the research request
// Note: When thinking is enabled, temperature must be 1 (or omitted)
const requestParams: any = {
model: 'claude-sonnet-4-5-20250929',
system: systemMessage,
messages: [
{
role: 'user' as const,
content: optionsArg.query
}
],
max_tokens: maxTokens,
// Only set temperature when thinking is NOT enabled
...(thinkingConfig ? {} : { temperature: 0.7 })
};
// Add tools if web search is enabled
if (tools.length > 0) {
requestParams.tools = tools;
}
// Add thinking configuration if enabled
if (thinkingConfig) {
requestParams.thinking = thinkingConfig;
}
// Execute the research request
const result = await this.anthropicApiClient.messages.create(requestParams);
// Extract the answer from content blocks
let answer = '';
const sources: Array<{ url: string; title: string; snippet: string }> = [];
const searchQueries: string[] = [];
// Process content blocks
for (const block of result.content) {
if ('text' in block) {
// Accumulate text content
answer += block.text;
// Extract citations if present
if ('citations' in block && Array.isArray(block.citations)) {
for (const citation of block.citations) {
if (citation.type === 'web_search_result_location') {
sources.push({
title: citation.title || '',
url: citation.url || '',
snippet: citation.cited_text || ''
});
}
}
}
} else if ('type' in block && block.type === 'server_tool_use') {
// Extract search queries from server tool use
if (block.name === 'web_search' && block.input && typeof block.input === 'object' && 'query' in block.input) {
searchQueries.push((block.input as any).query);
}
} else if ('type' in block && block.type === 'web_search_tool_result') {
// Extract sources from web search results
if (Array.isArray(block.content)) {
for (const result of block.content) {
if (result.type === 'web_search_result') {
// Only add if not already in sources (avoid duplicates from citations)
if (!sources.some(s => s.url === result.url)) {
sources.push({
title: result.title || '',
url: result.url || '',
snippet: '' // Search results don't include snippets, only citations do
});
}
}
}
}
}
}
// Fallback: Parse markdown-style links if no citations found
if (sources.length === 0) {
const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
let match: RegExpExecArray | null;
while ((match = urlRegex.exec(answer)) !== null) {
sources.push({
title: match[1],
url: match[2],
snippet: ''
});
}
}
// Check if web search was used based on usage info
const webSearchCount = result.usage?.server_tool_use?.web_search_requests || 0;
return {
answer,
sources,
searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
metadata: {
model: 'claude-sonnet-4-5-20250929',
searchDepth: optionsArg.searchDepth || 'basic',
tokensUsed: result.usage?.output_tokens,
webSearchesPerformed: webSearchCount
}
};
} catch (error) {
console.error('Anthropic research error:', error);
throw new Error(`Failed to perform research: ${error.message}`);
}
}
/**
* Image generation is not supported by Anthropic
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Anthropic. Claude can only analyze images, not generate them. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Anthropic
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Anthropic. Claude can only analyze images, not edit them. Please use OpenAI provider for image editing.');
}
}
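Exercising research() end to end requires opting into web search at construction (a sketch; the token is a placeholder):

const anthropicProvider = new AnthropicProvider({
  anthropicToken: process.env.ANTHROPIC_TOKEN!, // placeholder
  enableWebSearch: true,
  extendedThinking: 'deep', // 16000-token thinking budget via getThinkingConfig()
});
await anthropicProvider.start();
const research = await anthropicProvider.research({
  query: 'What changed in the latest TypeScript release?',
  searchDepth: 'deep',
  maxSources: 5, // forwarded as the web_search tool's max_uses
});
console.log(research.answer, research.sources.map((s) => s.url));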

View File

@@ -1,116 +0,0 @@
import * as plugins from './plugins.js';
import { Readable } from 'stream';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IElevenLabsProviderOptions {
elevenlabsToken: string;
defaultVoiceId?: string;
defaultModelId?: string;
}
export interface IElevenLabsVoiceSettings {
stability?: number;
similarity_boost?: number;
style?: number;
use_speaker_boost?: boolean;
}
export class ElevenLabsProvider extends MultiModalModel {
private options: IElevenLabsProviderOptions;
private baseUrl: string = 'https://api.elevenlabs.io/v1';
constructor(optionsArg: IElevenLabsProviderOptions) {
super();
this.options = optionsArg;
}
public async start() {
await super.start();
}
public async stop() {
await super.stop();
}
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
throw new Error('ElevenLabs does not support chat functionality. This provider is specialized for text-to-speech only.');
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
throw new Error('ElevenLabs does not support chat streaming functionality. This provider is specialized for text-to-speech only.');
}
public async audio(optionsArg: {
message: string;
voiceId?: string;
modelId?: string;
voiceSettings?: IElevenLabsVoiceSettings;
}): Promise<NodeJS.ReadableStream> {
// Use Samara voice as default fallback
const voiceId = optionsArg.voiceId || this.options.defaultVoiceId || '19STyYD15bswVz51nqLf';
const modelId = optionsArg.modelId || this.options.defaultModelId || 'eleven_v3';
const url = `${this.baseUrl}/text-to-speech/${voiceId}`;
const requestBody: any = {
text: optionsArg.message,
model_id: modelId,
};
if (optionsArg.voiceSettings) {
requestBody.voice_settings = optionsArg.voiceSettings;
}
const response = await plugins.smartrequest.SmartRequest.create()
.url(url)
.header('xi-api-key', this.options.elevenlabsToken)
.json(requestBody)
.autoDrain(false)
.post();
if (!response.ok) {
const errorText = await response.text();
throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
}
const webStream = response.stream();
const nodeStream = Readable.fromWeb(webStream as any);
return nodeStream;
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
throw new Error('ElevenLabs does not support vision functionality. This provider is specialized for text-to-speech only.');
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: any[];
}): Promise<{ message: any }> {
throw new Error('ElevenLabs does not support document processing. This provider is specialized for text-to-speech only.');
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('ElevenLabs does not support research capabilities. This provider is specialized for text-to-speech only.');
}
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('ElevenLabs does not support image generation. This provider is specialized for text-to-speech only.');
}
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('ElevenLabs does not support image editing. This provider is specialized for text-to-speech only.');
}
}
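Because audio() resolves to a Node stream, callers usually pipe it straight to a file or HTTP response (a sketch; the token is a placeholder and the default Samara voice is used):

import { createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';

const elevenlabs = new ElevenLabsProvider({
  elevenlabsToken: process.env.ELEVENLABS_TOKEN!, // placeholder
});
await elevenlabs.start();
const audioStream = await elevenlabs.audio({
  message: 'Hello from ElevenLabs.',
  voiceSettings: { stability: 0.5, similarity_boost: 0.75 },
});
await pipeline(audioStream, createWriteStream('hello.mp3'));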

View File

@@ -1,155 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
export interface IExoProviderOptions {
exoBaseUrl?: string;
apiKey?: string;
}
export class ExoProvider extends MultiModalModel {
private options: IExoProviderOptions;
public openAiApiClient: plugins.openai.default;
constructor(optionsArg: IExoProviderOptions = {}) {
super();
this.options = {
exoBaseUrl: 'http://localhost:8080/v1', // Default Exo API endpoint
...optionsArg
};
}
public async start() {
this.openAiApiClient = new plugins.openai.default({
apiKey: this.options.apiKey || 'not-needed', // Exo might not require an API key for local deployment
baseURL: this.options.exoBaseUrl,
});
}
public async stop() {}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = message;
// Process the message based on its type
if (message.type === 'message') {
const response = await this.chat({
systemMessage: '',
userMessage: message.content,
messageHistory: [{ role: message.role as 'user' | 'assistant' | 'system', content: message.content }]
});
controller.enqueue(JSON.stringify(response) + '\n');
}
} catch (error) {
console.error('Error processing message:', error);
}
}
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
currentMessage = message;
} catch (error) {
console.error('Error processing remaining buffer:', error);
}
}
}
});
return input.pipeThrough(transform);
}
public async chat(options: ChatOptions): Promise<ChatResponse> {
const messages: ChatCompletionMessageParam[] = [
{ role: 'system', content: options.systemMessage },
...options.messageHistory,
{ role: 'user', content: options.userMessage }
];
try {
const response = await this.openAiApiClient.chat.completions.create({
model: 'local-model', // Exo uses local models
messages: messages,
stream: false
});
return {
role: 'assistant',
message: response.choices[0]?.message?.content || ''
};
} catch (error) {
console.error('Error in chat completion:', error);
throw error;
}
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Exo provider');
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
throw new Error('Vision processing is not supported by Exo provider');
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
throw new Error('Document processing is not supported by Exo provider');
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research capabilities are not yet supported by Exo provider.');
}
/**
* Image generation is not supported by Exo
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Exo. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Exo
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Exo. Please use OpenAI provider for image editing.');
}
}

View File

@@ -1,219 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IGroqProviderOptions {
groqToken: string;
model?: string;
}
export class GroqProvider extends MultiModalModel {
private options: IGroqProviderOptions;
private baseUrl = 'https://api.groq.com/openai/v1'; // Groq serves its OpenAI-compatible API under /openai/v1
constructor(optionsArg: IGroqProviderOptions) {
super();
this.options = {
...optionsArg,
model: optionsArg.model || 'llama-3.3-70b-versatile', // Default model
};
}
async start() {}
async stop() {}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to Groq
if (currentMessage) {
const response = await fetch(`${this.baseUrl}/chat/completions`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.options.groqToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.options.model,
messages: [{ role: currentMessage.role, content: currentMessage.content }],
stream: true,
}),
});
// Process each chunk from Groq
const reader = response.body?.getReader();
if (reader) {
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = new TextDecoder().decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data === '[DONE]') break;
try {
const parsed = JSON.parse(data);
const content = parsed.choices[0]?.delta?.content;
if (content) {
controller.enqueue(content);
}
} catch (e) {
console.error('Failed to parse SSE data:', e);
}
}
}
}
} finally {
reader.releaseLock();
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Implementing the synchronous chat interaction
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
const messages = [
// System message
{
role: 'system',
content: optionsArg.systemMessage,
},
// Message history
...optionsArg.messageHistory.map(msg => ({
role: msg.role,
content: msg.content,
})),
// User message
{
role: 'user',
content: optionsArg.userMessage,
},
];
const response = await fetch(`${this.baseUrl}/chat/completions`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.options.groqToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.options.model,
messages,
temperature: 0.7,
max_completion_tokens: 1024,
stream: false,
}),
});
if (!response.ok) {
const error = await response.json();
throw new Error(`Groq API error: ${error.message || response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant',
message: result.choices[0].message.content,
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
// Groq does not provide an audio API, so this method is not implemented.
throw new Error('Audio generation is not yet supported by Groq.');
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
throw new Error('Vision tasks are not yet supported by Groq.');
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
throw new Error('Document processing is not yet supported by Groq.');
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research capabilities are not yet supported by Groq provider.');
}
/**
* Image generation is not supported by Groq
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Groq. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Groq
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Groq. Please use OpenAI provider for image editing.');
}
}
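Plain usage of the Groq provider (a sketch; the token is a placeholder and the model is the default named above):

const groq = new GroqProvider({
  groqToken: process.env.GROQ_TOKEN!, // placeholder
  model: 'llama-3.3-70b-versatile',
});
await groq.start();
const reply = await groq.chat({
  systemMessage: 'Answer briefly.',
  userMessage: 'What is Groq known for?',
  messageHistory: [],
});
console.log(reply.message);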

View File

@@ -1,352 +0,0 @@
import * as plugins from './plugins.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IMistralProviderOptions {
mistralToken: string;
chatModel?: string; // default: 'mistral-large-latest'
ocrModel?: string; // default: 'mistral-ocr-latest'
tableFormat?: 'markdown' | 'html';
}
export class MistralProvider extends MultiModalModel {
private options: IMistralProviderOptions;
public mistralClient: plugins.mistralai.Mistral;
constructor(optionsArg: IMistralProviderOptions) {
super();
this.options = optionsArg;
}
async start() {
await super.start();
this.mistralClient = new plugins.mistralai.Mistral({
apiKey: this.options.mistralToken,
});
}
async stop() {
await super.stop();
}
/**
* Synchronous chat interaction using Mistral's chat API
*/
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Convert message history to Mistral format
const messages: Array<{
role: 'system' | 'user' | 'assistant';
content: string;
}> = [];
// Add system message first
if (optionsArg.systemMessage) {
messages.push({
role: 'system',
content: optionsArg.systemMessage
});
}
// Add message history
for (const msg of optionsArg.messageHistory) {
messages.push({
role: msg.role === 'system' ? 'system' : msg.role === 'assistant' ? 'assistant' : 'user',
content: msg.content
});
}
// Add current user message
messages.push({
role: 'user',
content: optionsArg.userMessage
});
const result = await this.mistralClient.chat.complete({
model: this.options.chatModel || 'mistral-large-latest',
messages: messages,
});
// Extract content from response
const choice = result.choices?.[0];
let content = '';
if (choice?.message?.content) {
if (typeof choice.message.content === 'string') {
content = choice.message.content;
} else if (Array.isArray(choice.message.content)) {
// Handle array of content chunks
content = choice.message.content
.map((chunk: any) => {
if (typeof chunk === 'string') return chunk;
if (chunk && typeof chunk === 'object' && 'text' in chunk) return chunk.text;
return '';
})
.join('');
}
}
return {
role: 'assistant',
message: content,
};
}
/**
* Streaming chat using Mistral's streaming API
*/
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
const decoder = new TextDecoder();
let buffer = '';
const mistralClient = this.mistralClient;
const chatModel = this.options.chatModel || 'mistral-large-latest';
const transform = new TransformStream<Uint8Array, string>({
async transform(chunk, controller) {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
// Build messages array
const messages: Array<{
role: 'system' | 'user' | 'assistant';
content: string;
}> = [];
if (message.systemMessage) {
messages.push({
role: 'system',
content: message.systemMessage
});
}
messages.push({
role: message.role === 'assistant' ? 'assistant' : 'user',
content: message.content
});
// Use Mistral streaming
const stream = await mistralClient.chat.stream({
model: chatModel,
messages: messages,
});
// Process streaming events
for await (const event of stream) {
const delta = event.data?.choices?.[0]?.delta;
if (delta?.content) {
if (typeof delta.content === 'string') {
controller.enqueue(delta.content);
} else if (Array.isArray(delta.content)) {
for (const chunk of delta.content) {
if (typeof chunk === 'string') {
controller.enqueue(chunk);
} else if (chunk && typeof chunk === 'object' && 'text' in chunk) {
controller.enqueue((chunk as any).text);
}
}
}
}
}
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
},
flush(controller) {
if (buffer.trim()) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
return input.pipeThrough(transform);
}
/**
* Audio generation is not supported by Mistral
*/
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Mistral. Please use ElevenLabs or OpenAI provider for audio generation.');
}
/**
* Vision using Mistral's OCR API for image analysis
*/
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const base64Image = optionsArg.image.toString('base64');
// Detect image type from buffer header
let mimeType = 'image/jpeg';
if (optionsArg.image[0] === 0x89 && optionsArg.image[1] === 0x50) {
mimeType = 'image/png';
} else if (optionsArg.image[0] === 0x47 && optionsArg.image[1] === 0x49) {
mimeType = 'image/gif';
} else if (optionsArg.image[0] === 0x52 && optionsArg.image[1] === 0x49) {
mimeType = 'image/webp';
}
// Use OCR API with image data URL
const ocrResult = await this.mistralClient.ocr.process({
model: this.options.ocrModel || 'mistral-ocr-latest',
document: {
imageUrl: `data:${mimeType};base64,${base64Image}`,
type: 'image_url',
},
});
// Combine markdown from all pages
const extractedText = ocrResult.pages.map(page => page.markdown).join('\n\n');
// If a prompt is provided, use chat to analyze the extracted text
if (optionsArg.prompt && optionsArg.prompt.trim()) {
const chatResponse = await this.chat({
systemMessage: 'You are an assistant analyzing image content. The following is text extracted from an image using OCR.',
userMessage: `${optionsArg.prompt}\n\nExtracted content:\n${extractedText}`,
messageHistory: [],
});
return chatResponse.message;
}
return extractedText;
}
/**
* Document processing using Mistral's OCR API
* PDFs are uploaded via Files API first, then processed with OCR
*/
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
const extractedTexts: string[] = [];
const uploadedFileIds: string[] = [];
try {
// Process each PDF document using Mistral OCR
for (let i = 0; i < optionsArg.pdfDocuments.length; i++) {
const pdfDocument = optionsArg.pdfDocuments[i];
// Upload the PDF to Mistral's Files API first
const uploadResult = await this.mistralClient.files.upload({
file: {
fileName: `document_${i + 1}.pdf`,
content: pdfDocument,
},
purpose: 'ocr',
});
uploadedFileIds.push(uploadResult.id);
// Now use OCR with the uploaded file
const ocrResult = await this.mistralClient.ocr.process({
model: this.options.ocrModel || 'mistral-ocr-latest',
document: {
type: 'file',
fileId: uploadResult.id,
},
tableFormat: this.options.tableFormat || 'markdown',
});
// Combine all page markdown with page separators
const pageTexts = ocrResult.pages.map((page, index) => {
let pageContent = `--- Page ${index + 1} ---\n${page.markdown}`;
// Include tables if present
if (page.tables && page.tables.length > 0) {
pageContent += '\n\n**Tables:**\n' + page.tables.map((t: any) => t.markdown || t.html || '').join('\n');
}
// Include header/footer if present
if (page.header) {
pageContent = `Header: ${page.header}\n${pageContent}`;
}
if (page.footer) {
pageContent += `\nFooter: ${page.footer}`;
}
return pageContent;
}).join('\n\n');
extractedTexts.push(pageTexts);
}
// Combine all document texts
const allDocumentText = extractedTexts.length === 1
? extractedTexts[0]
: extractedTexts.map((text, i) => `=== Document ${i + 1} ===\n${text}`).join('\n\n');
// Use chat API to process the extracted text with the user's query
const chatResponse = await this.chat({
systemMessage: optionsArg.systemMessage || 'You are a helpful assistant analyzing document content.',
userMessage: `${optionsArg.userMessage}\n\n---\nDocument Content:\n${allDocumentText}`,
messageHistory: optionsArg.messageHistory,
});
return {
message: {
role: 'assistant',
content: chatResponse.message
}
};
} finally {
// Clean up uploaded files
for (const fileId of uploadedFileIds) {
try {
await this.mistralClient.files.delete({ fileId });
} catch (cleanupError) {
// Ignore cleanup errors - files may have already been auto-deleted
console.warn(`Failed to delete temporary file ${fileId}:`, cleanupError);
}
}
}
}
/**
* Research is not natively supported by Mistral
*/
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research/web search is not supported by Mistral. Please use Perplexity or Anthropic provider for research capabilities.');
}
/**
* Image generation is not supported by Mistral
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Mistral. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Mistral
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Mistral. Please use OpenAI provider for image editing.');
}
}
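From the caller's side, the upload-then-OCR-then-chat flow in document() reads like this (a sketch; the PDF path and token are placeholders):

import { readFile } from 'fs/promises';

const mistral = new MistralProvider({
  mistralToken: process.env.MISTRAL_TOKEN!, // placeholder
  ocrModel: 'mistral-ocr-latest',
  tableFormat: 'markdown',
});
await mistral.start();
const pdfBytes = new Uint8Array(await readFile('./report.pdf')); // placeholder path
const { message } = await mistral.document({
  systemMessage: 'You are analyzing a financial report.',
  userMessage: 'Summarize the key figures.',
  pdfDocuments: [pdfBytes],
  messageHistory: [],
});
console.log(message.content);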

View File

@@ -1,705 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse,
StreamingChatOptions
} from './abstract.classes.multimodal.js';
/**
* Ollama model runtime options
* @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
*/
export interface IOllamaModelOptions {
num_ctx?: number; // Context window (default: 2048)
temperature?: number; // 0 = deterministic (default: 0.8)
top_k?: number; // Top-k sampling (default: 40)
top_p?: number; // Nucleus sampling (default: 0.9)
repeat_penalty?: number;// Repeat penalty (default: 1.1)
num_predict?: number; // Max tokens to predict
stop?: string[]; // Stop sequences
seed?: number; // Random seed for reproducibility
think?: boolean; // Enable thinking/reasoning mode (for GPT-OSS, QwQ, etc.)
}
/**
* JSON Schema tool definition for Ollama native tool calling
* @see https://docs.ollama.com/capabilities/tool-calling
*/
export interface IOllamaTool {
type: 'function';
function: {
name: string;
description: string;
parameters: {
type: 'object';
properties: Record<string, {
type: string;
description?: string;
enum?: string[];
}>;
required?: string[];
};
};
}
/**
* Tool call returned by model in native tool calling mode
*/
export interface IOllamaToolCall {
function: {
name: string;
arguments: Record<string, unknown>;
index?: number;
};
}
export interface IOllamaProviderOptions {
baseUrl?: string;
model?: string;
visionModel?: string; // Model to use for vision tasks (e.g. 'llava')
defaultOptions?: IOllamaModelOptions; // Default model options
defaultTimeout?: number; // Default timeout in ms (default: 120000)
}
/**
* Extended chat options with Ollama-specific settings
*/
export interface IOllamaChatOptions extends ChatOptions {
options?: IOllamaModelOptions; // Per-request model options
timeout?: number; // Per-request timeout in ms
model?: string; // Per-request model override
tools?: IOllamaTool[]; // Available tools for native function calling
// images is inherited from ChatOptions
}
/**
* Chunk emitted during streaming
*/
export interface IOllamaStreamChunk {
content: string;
thinking?: string; // For models with extended thinking
toolCalls?: IOllamaToolCall[]; // Tool calls in streaming mode
done: boolean;
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
/**
* Extended chat response with Ollama-specific fields
*/
export interface IOllamaChatResponse extends ChatResponse {
thinking?: string;
toolCalls?: IOllamaToolCall[]; // Tool calls from model (native tool calling)
stats?: {
totalDuration?: number;
evalCount?: number;
};
}
export class OllamaProvider extends MultiModalModel {
private options: IOllamaProviderOptions;
private baseUrl: string;
private model: string;
private visionModel: string;
private defaultOptions: IOllamaModelOptions;
private defaultTimeout: number;
constructor(optionsArg: IOllamaProviderOptions = {}) {
super();
this.options = optionsArg;
this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
this.model = optionsArg.model || 'llama2';
this.visionModel = optionsArg.visionModel || 'llava';
this.defaultOptions = optionsArg.defaultOptions || {};
this.defaultTimeout = optionsArg.defaultTimeout || 120000;
}
async start() {
await super.start();
// Verify Ollama is running
try {
const response = await fetch(`${this.baseUrl}/api/tags`);
if (!response.ok) {
throw new Error('Failed to connect to Ollama server');
}
} catch (error) {
throw new Error(`Failed to connect to Ollama server at ${this.baseUrl}: ${error.message}`);
}
}
async stop() {
await super.stop();
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to Ollama
if (currentMessage) {
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.model,
messages: [{ role: currentMessage.role, content: currentMessage.content }],
stream: true,
}),
});
// Process each chunk from Ollama
const reader = response.body?.getReader();
if (reader) {
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = new TextDecoder().decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.trim()) {
try {
const parsed = JSON.parse(line);
const content = parsed.message?.content;
if (content) {
controller.enqueue(content);
}
} catch (e) {
console.error('Failed to parse Ollama response:', e);
}
}
}
}
} finally {
reader.releaseLock();
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Implementing the synchronous chat interaction
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Format messages for Ollama
const historyMessages = optionsArg.messageHistory.map((msg) => {
const formatted: { role: string; content: string; images?: string[]; reasoning?: string } = {
role: msg.role,
content: msg.content,
};
if (msg.images && msg.images.length > 0) {
formatted.images = msg.images;
}
if (msg.reasoning) {
formatted.reasoning = msg.reasoning;
}
return formatted;
});
// Build user message with optional images
const userMessage: { role: string; content: string; images?: string[] } = {
role: 'user',
content: optionsArg.userMessage,
};
if (optionsArg.images && optionsArg.images.length > 0) {
userMessage.images = optionsArg.images;
}
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...historyMessages,
userMessage,
];
// Build request body - include think parameter if set
const requestBody: Record<string, unknown> = {
model: this.model,
messages: messages,
stream: false,
options: this.defaultOptions,
};
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
if (this.defaultOptions.think !== undefined) {
requestBody.think = this.defaultOptions.think;
}
// Make API call to Ollama with defaultOptions and timeout
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(this.defaultTimeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant' as const,
message: result.message.content,
reasoning: result.message.thinking || result.message.reasoning,
};
}
/**
* Streaming chat with token callback (implements MultiModalModel interface)
* Calls onToken for each token generated during the response
*/
public async chatStreaming(optionsArg: StreamingChatOptions): Promise<ChatResponse> {
const onToken = optionsArg.onToken;
// Use existing collectStreamResponse with callback, including images
const response = await this.collectStreamResponse(
{
systemMessage: optionsArg.systemMessage,
userMessage: optionsArg.userMessage,
messageHistory: optionsArg.messageHistory,
images: optionsArg.images,
},
(chunk) => {
if (onToken) {
if (chunk.thinking) onToken(chunk.thinking);
if (chunk.content) onToken(chunk.content);
}
}
);
return {
role: 'assistant' as const,
message: response.message,
reasoning: response.thinking,
};
}
/**
* Streaming chat with async iteration and options support
*/
public async chatStreamResponse(
optionsArg: IOllamaChatOptions
): Promise<AsyncIterable<IOllamaStreamChunk>> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
// Format history messages with optional images, reasoning, and tool_calls
const historyMessages = optionsArg.messageHistory.map((msg) => {
const formatted: { role: string; content: string; images?: string[]; reasoning?: string; tool_calls?: any[] } = {
role: msg.role,
content: msg.content,
};
if (msg.images && msg.images.length > 0) {
formatted.images = msg.images;
}
if (msg.reasoning) {
formatted.reasoning = msg.reasoning;
}
// CRITICAL: Include tool_calls in history for native tool calling
// Without this, the model doesn't know it already called a tool and may call it again
if ((msg as any).tool_calls && Array.isArray((msg as any).tool_calls)) {
formatted.tool_calls = (msg as any).tool_calls;
}
return formatted;
});
// Build user message with optional images
const userMessage: { role: string; content: string; images?: string[] } = {
role: 'user',
content: optionsArg.userMessage,
};
if (optionsArg.images && optionsArg.images.length > 0) {
userMessage.images = optionsArg.images;
}
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...historyMessages,
userMessage,
];
// Build request body with optional tools and think parameters
const requestBody: Record<string, unknown> = {
model,
messages,
stream: true,
options: modelOptions,
};
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
if (modelOptions.think !== undefined) {
requestBody.think = modelOptions.think;
}
// Add tools for native function calling
if (optionsArg.tools && optionsArg.tools.length > 0) {
requestBody.tools = optionsArg.tools;
}
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status}`);
}
const reader = response.body!.getReader();
const decoder = new TextDecoder();
return {
[Symbol.asyncIterator]: async function* () {
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const json = JSON.parse(line);
// Parse tool_calls from response
let toolCalls: IOllamaToolCall[] | undefined;
if (json.message?.tool_calls && Array.isArray(json.message.tool_calls)) {
toolCalls = json.message.tool_calls.map((tc: any) => ({
function: {
name: tc.function?.name || '',
arguments: typeof tc.function?.arguments === 'string'
? JSON.parse(tc.function.arguments)
: tc.function?.arguments || {},
index: tc.index,
},
}));
}
yield {
content: json.message?.content || '',
thinking: json.message?.thinking,
toolCalls,
done: json.done || false,
stats: json.done ? {
totalDuration: json.total_duration,
evalCount: json.eval_count,
} : undefined,
} as IOllamaStreamChunk;
} catch { /* skip malformed */ }
}
}
} finally {
reader.releaseLock();
}
}
};
}
/**
* Stream and collect full response with optional progress callback
*/
public async collectStreamResponse(
optionsArg: IOllamaChatOptions,
onChunk?: (chunk: IOllamaStreamChunk) => void
): Promise<IOllamaChatResponse> {
const stream = await this.chatStreamResponse(optionsArg);
let content = '';
let thinking = '';
let toolCalls: IOllamaToolCall[] = [];
let stats: IOllamaChatResponse['stats'];
for await (const chunk of stream) {
if (chunk.content) content += chunk.content;
if (chunk.thinking) thinking += chunk.thinking;
if (chunk.toolCalls) toolCalls = toolCalls.concat(chunk.toolCalls);
if (chunk.stats) stats = chunk.stats;
if (onChunk) onChunk(chunk);
}
return {
role: 'assistant' as const,
message: content,
thinking: thinking || undefined,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
stats,
};
}
/**
* Non-streaming chat with full options support
*/
public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
const model = optionsArg.model || this.model;
const timeout = optionsArg.timeout || this.defaultTimeout;
const modelOptions = { ...this.defaultOptions, ...optionsArg.options };
// Format history messages with optional images, reasoning, tool_calls, and tool role
const historyMessages = optionsArg.messageHistory.map((msg) => {
// Handle tool result messages
if ((msg as any).role === 'tool') {
return {
role: 'tool',
content: msg.content,
tool_name: (msg as any).toolName,
};
}
const formatted: { role: string; content: string; images?: string[]; reasoning?: string; tool_calls?: any[] } = {
role: msg.role,
content: msg.content,
};
if (msg.images && msg.images.length > 0) {
formatted.images = msg.images;
}
if (msg.reasoning) {
formatted.reasoning = msg.reasoning;
}
// CRITICAL: Include tool_calls in history for native tool calling
// Without this, the model doesn't know it already called a tool and may call it again
if ((msg as any).tool_calls && Array.isArray((msg as any).tool_calls)) {
formatted.tool_calls = (msg as any).tool_calls;
}
return formatted;
});
// Build user message with optional images
const userMessage: { role: string; content: string; images?: string[] } = {
role: 'user',
content: optionsArg.userMessage,
};
if (optionsArg.images && optionsArg.images.length > 0) {
userMessage.images = optionsArg.images;
}
const messages = [
{ role: 'system', content: optionsArg.systemMessage },
...historyMessages,
userMessage,
];
// Build request body with optional tools and think parameters
const requestBody: Record<string, unknown> = {
model,
messages,
stream: false,
options: modelOptions,
};
// Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
if (modelOptions.think !== undefined) {
requestBody.think = modelOptions.think;
}
// Add tools for native function calling
if (optionsArg.tools && optionsArg.tools.length > 0) {
requestBody.tools = optionsArg.tools;
}
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(timeout),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
// Parse tool_calls from response
let toolCalls: IOllamaToolCall[] | undefined;
if (result.message?.tool_calls && Array.isArray(result.message.tool_calls)) {
toolCalls = result.message.tool_calls.map((tc: any) => ({
function: {
name: tc.function?.name || '',
arguments: typeof tc.function?.arguments === 'string'
? JSON.parse(tc.function.arguments)
: tc.function?.arguments || {},
index: tc.index,
},
}));
}
return {
role: 'assistant' as const,
message: result.message.content || '',
thinking: result.message.thinking,
toolCalls,
stats: {
totalDuration: result.total_duration,
evalCount: result.eval_count,
},
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Ollama.');
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const base64Image = optionsArg.image.toString('base64');
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.visionModel,
messages: [{
role: 'user',
content: optionsArg.prompt,
images: [base64Image]
}],
stream: false
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return result.message.content;
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// Convert PDF documents to images using SmartPDF
let documentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
}
// Convert images to base64
const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));
// Send request to Ollama with images
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: this.visionModel,
messages: [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{
role: 'user',
content: optionsArg.userMessage,
images: base64Images
}
],
stream: false
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.statusText}`);
}
const result = await response.json();
return {
message: {
role: 'assistant',
content: result.message.content
}
};
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research capabilities are not yet supported by Ollama provider.');
}
/**
* Image generation is not supported by Ollama
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Ollama. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Ollama
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Ollama. Please use OpenAI provider for image editing.');
}
}
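/**
 * Usage sketch (illustrative, not part of the original file): native tool calling
 * with streamed collection, using the exampleWeatherTool shown above. Assumes a
 * locally running Ollama server and a tool-capable model such as qwen3.
 */
async function ollamaToolCallExample() {
  const provider = new OllamaProvider({
    model: 'qwen3',
    defaultOptions: { think: true, num_ctx: 8192 },
  });
  await provider.start();
  const response = await provider.collectStreamResponse(
    {
      systemMessage: 'You are a helpful assistant.',
      userMessage: 'What is the weather in Berlin?',
      messageHistory: [],
      tools: [exampleWeatherTool],
    },
    (chunk) => {
      if (chunk.content) process.stdout.write(chunk.content);
    }
  );
  // If the model decided to call the tool, the parsed name and arguments are here.
  console.log(response.toolCalls);
  await provider.stop();
}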

View File

@@ -1,462 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { Readable } from 'stream';
import { toFile } from 'openai';
// Custom type definition for chat completion messages
export type TChatCompletionRequestMessage = {
role: "system" | "user" | "assistant";
content: string;
};
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IOpenaiProviderOptions {
openaiToken: string;
chatModel?: string;
audioModel?: string;
visionModel?: string;
researchModel?: string;
imageModel?: string;
enableWebSearch?: boolean;
}
export class OpenAiProvider extends MultiModalModel {
private options: IOpenaiProviderOptions;
public openAiApiClient: plugins.openai.default;
constructor(optionsArg: IOpenaiProviderOptions) {
super();
this.options = optionsArg;
}
public async start() {
await super.start();
this.openAiApiClient = new plugins.openai.default({
apiKey: this.options.openaiToken,
dangerouslyAllowBrowser: true,
});
}
public async stop() {
await super.stop();
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: {
role: "function" | "user" | "system" | "assistant" | "tool" | "developer";
content: string;
} | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: (message.role || 'user') as "function" | "user" | "system" | "assistant" | "tool" | "developer",
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to OpenAI
if (currentMessage) {
const messageToSend = { role: "user" as const, content: currentMessage.content };
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
const requestParams: any = {
model: chatModel,
messages: [messageToSend],
stream: true,
};
// Temperature is omitted since the model does not support it.
const stream = await this.openAiApiClient.chat.completions.create(requestParams);
// Explicitly cast the stream as an async iterable to satisfy TypeScript.
const streamAsyncIterable = stream as unknown as AsyncIterableIterator<any>;
// Process each chunk from OpenAI
for await (const chunk of streamAsyncIterable) {
const content = chunk.choices[0]?.delta?.content;
if (content) {
controller.enqueue(content);
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Implementing the synchronous chat interaction
public async chat(optionsArg: {
systemMessage: string;
userMessage: string;
messageHistory: {
role: 'assistant' | 'user';
content: string;
}[];
}) {
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
const requestParams: any = {
model: chatModel,
messages: [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage },
],
};
// Temperature parameter removed to avoid unsupported error.
const result = await this.openAiApiClient.chat.completions.create(requestParams);
return {
role: result.choices[0].message.role as 'assistant',
message: result.choices[0].message.content,
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
const done = plugins.smartpromise.defer<NodeJS.ReadableStream>();
const result = await this.openAiApiClient.audio.speech.create({
model: this.options.audioModel ?? 'tts-1-hd',
input: optionsArg.message,
voice: 'nova',
response_format: 'mp3',
speed: 1,
});
const stream = result.body;
const nodeStream = Readable.fromWeb(stream as any);
done.resolve(nodeStream);
return done.promise;
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: {
role: 'assistant' | 'user';
content: any;
}[];
}) {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
let pdfDocumentImageBytesArray: Uint8Array[] = [];
// Convert each PDF into one or more image byte arrays.
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
}
console.log('PDF page image byte lengths:');
console.log(pdfDocumentImageBytesArray.map((imageBytes) => imageBytes.length));
// Filter out any empty buffers to avoid sending invalid image URLs.
const validImageBytesArray = pdfDocumentImageBytesArray.filter(imageBytes => imageBytes && imageBytes.length > 0);
const imageAttachments = validImageBytesArray.map(imageBytes => ({
type: 'image_url',
image_url: {
url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
},
}));
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
const requestParams: any = {
model: chatModel,
messages: [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{
role: 'user',
content: [
{ type: 'text', text: optionsArg.userMessage },
...imageAttachments,
],
},
],
};
// Temperature parameter removed.
const result = await this.openAiApiClient.chat.completions.create(requestParams);
return {
message: result.choices[0].message,
};
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
const visionModel = this.options.visionModel ?? 'o4-mini';
const requestParams: any = {
model: visionModel,
messages: [
{
role: 'user',
content: [
{ type: 'text', text: optionsArg.prompt },
{
type: 'image_url',
image_url: {
url: `data:image/jpeg;base64,${optionsArg.image.toString('base64')}`
}
}
]
}
],
max_tokens: 300
};
const result = await this.openAiApiClient.chat.completions.create(requestParams);
return result.choices[0].message.content || '';
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
// Determine which model to use - Deep Research API requires specific models
let model: string;
if (optionsArg.searchDepth === 'deep') {
model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
} else {
// For basic/advanced, still use deep research models if web search is needed
if (optionsArg.includeWebSearch) {
model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
} else {
model = this.options.chatModel || 'gpt-5-mini';
}
}
const systemMessage = 'You are a research assistant. Provide comprehensive answers with citations and sources when available.';
// Prepare request parameters using Deep Research API format
const requestParams: any = {
model,
instructions: systemMessage,
input: optionsArg.query
};
// Add web search tool if requested
if (optionsArg.includeWebSearch || optionsArg.searchDepth === 'deep') {
requestParams.tools = [
{
type: 'web_search_preview',
search_context_size: optionsArg.searchDepth === 'deep' ? 'high' :
optionsArg.searchDepth === 'advanced' ? 'medium' : 'low'
}
];
}
// Add background flag for deep research
if (optionsArg.background && optionsArg.searchDepth === 'deep') {
requestParams.background = true;
}
try {
// Execute the research request using Deep Research API
const result = await this.openAiApiClient.responses.create(requestParams);
// Extract the answer from output items
let answer = '';
const sources: Array<{ url: string; title: string; snippet: string }> = [];
const searchQueries: string[] = [];
// Process output items
for (const item of result.output || []) {
// Extract message content
if (item.type === 'message' && 'content' in item) {
const messageItem = item as any;
for (const contentItem of messageItem.content || []) {
if (contentItem.type === 'output_text' && 'text' in contentItem) {
answer += contentItem.text;
}
}
}
// Extract web search queries
if (item.type === 'web_search_call' && 'action' in item) {
const searchItem = item as any;
if (searchItem.action && searchItem.action.type === 'search' && 'query' in searchItem.action) {
searchQueries.push(searchItem.action.query);
}
}
}
// Parse sources from markdown links in the answer
const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
let match: RegExpExecArray | null;
while ((match = urlRegex.exec(answer)) !== null) {
sources.push({
title: match[1],
url: match[2],
snippet: ''
});
}
return {
answer,
sources,
searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
metadata: {
model,
searchDepth: optionsArg.searchDepth || 'basic',
tokensUsed: result.usage?.total_tokens
}
};
} catch (error) {
console.error('Research API error:', error);
throw new Error(`Failed to perform research: ${error.message}`);
}
}
/**
* Image generation using OpenAI's gpt-image-1 or DALL-E models
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
try {
const requestParams: any = {
model,
prompt: optionsArg.prompt,
n: optionsArg.n || 1,
};
// Add gpt-image-1 specific parameters
if (model === 'gpt-image-1') {
if (optionsArg.quality) requestParams.quality = optionsArg.quality;
if (optionsArg.size) requestParams.size = optionsArg.size;
if (optionsArg.background) requestParams.background = optionsArg.background;
if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
if (optionsArg.moderation) requestParams.moderation = optionsArg.moderation;
if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
} else if (model === 'dall-e-3') {
// DALL-E 3 specific parameters
if (optionsArg.quality) requestParams.quality = optionsArg.quality;
if (optionsArg.size) requestParams.size = optionsArg.size;
if (optionsArg.style) requestParams.style = optionsArg.style;
requestParams.response_format = 'b64_json'; // Always use base64 for consistency
} else if (model === 'dall-e-2') {
// DALL-E 2 specific parameters
if (optionsArg.size) requestParams.size = optionsArg.size;
requestParams.response_format = 'b64_json';
}
const result = await this.openAiApiClient.images.generate(requestParams);
const images = (result.data || []).map(img => ({
b64_json: img.b64_json,
url: img.url,
revisedPrompt: img.revised_prompt
}));
return {
images,
metadata: {
model,
quality: result.quality,
size: result.size,
outputFormat: result.output_format,
tokensUsed: result.usage?.total_tokens
}
};
} catch (error) {
console.error('Image generation error:', error);
throw new Error(`Failed to generate image: ${error.message}`);
}
}
/**
* Image editing using OpenAI's gpt-image-1 or DALL-E 2 models
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';
try {
// Convert Buffer to uploadable file format for OpenAI API
const imageFile = await toFile(optionsArg.image, 'image.png', { type: 'image/png' });
const requestParams: any = {
model,
image: imageFile,
prompt: optionsArg.prompt,
n: optionsArg.n || 1,
};
// Add mask if provided (also convert to file format)
if (optionsArg.mask) {
requestParams.mask = await toFile(optionsArg.mask, 'mask.png', { type: 'image/png' });
}
// Add gpt-image-1 specific parameters
if (model === 'gpt-image-1') {
if (optionsArg.quality) requestParams.quality = optionsArg.quality;
if (optionsArg.size) requestParams.size = optionsArg.size;
if (optionsArg.background) requestParams.background = optionsArg.background;
if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
} else if (model === 'dall-e-2') {
// DALL-E 2 specific parameters
if (optionsArg.size) requestParams.size = optionsArg.size;
requestParams.response_format = 'b64_json';
}
const result = await this.openAiApiClient.images.edit(requestParams);
const images = (result.data || []).map(img => ({
b64_json: img.b64_json,
url: img.url,
revisedPrompt: img.revised_prompt
}));
return {
images,
metadata: {
model,
quality: result.quality,
size: result.size,
outputFormat: result.output_format,
tokensUsed: result.usage?.total_tokens
}
};
} catch (error) {
console.error('Image edit error:', error);
throw new Error(`Failed to edit image: ${error.message}`);
}
}
}
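/**
 * Usage sketch (illustrative, not part of the original file): generating an image
 * with gpt-image-1 and writing the first result to disk. Assumes a valid
 * OPENAI_API_KEY in the environment.
 */
import * as fs from 'node:fs';

async function openAiImageExample() {
  const provider = new OpenAiProvider({ openaiToken: process.env.OPENAI_API_KEY! });
  await provider.start();
  const result = await provider.imageGenerate({
    prompt: 'A watercolor painting of a lighthouse at dusk',
    model: 'gpt-image-1',
    size: '1024x1024',
    quality: 'high',
    outputFormat: 'png',
  });
  const first = result.images[0];
  if (first?.b64_json) {
    fs.writeFileSync('lighthouse.png', Buffer.from(first.b64_json, 'base64'));
  }
  await provider.stop();
}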

View File

@@ -1,259 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
export interface IPerplexityProviderOptions {
perplexityToken: string;
}
export class PerplexityProvider extends MultiModalModel {
private options: IPerplexityProviderOptions;
constructor(optionsArg: IPerplexityProviderOptions) {
super();
this.options = optionsArg;
}
async start() {
// Initialize any necessary clients or resources
}
async stop() {}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to Perplexity
if (currentMessage) {
const response = await fetch('https://api.perplexity.ai/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.options.perplexityToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'mixtral-8x7b-instruct',
messages: [{ role: currentMessage.role, content: currentMessage.content }],
stream: true,
}),
});
// Process each chunk from Perplexity
const reader = response.body?.getReader();
if (reader) {
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = new TextDecoder().decode(value);
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const data = line.slice(6);
if (data === '[DONE]') break;
try {
const parsed = JSON.parse(data);
const content = parsed.choices[0]?.delta?.content;
if (content) {
controller.enqueue(content);
}
} catch (e) {
console.error('Failed to parse SSE data:', e);
}
}
}
}
} finally {
reader.releaseLock();
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
// Implementing the synchronous chat interaction
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
// Make API call to Perplexity
const response = await fetch('https://api.perplexity.ai/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.options.perplexityToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'mixtral-8x7b-instruct', // Using Mixtral model
messages: [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory,
{ role: 'user', content: optionsArg.userMessage }
],
}),
});
if (!response.ok) {
throw new Error(`Perplexity API error: ${response.statusText}`);
}
const result = await response.json();
return {
role: 'assistant' as const,
message: result.choices[0].message.content,
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by Perplexity.');
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
throw new Error('Vision tasks are not supported by Perplexity.');
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }> {
throw new Error('Document processing is not supported by Perplexity.');
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
// Perplexity has Sonar models that are optimized for search
// sonar models: sonar, sonar-pro
const model = optionsArg.searchDepth === 'deep' ? 'sonar-pro' : 'sonar';
try {
const response = await fetch('https://api.perplexity.ai/chat/completions', {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.options.perplexityToken}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model,
messages: [
{
role: 'system',
content: 'You are a helpful research assistant. Provide accurate information with sources.'
},
{
role: 'user',
content: optionsArg.query
}
],
temperature: 0.7,
max_tokens: 4000
}),
});
if (!response.ok) {
throw new Error(`Perplexity API error: ${response.statusText}`);
}
const result = await response.json();
const answer = result.choices[0].message.content;
// Parse citations from the response
const sources: Array<{ url: string; title: string; snippet: string }> = [];
// Perplexity includes citations in the format [1], [2], etc. with sources listed
// This is a simplified parser - could be enhanced based on actual Perplexity response format
if (result.citations) {
for (const citation of result.citations) {
sources.push({
url: citation.url || '',
title: citation.title || '',
snippet: citation.snippet || ''
});
}
}
return {
answer,
sources,
metadata: {
model,
searchDepth: optionsArg.searchDepth || 'basic'
}
};
} catch (error) {
console.error('Perplexity research error:', error);
throw new Error(`Failed to perform research: ${error.message}`);
}
}
/**
* Image generation is not supported by Perplexity
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by Perplexity. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by Perplexity
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by Perplexity. Please use OpenAI provider for image editing.');
}
}

View File

@@ -1,214 +0,0 @@
import * as plugins from './plugins.js';
import * as paths from './paths.js';
import { MultiModalModel } from './abstract.classes.multimodal.js';
import type {
ChatOptions,
ChatResponse,
ChatMessage,
ResearchOptions,
ResearchResponse,
ImageGenerateOptions,
ImageEditOptions,
ImageResponse
} from './abstract.classes.multimodal.js';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
export interface IXAIProviderOptions {
xaiToken: string;
}
export class XAIProvider extends MultiModalModel {
private options: IXAIProviderOptions;
public openAiApiClient: plugins.openai.default;
constructor(optionsArg: IXAIProviderOptions) {
super();
this.options = optionsArg;
}
public async start() {
await super.start();
this.openAiApiClient = new plugins.openai.default({
apiKey: this.options.xaiToken,
baseURL: 'https://api.x.ai/v1',
});
}
public async stop() {
await super.stop();
}
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
// Create a TextDecoder to handle incoming chunks
const decoder = new TextDecoder();
let buffer = '';
let currentMessage: { role: string; content: string; } | null = null;
// Create a TransformStream to process the input
const transform = new TransformStream<Uint8Array, string>({
transform: async (chunk, controller) => {
buffer += decoder.decode(chunk, { stream: true });
// Try to parse complete JSON messages from the buffer
while (true) {
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex === -1) break;
const line = buffer.slice(0, newlineIndex);
buffer = buffer.slice(newlineIndex + 1);
if (line.trim()) {
try {
const message = JSON.parse(line);
currentMessage = {
role: message.role || 'user',
content: message.content || '',
};
} catch (e) {
console.error('Failed to parse message:', e);
}
}
}
// If we have a complete message, send it to X.AI
if (currentMessage) {
const stream = await this.openAiApiClient.chat.completions.create({
model: 'grok-2-latest',
messages: [{ role: currentMessage.role, content: currentMessage.content }],
stream: true,
});
// Process each chunk from X.AI
for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content;
if (content) {
controller.enqueue(content);
}
}
currentMessage = null;
}
},
flush(controller) {
if (buffer) {
try {
const message = JSON.parse(buffer);
controller.enqueue(message.content || '');
} catch (e) {
console.error('Failed to parse remaining buffer:', e);
}
}
}
});
// Connect the input to our transform stream
return input.pipeThrough(transform);
}
public async chat(optionsArg: {
systemMessage: string;
userMessage: string;
messageHistory: { role: string; content: string; }[];
}): Promise<{ role: 'assistant'; message: string; }> {
// Prepare messages array with system message, history, and user message
const messages: ChatCompletionMessageParam[] = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory.map(msg => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: msg.content
})),
{ role: 'user', content: optionsArg.userMessage }
];
// Call X.AI's chat completion API
const completion = await this.openAiApiClient.chat.completions.create({
model: 'grok-2-latest',
messages: messages,
stream: false,
});
// Return the assistant's response
return {
role: 'assistant',
message: completion.choices[0]?.message?.content || ''
};
}
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
throw new Error('Audio generation is not supported by X.AI');
}
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
throw new Error('Vision tasks are not supported by X.AI');
}
public async document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: { role: string; content: string; }[];
}): Promise<{ message: any }> {
// Ensure SmartPdf is initialized before processing documents
await this.ensureSmartpdfReady();
// First convert PDF documents to images
let pdfDocumentImageBytesArray: Uint8Array[] = [];
for (const pdfDocument of optionsArg.pdfDocuments) {
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
}
// Convert images to base64 for inclusion in the message
const imageBase64Array = pdfDocumentImageBytesArray.map(bytes =>
Buffer.from(bytes).toString('base64')
);
// Combine document references into the user message. Note: the base64 image data
// is not actually embedded here - only a placeholder per page - since this
// text-only chat completion cannot consume image content.
const enhancedUserMessage = `
${optionsArg.userMessage}
Document contents (as images):
${imageBase64Array.map((img, i) => `Image ${i + 1}: <image data>`).join('\n')}
`;
// Use chat completion to analyze the documents
const messages: ChatCompletionMessageParam[] = [
{ role: 'system', content: optionsArg.systemMessage },
...optionsArg.messageHistory.map(msg => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: msg.content
})),
{ role: 'user', content: enhancedUserMessage }
];
const completion = await this.openAiApiClient.chat.completions.create({
model: 'grok-2-latest',
messages: messages,
stream: false,
});
return {
message: completion.choices[0]?.message?.content || ''
};
}
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
throw new Error('Research capabilities are not yet supported by xAI provider.');
}
/**
* Image generation is not supported by xAI
*/
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
throw new Error('Image generation is not supported by xAI. Please use OpenAI provider for image generation.');
}
/**
* Image editing is not supported by xAI
*/
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
throw new Error('Image editing is not supported by xAI. Please use OpenAI provider for image editing.');
}
}

View File

@@ -0,0 +1,51 @@
import * as plugins from './plugins.js';
import type { ISmartAiOptions, LanguageModelV3 } from './smartai.interfaces.js';
import { createOllamaModel } from './smartai.provider.ollama.js';
import { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';
/**
* Returns a LanguageModelV3 for the given provider and model.
* This is the primary API — consumers use the returned model with AI SDK's
* generateText(), streamText(), etc.
*/
export function getModel(options: ISmartAiOptions): LanguageModelV3 {
switch (options.provider) {
case 'anthropic': {
const p = plugins.createAnthropic({ apiKey: options.apiKey });
const base = p(options.model) as LanguageModelV3;
if (options.promptCaching === false) return base;
return plugins.wrapLanguageModel({
model: base,
middleware: createAnthropicCachingMiddleware(),
}) as unknown as LanguageModelV3;
}
case 'openai': {
const p = plugins.createOpenAI({ apiKey: options.apiKey });
return p(options.model) as LanguageModelV3;
}
case 'google': {
const p = plugins.createGoogleGenerativeAI({ apiKey: options.apiKey });
return p(options.model) as LanguageModelV3;
}
case 'groq': {
const p = plugins.createGroq({ apiKey: options.apiKey });
return p(options.model) as LanguageModelV3;
}
case 'mistral': {
const p = plugins.createMistral({ apiKey: options.apiKey });
return p(options.model) as LanguageModelV3;
}
case 'xai': {
const p = plugins.createXai({ apiKey: options.apiKey });
return p(options.model) as LanguageModelV3;
}
case 'perplexity': {
const p = plugins.createPerplexity({ apiKey: options.apiKey });
return p(options.model) as LanguageModelV3;
}
case 'ollama':
return createOllamaModel(options);
default:
throw new Error(`Unknown provider: ${(options as ISmartAiOptions).provider}`);
}
}
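/**
 * Usage sketch: the returned LanguageModelV3 plugs straight into the AI SDK.
 * Assumes the `ai` package's generateText(); the model id shown is illustrative.
 */
import { generateText } from 'ai';

async function getModelExample() {
  const model = getModel({
    provider: 'anthropic',
    model: 'claude-sonnet-4-5',
    apiKey: process.env.ANTHROPIC_API_KEY,
  });
  const { text } = await generateText({
    model,
    prompt: 'Summarize prompt caching in one sentence.',
  });
  console.log(text);
}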

ts/smartai.interfaces.ts Normal file
View File

@@ -0,0 +1,53 @@
import type { LanguageModelV3 } from '@ai-sdk/provider';
export type TProvider =
| 'anthropic'
| 'openai'
| 'google'
| 'groq'
| 'mistral'
| 'xai'
| 'perplexity'
| 'ollama';
export interface ISmartAiOptions {
provider: TProvider;
model: string;
apiKey?: string;
/** For Ollama: base URL of the local server. Default: http://localhost:11434 */
baseUrl?: string;
/**
* Ollama-specific model runtime options.
* Only used when provider === 'ollama'.
*/
ollamaOptions?: IOllamaModelOptions;
/**
* Enable Anthropic prompt caching on system + recent messages.
* Only used when provider === 'anthropic'. Default: true.
*/
promptCaching?: boolean;
}
/**
* Ollama model runtime options passed in the request body `options` field.
* @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
*/
export interface IOllamaModelOptions {
/** Context window size. Default: 2048. */
num_ctx?: number;
/** 0 = deterministic. Default: 0.8. For Qwen models use 0.55. */
temperature?: number;
top_k?: number;
top_p?: number;
repeat_penalty?: number;
num_predict?: number;
stop?: string[];
seed?: number;
/**
* Enable thinking/reasoning mode (Qwen3, QwQ, DeepSeek-R1 etc.).
* The custom Ollama provider handles this directly.
*/
think?: boolean;
}
export type { LanguageModelV3 };
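/**
 * Illustrative example: options for a local Qwen model via Ollama with thinking
 * mode enabled and a larger context window (model id and values are examples).
 */
const exampleOllamaSmartAiOptions: ISmartAiOptions = {
  provider: 'ollama',
  model: 'qwen3:8b',
  baseUrl: 'http://localhost:11434',
  ollamaOptions: {
    num_ctx: 8192,
    temperature: 0.55,
    think: true,
  },
};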

View File

@@ -0,0 +1,38 @@
import type { LanguageModelV3Middleware, LanguageModelV3Prompt } from '@ai-sdk/provider';
/**
* Creates middleware that adds Anthropic prompt caching directives.
* Marks the last system message and last user message with ephemeral cache control,
* reducing input token cost and latency on repeated calls.
*/
export function createAnthropicCachingMiddleware(): LanguageModelV3Middleware {
return {
specificationVersion: 'v3',
transformParams: async ({ params }) => {
const messages = [...params.prompt] as Array<Record<string, unknown>>;
// Find the last system message and last user message
let lastSystemIdx = -1;
let lastUserIdx = -1;
for (let i = 0; i < messages.length; i++) {
if (messages[i].role === 'system') lastSystemIdx = i;
if (messages[i].role === 'user') lastUserIdx = i;
}
const targets = [lastSystemIdx, lastUserIdx].filter(i => i >= 0);
for (const idx of targets) {
const msg = { ...messages[idx] };
msg.providerOptions = {
...(msg.providerOptions as Record<string, unknown> || {}),
anthropic: {
...((msg.providerOptions as Record<string, unknown>)?.anthropic as Record<string, unknown> || {}),
cacheControl: { type: 'ephemeral' },
},
};
messages[idx] = msg;
}
return { ...params, prompt: messages as unknown as LanguageModelV3Prompt };
},
};
}
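/**
 * Usage sketch: the middleware composes with any Anthropic model via the AI SDK's
 * wrapLanguageModel(). Shown here with direct package imports and an illustrative
 * model id; the provider registry (getModel) normally wires this up via plugins.
 */
import { wrapLanguageModel } from 'ai';
import { createAnthropic } from '@ai-sdk/anthropic';

const cachedClaude = wrapLanguageModel({
  model: createAnthropic({ apiKey: process.env.ANTHROPIC_API_KEY })('claude-sonnet-4-5'),
  middleware: createAnthropicCachingMiddleware(),
});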

View File

@@ -0,0 +1,426 @@
import type {
LanguageModelV3,
LanguageModelV3CallOptions,
LanguageModelV3GenerateResult,
LanguageModelV3StreamResult,
LanguageModelV3StreamPart,
LanguageModelV3Prompt,
LanguageModelV3Content,
LanguageModelV3Usage,
LanguageModelV3FinishReason,
} from '@ai-sdk/provider';
import type { ISmartAiOptions, IOllamaModelOptions } from './smartai.interfaces.js';
interface IOllamaMessage {
role: string;
content: string;
images?: string[];
tool_calls?: Array<{
function: { name: string; arguments: Record<string, unknown> };
}>;
thinking?: string;
}
interface IOllamaTool {
type: 'function';
function: {
name: string;
description: string;
parameters: Record<string, unknown>;
};
}
/**
* Convert AI SDK V3 prompt messages to Ollama's message format.
*/
function convertPromptToOllamaMessages(prompt: LanguageModelV3Prompt): IOllamaMessage[] {
const messages: IOllamaMessage[] = [];
for (const msg of prompt) {
if (msg.role === 'system') {
// System message content is a plain string in V3
messages.push({ role: 'system', content: msg.content });
} else if (msg.role === 'user') {
let text = '';
const images: string[] = [];
for (const part of msg.content) {
if (part.type === 'text') {
text += part.text;
} else if (part.type === 'file' && part.mediaType?.startsWith('image/')) {
// Handle image files — Ollama expects base64 images
if (typeof part.data === 'string') {
images.push(part.data);
} else if (part.data instanceof Uint8Array) {
images.push(Buffer.from(part.data).toString('base64'));
}
}
}
const m: IOllamaMessage = { role: 'user', content: text };
if (images.length > 0) m.images = images;
messages.push(m);
} else if (msg.role === 'assistant') {
let text = '';
let thinking = '';
const toolCalls: IOllamaMessage['tool_calls'] = [];
for (const part of msg.content) {
if (part.type === 'text') {
text += part.text;
} else if (part.type === 'reasoning') {
thinking += part.text;
} else if (part.type === 'tool-call') {
const args = typeof part.input === 'string'
? JSON.parse(part.input as string)
: (part.input as Record<string, unknown>);
toolCalls.push({
function: {
name: part.toolName,
arguments: args,
},
});
}
}
const m: IOllamaMessage = { role: 'assistant', content: text };
if (toolCalls.length > 0) m.tool_calls = toolCalls;
if (thinking) m.thinking = thinking;
messages.push(m);
} else if (msg.role === 'tool') {
for (const part of msg.content) {
if (part.type === 'tool-result') {
let resultContent = '';
if (part.output) {
if (part.output.type === 'text') {
resultContent = part.output.value;
} else if (part.output.type === 'json') {
resultContent = JSON.stringify(part.output.value);
}
}
messages.push({ role: 'tool', content: resultContent });
}
}
}
}
return messages;
}
/**
* Convert AI SDK V3 tools to Ollama's tool format.
*/
function convertToolsToOllamaTools(tools: LanguageModelV3CallOptions['tools']): IOllamaTool[] | undefined {
if (!tools || tools.length === 0) return undefined;
return tools
.filter((t): t is Extract<typeof t, { type: 'function' }> => t.type === 'function')
.map(t => ({
type: 'function' as const,
function: {
name: t.name,
description: t.description ?? '',
parameters: t.inputSchema as Record<string, unknown>,
},
}));
}
function makeUsage(promptTokens?: number, completionTokens?: number): LanguageModelV3Usage {
return {
inputTokens: {
total: promptTokens,
noCache: undefined,
cacheRead: undefined,
cacheWrite: undefined,
},
outputTokens: {
total: completionTokens,
text: completionTokens,
reasoning: undefined,
},
};
}
function makeFinishReason(reason?: string): LanguageModelV3FinishReason {
if (reason === 'tool_calls' || reason === 'tool-calls') {
return { unified: 'tool-calls', raw: reason };
}
return { unified: 'stop', raw: reason ?? 'stop' };
}
let idCounter = 0;
function generateId(): string {
return `ollama-${Date.now()}-${idCounter++}`;
}
/**
* Custom LanguageModelV3 implementation for Ollama.
* Calls Ollama's native /api/chat endpoint directly to support
* think, num_ctx, temperature, and other model options.
*/
export function createOllamaModel(options: ISmartAiOptions): LanguageModelV3 {
const baseUrl = options.baseUrl ?? 'http://localhost:11434';
const modelId = options.model;
const ollamaOpts: IOllamaModelOptions = { ...options.ollamaOptions };
// Apply default temperature of 0.55 for Qwen models
if (modelId.toLowerCase().includes('qwen') && ollamaOpts.temperature === undefined) {
ollamaOpts.temperature = 0.55;
}
const model: LanguageModelV3 = {
specificationVersion: 'v3',
provider: 'ollama',
modelId,
supportedUrls: {},
async doGenerate(callOptions: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult> {
const messages = convertPromptToOllamaMessages(callOptions.prompt);
const tools = convertToolsToOllamaTools(callOptions.tools);
const ollamaModelOptions: Record<string, unknown> = { ...ollamaOpts };
// Override with call-level options if provided
if (callOptions.temperature !== undefined) ollamaModelOptions.temperature = callOptions.temperature;
if (callOptions.topP !== undefined) ollamaModelOptions.top_p = callOptions.topP;
if (callOptions.topK !== undefined) ollamaModelOptions.top_k = callOptions.topK;
if (callOptions.maxOutputTokens !== undefined) ollamaModelOptions.num_predict = callOptions.maxOutputTokens;
if (callOptions.seed !== undefined) ollamaModelOptions.seed = callOptions.seed;
if (callOptions.stopSequences) ollamaModelOptions.stop = callOptions.stopSequences;
// Remove think from options — it goes at the top level
const { think, ...modelOpts } = ollamaModelOptions;
const requestBody: Record<string, unknown> = {
model: modelId,
messages,
stream: false,
options: modelOpts,
};
// Add think parameter at the top level (Ollama API requirement)
if (ollamaOpts.think !== undefined) {
requestBody.think = ollamaOpts.think;
}
if (tools) requestBody.tools = tools;
const response = await fetch(`${baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: callOptions.abortSignal,
});
if (!response.ok) {
const body = await response.text();
throw new Error(`Ollama API error ${response.status}: ${body}`);
}
const result = await response.json() as Record<string, unknown>;
const message = result.message as Record<string, unknown>;
// Build content array
const content: LanguageModelV3Content[] = [];
// Add reasoning if present
if (message.thinking && typeof message.thinking === 'string') {
content.push({ type: 'reasoning', text: message.thinking });
}
// Add text content
if (message.content && typeof message.content === 'string') {
content.push({ type: 'text', text: message.content });
}
// Add tool calls if present
if (Array.isArray(message.tool_calls)) {
for (const tc of message.tool_calls as Array<Record<string, unknown>>) {
const fn = tc.function as Record<string, unknown>;
content.push({
type: 'tool-call',
toolCallId: generateId(),
toolName: fn.name as string,
input: JSON.stringify(fn.arguments),
});
}
}
const finishReason = Array.isArray(message.tool_calls) && (message.tool_calls as unknown[]).length > 0
? makeFinishReason('tool_calls')
: makeFinishReason('stop');
return {
content,
finishReason,
usage: makeUsage(
(result.prompt_eval_count as number) ?? undefined,
(result.eval_count as number) ?? undefined,
),
warnings: [],
request: { body: requestBody },
};
},
async doStream(callOptions: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult> {
const messages = convertPromptToOllamaMessages(callOptions.prompt);
const tools = convertToolsToOllamaTools(callOptions.tools);
const ollamaModelOptions: Record<string, unknown> = { ...ollamaOpts };
if (callOptions.temperature !== undefined) ollamaModelOptions.temperature = callOptions.temperature;
if (callOptions.topP !== undefined) ollamaModelOptions.top_p = callOptions.topP;
if (callOptions.topK !== undefined) ollamaModelOptions.top_k = callOptions.topK;
if (callOptions.maxOutputTokens !== undefined) ollamaModelOptions.num_predict = callOptions.maxOutputTokens;
if (callOptions.seed !== undefined) ollamaModelOptions.seed = callOptions.seed;
if (callOptions.stopSequences) ollamaModelOptions.stop = callOptions.stopSequences;
const { think, ...modelOpts } = ollamaModelOptions;
const requestBody: Record<string, unknown> = {
model: modelId,
messages,
stream: true,
options: modelOpts,
};
if (ollamaOpts.think !== undefined) {
requestBody.think = ollamaOpts.think;
}
if (tools) requestBody.tools = tools;
const response = await fetch(`${baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: callOptions.abortSignal,
});
if (!response.ok) {
const body = await response.text();
throw new Error(`Ollama API error ${response.status}: ${body}`);
}
const reader = response.body!.getReader();
const decoder = new TextDecoder();
const textId = generateId();
const reasoningId = generateId();
let textStarted = false;
let reasoningStarted = false;
let hasToolCalls = false;
let closed = false;
const stream = new ReadableStream<LanguageModelV3StreamPart>({
async pull(controller) {
if (closed) return;
const processLine = (line: string) => {
if (!line.trim()) return;
let json: Record<string, unknown>;
try {
json = JSON.parse(line);
} catch {
return;
}
const msg = json.message as Record<string, unknown> | undefined;
// Handle thinking/reasoning content
if (msg?.thinking && typeof msg.thinking === 'string') {
if (!reasoningStarted) {
reasoningStarted = true;
controller.enqueue({ type: 'reasoning-start', id: reasoningId });
}
controller.enqueue({ type: 'reasoning-delta', id: reasoningId, delta: msg.thinking });
}
// Handle text content
if (msg?.content && typeof msg.content === 'string') {
if (reasoningStarted && !textStarted) {
controller.enqueue({ type: 'reasoning-end', id: reasoningId });
}
if (!textStarted) {
textStarted = true;
controller.enqueue({ type: 'text-start', id: textId });
}
controller.enqueue({ type: 'text-delta', id: textId, delta: msg.content });
}
// Handle tool calls
if (Array.isArray(msg?.tool_calls)) {
hasToolCalls = true;
for (const tc of msg!.tool_calls as Array<Record<string, unknown>>) {
const fn = tc.function as Record<string, unknown>;
const callId = generateId();
controller.enqueue({
type: 'tool-call',
toolCallId: callId,
toolName: fn.name as string,
input: JSON.stringify(fn.arguments),
});
}
}
// Handle done
if (json.done) {
if (reasoningStarted && !textStarted) {
controller.enqueue({ type: 'reasoning-end', id: reasoningId });
}
if (textStarted) {
controller.enqueue({ type: 'text-end', id: textId });
}
controller.enqueue({
type: 'finish',
finishReason: hasToolCalls
? makeFinishReason('tool_calls')
: makeFinishReason('stop'),
usage: makeUsage(
(json.prompt_eval_count as number) ?? undefined,
(json.eval_count as number) ?? undefined,
),
});
closed = true;
controller.close();
}
};
try {
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) {
if (buffer.trim()) processLine(buffer);
if (!closed) {
controller.enqueue({
type: 'finish',
finishReason: makeFinishReason('stop'),
usage: makeUsage(undefined, undefined),
});
closed = true;
controller.close();
}
return;
}
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
processLine(line);
if (closed) return;
}
}
} catch (error) {
if (!closed) {
controller.error(error);
closed = true;
}
} finally {
reader.releaseLock();
}
},
});
return {
stream,
request: { body: requestBody },
};
},
};
return model;
}
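/**
 * Usage sketch: streaming from the custom Ollama model with the AI SDK.
 * Assumes the `ai` package's streamText() and a running local Ollama server;
 * the model id is illustrative.
 */
import { streamText } from 'ai';

async function ollamaStreamExample() {
  const model = createOllamaModel({
    provider: 'ollama',
    model: 'qwen3:8b',
    ollamaOptions: { think: true, num_ctx: 8192 },
  });
  const result = streamText({
    model,
    prompt: 'Explain context windows in two sentences.',
  });
  for await (const chunk of result.textStream) {
    process.stdout.write(chunk);
  }
}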