BREAKING CHANGE(vercel-ai-sdk): migrate to Vercel AI SDK v6 and introduce provider registry (getModel) returning LanguageModelV3
This commit is contained in:
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@push.rocks/smartai',
|
||||
version: '0.13.3',
|
||||
description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.'
|
||||
version: '2.0.0',
|
||||
description: 'Provider registry and capability utilities for ai-sdk (Vercel AI SDK). Core export returns LanguageModel; subpath exports provide vision, audio, image, document and research capabilities.'
|
||||
}
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
/**
|
||||
* Message format for chat interactions
|
||||
*/
|
||||
export interface ChatMessage {
|
||||
role: 'assistant' | 'user' | 'system';
|
||||
content: string;
|
||||
/** Base64-encoded images for vision-capable models */
|
||||
images?: string[];
|
||||
/** Chain-of-thought reasoning for GPT-OSS models (e.g., Ollama) */
|
||||
reasoning?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for chat interactions
|
||||
*/
|
||||
export interface ChatOptions {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
messageHistory: ChatMessage[];
|
||||
/** Base64-encoded images for the current message (vision-capable models) */
|
||||
images?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for streaming chat interactions
|
||||
*/
|
||||
export interface StreamingChatOptions extends ChatOptions {
|
||||
/** Callback fired for each token during generation */
|
||||
onToken?: (token: string) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response format for chat interactions
|
||||
*/
|
||||
export interface ChatResponse {
|
||||
role: 'assistant';
|
||||
message: string;
|
||||
/** Chain-of-thought reasoning from reasoning models */
|
||||
reasoning?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for research interactions
|
||||
*/
|
||||
export interface ResearchOptions {
|
||||
query: string;
|
||||
searchDepth?: 'basic' | 'advanced' | 'deep';
|
||||
maxSources?: number;
|
||||
includeWebSearch?: boolean;
|
||||
background?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response format for research interactions
|
||||
*/
|
||||
export interface ResearchResponse {
|
||||
answer: string;
|
||||
sources: Array<{
|
||||
url: string;
|
||||
title: string;
|
||||
snippet: string;
|
||||
}>;
|
||||
searchQueries?: string[];
|
||||
metadata?: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for image generation
|
||||
*/
|
||||
export interface ImageGenerateOptions {
|
||||
prompt: string;
|
||||
model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
|
||||
quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
|
||||
size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
|
||||
style?: 'vivid' | 'natural';
|
||||
background?: 'transparent' | 'opaque' | 'auto';
|
||||
outputFormat?: 'png' | 'jpeg' | 'webp';
|
||||
outputCompression?: number; // 0-100 for webp/jpeg
|
||||
moderation?: 'low' | 'auto';
|
||||
n?: number; // Number of images to generate
|
||||
stream?: boolean;
|
||||
partialImages?: number; // 0-3 for streaming
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for image editing
|
||||
*/
|
||||
export interface ImageEditOptions {
|
||||
image: Buffer;
|
||||
prompt: string;
|
||||
mask?: Buffer;
|
||||
model?: 'gpt-image-1' | 'dall-e-2';
|
||||
quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
|
||||
size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
|
||||
background?: 'transparent' | 'opaque' | 'auto';
|
||||
outputFormat?: 'png' | 'jpeg' | 'webp';
|
||||
outputCompression?: number;
|
||||
n?: number;
|
||||
stream?: boolean;
|
||||
partialImages?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response format for image operations
|
||||
*/
|
||||
export interface ImageResponse {
|
||||
images: Array<{
|
||||
b64_json?: string;
|
||||
url?: string;
|
||||
revisedPrompt?: string;
|
||||
}>;
|
||||
metadata?: {
|
||||
model: string;
|
||||
quality?: string;
|
||||
size?: string;
|
||||
outputFormat?: string;
|
||||
tokensUsed?: number;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract base class for multi-modal AI models.
|
||||
* Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama)
|
||||
*/
|
||||
export abstract class MultiModalModel {
|
||||
/**
|
||||
* SmartPdf instance for document processing
|
||||
* Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily
|
||||
*/
|
||||
protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;
|
||||
|
||||
/**
|
||||
* Ensures SmartPdf instance is initialized and ready
|
||||
* Call this before using smartpdfInstance in document processing methods
|
||||
*/
|
||||
protected async ensureSmartpdfReady(): Promise<void> {
|
||||
if (!this.smartpdfInstance) {
|
||||
this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
|
||||
await this.smartpdfInstance.start();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the model and any necessary resources
|
||||
* Should be called before using any other methods
|
||||
*/
|
||||
public async start(): Promise<void> {
|
||||
// SmartPdf is now lazy-loaded only when needed for PDF processing
|
||||
// This avoids starting a browser unless document() method is actually used
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans up any resources used by the model
|
||||
* Should be called when the model is no longer needed
|
||||
*/
|
||||
public async stop(): Promise<void> {
|
||||
if (this.smartpdfInstance) {
|
||||
await this.smartpdfInstance.stop();
|
||||
this.smartpdfInstance = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronous chat interaction with the model
|
||||
* @param optionsArg Options containing system message, user message, and message history
|
||||
* @returns Promise resolving to the assistant's response
|
||||
*/
|
||||
public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;
|
||||
|
||||
/**
|
||||
* Streaming interface for chat interactions
|
||||
* Allows for real-time responses from the model
|
||||
* @param input Stream of user messages
|
||||
* @returns Stream of model responses
|
||||
*/
|
||||
public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;
|
||||
|
||||
/**
|
||||
* Streaming chat with token callback
|
||||
* Calls onToken for each token generated, returns final response
|
||||
* @param optionsArg Options containing system message, user message, message history, and onToken callback
|
||||
* @returns Promise resolving to the assistant's response
|
||||
*/
|
||||
public chatStreaming?(optionsArg: StreamingChatOptions): Promise<ChatResponse>;
|
||||
|
||||
/**
|
||||
* Text-to-speech conversion
|
||||
* @param optionsArg Options containing the message to convert to speech
|
||||
* @returns Promise resolving to a readable stream of audio data
|
||||
* @throws Error if the provider doesn't support audio generation
|
||||
*/
|
||||
public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;
|
||||
|
||||
/**
|
||||
* Vision-language processing
|
||||
* @param optionsArg Options containing the image and prompt for analysis
|
||||
* @returns Promise resolving to the model's description or analysis of the image
|
||||
* @throws Error if the provider doesn't support vision tasks
|
||||
*/
|
||||
public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;
|
||||
|
||||
/**
|
||||
* Document analysis and processing
|
||||
* @param optionsArg Options containing system message, user message, PDF documents, and message history
|
||||
* @returns Promise resolving to the model's analysis of the documents
|
||||
* @throws Error if the provider doesn't support document processing
|
||||
*/
|
||||
public abstract document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }>;
|
||||
|
||||
/**
|
||||
* Research and web search capabilities
|
||||
* @param optionsArg Options containing the research query and configuration
|
||||
* @returns Promise resolving to the research results with sources
|
||||
* @throws Error if the provider doesn't support research capabilities
|
||||
*/
|
||||
public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;
|
||||
|
||||
/**
|
||||
* Image generation from text prompts
|
||||
* @param optionsArg Options containing the prompt and generation parameters
|
||||
* @returns Promise resolving to the generated image(s)
|
||||
* @throws Error if the provider doesn't support image generation
|
||||
*/
|
||||
public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;
|
||||
|
||||
/**
|
||||
* Image editing and inpainting
|
||||
* @param optionsArg Options containing the image, prompt, and editing parameters
|
||||
* @returns Promise resolving to the edited image(s)
|
||||
* @throws Error if the provider doesn't support image editing
|
||||
*/
|
||||
public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
|
||||
}
|
||||
@@ -1,176 +0,0 @@
|
||||
import type { SmartAi } from "./classes.smartai.js";
|
||||
import { OpenAiProvider } from "./provider.openai.js";
|
||||
|
||||
type TProcessFunction = (input: string) => Promise<string>;
|
||||
|
||||
export interface IConversationOptions {
|
||||
processFunction: TProcessFunction;
|
||||
}
|
||||
|
||||
/**
|
||||
* a conversation
|
||||
*/
|
||||
export class Conversation {
|
||||
// STATIC
|
||||
public static async createWithOpenAi(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.openaiProvider) {
|
||||
throw new Error('OpenAI provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithAnthropic(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.anthropicProvider) {
|
||||
throw new Error('Anthropic provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithPerplexity(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.perplexityProvider) {
|
||||
throw new Error('Perplexity provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithExo(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.exoProvider) {
|
||||
throw new Error('Exo provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithOllama(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.ollamaProvider) {
|
||||
throw new Error('Ollama provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithGroq(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.groqProvider) {
|
||||
throw new Error('Groq provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithMistral(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.mistralProvider) {
|
||||
throw new Error('Mistral provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithXai(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.xaiProvider) {
|
||||
throw new Error('XAI provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
public static async createWithElevenlabs(smartaiRefArg: SmartAi) {
|
||||
if (!smartaiRefArg.elevenlabsProvider) {
|
||||
throw new Error('ElevenLabs provider not available');
|
||||
}
|
||||
const conversation = new Conversation(smartaiRefArg, {
|
||||
processFunction: async (input) => {
|
||||
return '' // TODO implement proper streaming
|
||||
}
|
||||
});
|
||||
return conversation;
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
smartaiRef: SmartAi
|
||||
private systemMessage: string;
|
||||
private processFunction: TProcessFunction;
|
||||
private inputStreamWriter: WritableStreamDefaultWriter<string> | null = null;
|
||||
private outputStreamController: ReadableStreamDefaultController<string> | null = null;
|
||||
|
||||
constructor(smartairefArg: SmartAi, options: IConversationOptions) {
|
||||
this.processFunction = options.processFunction;
|
||||
}
|
||||
|
||||
public async setSystemMessage(systemMessageArg: string) {
|
||||
this.systemMessage = systemMessageArg;
|
||||
}
|
||||
|
||||
private setupOutputStream(): ReadableStream<string> {
|
||||
return new ReadableStream<string>({
|
||||
start: (controller) => {
|
||||
this.outputStreamController = controller;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private setupInputStream(): WritableStream<string> {
|
||||
const writableStream = new WritableStream<string>({
|
||||
write: async (chunk) => {
|
||||
const processedData = await this.processFunction(chunk);
|
||||
if (this.outputStreamController) {
|
||||
this.outputStreamController.enqueue(processedData);
|
||||
}
|
||||
},
|
||||
close: () => {
|
||||
this.outputStreamController?.close();
|
||||
},
|
||||
abort: (err) => {
|
||||
console.error('Stream aborted', err);
|
||||
this.outputStreamController?.error(err);
|
||||
}
|
||||
});
|
||||
return writableStream;
|
||||
}
|
||||
|
||||
public getInputStreamWriter(): WritableStreamDefaultWriter<string> {
|
||||
if (!this.inputStreamWriter) {
|
||||
const inputStream = this.setupInputStream();
|
||||
this.inputStreamWriter = inputStream.getWriter();
|
||||
}
|
||||
return this.inputStreamWriter;
|
||||
}
|
||||
|
||||
public getOutputStream(): ReadableStream<string> {
|
||||
return this.setupOutputStream();
|
||||
}
|
||||
}
|
||||
@@ -1,187 +0,0 @@
|
||||
import { Conversation } from './classes.conversation.js';
|
||||
import * as plugins from './plugins.js';
|
||||
import { AnthropicProvider } from './provider.anthropic.js';
|
||||
import { ElevenLabsProvider } from './provider.elevenlabs.js';
|
||||
import { MistralProvider } from './provider.mistral.js';
|
||||
import { OllamaProvider, type IOllamaModelOptions } from './provider.ollama.js';
|
||||
import { OpenAiProvider } from './provider.openai.js';
|
||||
import { PerplexityProvider } from './provider.perplexity.js';
|
||||
import { ExoProvider } from './provider.exo.js';
|
||||
import { GroqProvider } from './provider.groq.js';
|
||||
import { XAIProvider } from './provider.xai.js';
|
||||
|
||||
|
||||
export interface ISmartAiOptions {
|
||||
openaiToken?: string;
|
||||
anthropicToken?: string;
|
||||
perplexityToken?: string;
|
||||
groqToken?: string;
|
||||
mistralToken?: string;
|
||||
xaiToken?: string;
|
||||
elevenlabsToken?: string;
|
||||
exo?: {
|
||||
baseUrl?: string;
|
||||
apiKey?: string;
|
||||
};
|
||||
mistral?: {
|
||||
chatModel?: string;
|
||||
ocrModel?: string;
|
||||
tableFormat?: 'markdown' | 'html';
|
||||
};
|
||||
ollama?: {
|
||||
baseUrl?: string;
|
||||
model?: string;
|
||||
visionModel?: string;
|
||||
defaultOptions?: IOllamaModelOptions;
|
||||
defaultTimeout?: number;
|
||||
};
|
||||
elevenlabs?: {
|
||||
defaultVoiceId?: string;
|
||||
defaultModelId?: string;
|
||||
};
|
||||
}
|
||||
|
||||
export type TProvider = 'openai' | 'anthropic' | 'perplexity' | 'ollama' | 'exo' | 'groq' | 'mistral' | 'xai' | 'elevenlabs';
|
||||
|
||||
export class SmartAi {
|
||||
public options: ISmartAiOptions;
|
||||
|
||||
public openaiProvider: OpenAiProvider;
|
||||
public anthropicProvider: AnthropicProvider;
|
||||
public perplexityProvider: PerplexityProvider;
|
||||
public ollamaProvider: OllamaProvider;
|
||||
public exoProvider: ExoProvider;
|
||||
public groqProvider: GroqProvider;
|
||||
public mistralProvider: MistralProvider;
|
||||
public xaiProvider: XAIProvider;
|
||||
public elevenlabsProvider: ElevenLabsProvider;
|
||||
|
||||
constructor(optionsArg: ISmartAiOptions) {
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
if (this.options.openaiToken) {
|
||||
this.openaiProvider = new OpenAiProvider({
|
||||
openaiToken: this.options.openaiToken,
|
||||
});
|
||||
await this.openaiProvider.start();
|
||||
}
|
||||
if (this.options.anthropicToken) {
|
||||
this.anthropicProvider = new AnthropicProvider({
|
||||
anthropicToken: this.options.anthropicToken,
|
||||
});
|
||||
await this.anthropicProvider.start();
|
||||
}
|
||||
if (this.options.perplexityToken) {
|
||||
this.perplexityProvider = new PerplexityProvider({
|
||||
perplexityToken: this.options.perplexityToken,
|
||||
});
|
||||
await this.perplexityProvider.start();
|
||||
}
|
||||
if (this.options.groqToken) {
|
||||
this.groqProvider = new GroqProvider({
|
||||
groqToken: this.options.groqToken,
|
||||
});
|
||||
await this.groqProvider.start();
|
||||
}
|
||||
if (this.options.mistralToken) {
|
||||
this.mistralProvider = new MistralProvider({
|
||||
mistralToken: this.options.mistralToken,
|
||||
chatModel: this.options.mistral?.chatModel,
|
||||
ocrModel: this.options.mistral?.ocrModel,
|
||||
tableFormat: this.options.mistral?.tableFormat,
|
||||
});
|
||||
await this.mistralProvider.start();
|
||||
}
|
||||
if (this.options.xaiToken) {
|
||||
this.xaiProvider = new XAIProvider({
|
||||
xaiToken: this.options.xaiToken,
|
||||
});
|
||||
await this.xaiProvider.start();
|
||||
}
|
||||
if (this.options.elevenlabsToken) {
|
||||
this.elevenlabsProvider = new ElevenLabsProvider({
|
||||
elevenlabsToken: this.options.elevenlabsToken,
|
||||
defaultVoiceId: this.options.elevenlabs?.defaultVoiceId,
|
||||
defaultModelId: this.options.elevenlabs?.defaultModelId,
|
||||
});
|
||||
await this.elevenlabsProvider.start();
|
||||
}
|
||||
if (this.options.ollama) {
|
||||
this.ollamaProvider = new OllamaProvider({
|
||||
baseUrl: this.options.ollama.baseUrl,
|
||||
model: this.options.ollama.model,
|
||||
visionModel: this.options.ollama.visionModel,
|
||||
defaultOptions: this.options.ollama.defaultOptions,
|
||||
defaultTimeout: this.options.ollama.defaultTimeout,
|
||||
});
|
||||
await this.ollamaProvider.start();
|
||||
}
|
||||
if (this.options.exo) {
|
||||
this.exoProvider = new ExoProvider({
|
||||
exoBaseUrl: this.options.exo.baseUrl,
|
||||
apiKey: this.options.exo.apiKey,
|
||||
});
|
||||
await this.exoProvider.start();
|
||||
}
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
if (this.openaiProvider) {
|
||||
await this.openaiProvider.stop();
|
||||
}
|
||||
if (this.anthropicProvider) {
|
||||
await this.anthropicProvider.stop();
|
||||
}
|
||||
if (this.perplexityProvider) {
|
||||
await this.perplexityProvider.stop();
|
||||
}
|
||||
if (this.groqProvider) {
|
||||
await this.groqProvider.stop();
|
||||
}
|
||||
if (this.mistralProvider) {
|
||||
await this.mistralProvider.stop();
|
||||
}
|
||||
if (this.xaiProvider) {
|
||||
await this.xaiProvider.stop();
|
||||
}
|
||||
if (this.elevenlabsProvider) {
|
||||
await this.elevenlabsProvider.stop();
|
||||
}
|
||||
if (this.ollamaProvider) {
|
||||
await this.ollamaProvider.stop();
|
||||
}
|
||||
if (this.exoProvider) {
|
||||
await this.exoProvider.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new conversation
|
||||
*/
|
||||
createConversation(provider: TProvider) {
|
||||
switch (provider) {
|
||||
case 'exo':
|
||||
return Conversation.createWithExo(this);
|
||||
case 'openai':
|
||||
return Conversation.createWithOpenAi(this);
|
||||
case 'anthropic':
|
||||
return Conversation.createWithAnthropic(this);
|
||||
case 'perplexity':
|
||||
return Conversation.createWithPerplexity(this);
|
||||
case 'ollama':
|
||||
return Conversation.createWithOllama(this);
|
||||
case 'groq':
|
||||
return Conversation.createWithGroq(this);
|
||||
case 'mistral':
|
||||
return Conversation.createWithMistral(this);
|
||||
case 'xai':
|
||||
return Conversation.createWithXai(this);
|
||||
case 'elevenlabs':
|
||||
return Conversation.createWithElevenlabs(this);
|
||||
default:
|
||||
throw new Error('Provider not available');
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
import type { SmartAi } from './classes.smartai.js';
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
export class TTS {
|
||||
public static async createWithOpenAi(smartaiRef: SmartAi): Promise<TTS> {
|
||||
return new TTS(smartaiRef);
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
smartaiRef: SmartAi;
|
||||
|
||||
constructor(smartairefArg: SmartAi) {
|
||||
this.smartaiRef = smartairefArg;
|
||||
}
|
||||
}
|
||||
19
ts/index.ts
19
ts/index.ts
@@ -1,11 +1,8 @@
|
||||
export * from './classes.smartai.js';
|
||||
export * from './abstract.classes.multimodal.js';
|
||||
export * from './provider.openai.js';
|
||||
export * from './provider.anthropic.js';
|
||||
export * from './provider.perplexity.js';
|
||||
export * from './provider.groq.js';
|
||||
export * from './provider.mistral.js';
|
||||
export * from './provider.ollama.js';
|
||||
export * from './provider.xai.js';
|
||||
export * from './provider.exo.js';
|
||||
export * from './provider.elevenlabs.js';
|
||||
export { getModel } from './smartai.classes.smartai.js';
|
||||
export type { ISmartAiOptions, TProvider, IOllamaModelOptions, LanguageModelV3 } from './smartai.interfaces.js';
|
||||
export { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';
|
||||
export { createOllamaModel } from './smartai.provider.ollama.js';
|
||||
|
||||
// Re-export commonly used ai-sdk functions for consumer convenience
|
||||
export { generateText, streamText, tool, jsonSchema } from 'ai';
|
||||
export type { ModelMessage, ToolSet, StreamTextResult } from 'ai';
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
export const packageDir = plugins.path.join(plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url), '../');
|
||||
export const nogitDir = plugins.path.join(packageDir, './.nogit');
|
||||
@@ -1,38 +1,22 @@
|
||||
// node native
|
||||
import * as path from 'path';
|
||||
// ai sdk core
|
||||
import { generateText, streamText, wrapLanguageModel, tool, jsonSchema } from 'ai';
|
||||
export { generateText, streamText, wrapLanguageModel, tool, jsonSchema };
|
||||
|
||||
// ai sdk providers
|
||||
import { createAnthropic } from '@ai-sdk/anthropic';
|
||||
import { createOpenAI } from '@ai-sdk/openai';
|
||||
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
||||
import { createGroq } from '@ai-sdk/groq';
|
||||
import { createMistral } from '@ai-sdk/mistral';
|
||||
import { createXai } from '@ai-sdk/xai';
|
||||
import { createPerplexity } from '@ai-sdk/perplexity';
|
||||
|
||||
export {
|
||||
path,
|
||||
}
|
||||
|
||||
// @push.rocks scope
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartarray from '@push.rocks/smartarray';
|
||||
import * as smartfs from '@push.rocks/smartfs';
|
||||
import * as smartpath from '@push.rocks/smartpath';
|
||||
import * as smartpdf from '@push.rocks/smartpdf';
|
||||
import * as smartpromise from '@push.rocks/smartpromise';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import * as webstream from '@push.rocks/webstream';
|
||||
|
||||
export {
|
||||
smartarray,
|
||||
qenv,
|
||||
smartfs,
|
||||
smartpath,
|
||||
smartpdf,
|
||||
smartpromise,
|
||||
smartrequest,
|
||||
webstream,
|
||||
}
|
||||
|
||||
// third party
|
||||
import * as anthropic from '@anthropic-ai/sdk';
|
||||
import * as mistralai from '@mistralai/mistralai';
|
||||
import * as openai from 'openai';
|
||||
|
||||
export {
|
||||
anthropic,
|
||||
mistralai,
|
||||
openai,
|
||||
}
|
||||
createAnthropic,
|
||||
createOpenAI,
|
||||
createGoogleGenerativeAI,
|
||||
createGroq,
|
||||
createMistral,
|
||||
createXai,
|
||||
createPerplexity,
|
||||
};
|
||||
|
||||
@@ -1,446 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
import type { ImageBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources/messages';
|
||||
|
||||
type ContentBlock = ImageBlockParam | TextBlockParam;
|
||||
|
||||
export interface IAnthropicProviderOptions {
|
||||
anthropicToken: string;
|
||||
enableWebSearch?: boolean;
|
||||
searchDomainAllowList?: string[];
|
||||
searchDomainBlockList?: string[];
|
||||
extendedThinking?: 'quick' | 'normal' | 'deep' | 'off';
|
||||
}
|
||||
|
||||
export class AnthropicProvider extends MultiModalModel {
|
||||
private options: IAnthropicProviderOptions;
|
||||
public anthropicApiClient: plugins.anthropic.default;
|
||||
|
||||
constructor(optionsArg: IAnthropicProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg // Ensure the token is stored
|
||||
}
|
||||
|
||||
async start() {
|
||||
await super.start();
|
||||
this.anthropicApiClient = new plugins.anthropic.default({
|
||||
apiKey: this.options.anthropicToken,
|
||||
});
|
||||
}
|
||||
|
||||
async stop() {
|
||||
await super.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the thinking configuration based on provider options.
|
||||
* Defaults to 'normal' mode (8000 tokens) if not specified.
|
||||
*/
|
||||
private getThinkingConfig(): { type: 'enabled'; budget_tokens: number } | undefined {
|
||||
const mode = this.options.extendedThinking ?? 'normal';
|
||||
|
||||
const budgetMap = {
|
||||
quick: 2048,
|
||||
normal: 8000,
|
||||
deep: 16000,
|
||||
off: 0,
|
||||
};
|
||||
|
||||
const budget = budgetMap[mode];
|
||||
|
||||
return budget > 0 ? { type: 'enabled', budget_tokens: budget } : undefined;
|
||||
}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to Anthropic
|
||||
if (currentMessage) {
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const stream = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
system: '',
|
||||
stream: true,
|
||||
max_tokens: 20000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Process each chunk from Anthropic
|
||||
for await (const chunk of stream) {
|
||||
const content = chunk.delta?.text;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
// Convert message history to Anthropic format
|
||||
const messages = optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
|
||||
content: msg.content
|
||||
}));
|
||||
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const result = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
system: optionsArg.systemMessage,
|
||||
messages: [
|
||||
...messages,
|
||||
{ role: 'user' as const, content: optionsArg.userMessage }
|
||||
],
|
||||
max_tokens: 20000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Extract text content from the response
|
||||
let message = '';
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
message += block.text;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
// Anthropic does not provide an audio API, so this method is not implemented.
|
||||
throw new Error('Audio generation is not yet supported by Anthropic.');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
const base64Image = optionsArg.image.toString('base64');
|
||||
|
||||
const content: ContentBlock[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: optionsArg.prompt
|
||||
},
|
||||
{
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/jpeg',
|
||||
data: base64Image
|
||||
}
|
||||
}
|
||||
];
|
||||
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const result = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content
|
||||
}],
|
||||
max_tokens: 10000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Extract text content from the response
|
||||
let message = '';
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
message += block.text;
|
||||
}
|
||||
}
|
||||
return message;
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
// Ensure SmartPdf is initialized before processing documents
|
||||
await this.ensureSmartpdfReady();
|
||||
|
||||
// Convert PDF documents to images using SmartPDF
|
||||
let documentImageBytesArray: Uint8Array[] = [];
|
||||
|
||||
for (const pdfDocument of optionsArg.pdfDocuments) {
|
||||
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
|
||||
documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
|
||||
}
|
||||
|
||||
// Convert message history to Anthropic format
|
||||
const messages = optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role === 'assistant' ? 'assistant' as const : 'user' as const,
|
||||
content: msg.content
|
||||
}));
|
||||
|
||||
// Create content array with text and images
|
||||
const content: ContentBlock[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: optionsArg.userMessage
|
||||
}
|
||||
];
|
||||
|
||||
// Add each document page as an image
|
||||
for (const imageBytes of documentImageBytesArray) {
|
||||
content.push({
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/png',
|
||||
data: Buffer.from(imageBytes).toString('base64')
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
const result = await this.anthropicApiClient.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
system: optionsArg.systemMessage,
|
||||
messages: [
|
||||
...messages,
|
||||
{ role: 'user', content }
|
||||
],
|
||||
max_tokens: 20000,
|
||||
...(thinkingConfig && { thinking: thinkingConfig }),
|
||||
});
|
||||
|
||||
// Extract text content from the response
|
||||
let message = '';
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
message += block.text;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
message: {
|
||||
role: 'assistant',
|
||||
content: message
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
// Prepare the messages for the research request
|
||||
const systemMessage = `You are a research assistant with web search capabilities.
|
||||
Provide comprehensive, well-researched answers with citations and sources.
|
||||
When searching the web, be thorough and cite your sources accurately.`;
|
||||
|
||||
try {
|
||||
// Build the tool configuration for web search
|
||||
const tools: any[] = [];
|
||||
|
||||
if (this.options.enableWebSearch) {
|
||||
const webSearchTool: any = {
|
||||
type: 'web_search_20250305',
|
||||
name: 'web_search'
|
||||
};
|
||||
|
||||
// Add optional parameters
|
||||
if (optionsArg.maxSources) {
|
||||
webSearchTool.max_uses = optionsArg.maxSources;
|
||||
}
|
||||
|
||||
if (this.options.searchDomainAllowList?.length) {
|
||||
webSearchTool.allowed_domains = this.options.searchDomainAllowList;
|
||||
} else if (this.options.searchDomainBlockList?.length) {
|
||||
webSearchTool.blocked_domains = this.options.searchDomainBlockList;
|
||||
}
|
||||
|
||||
tools.push(webSearchTool);
|
||||
}
|
||||
|
||||
// Configure the request based on search depth
|
||||
const maxTokens = optionsArg.searchDepth === 'deep' ? 20000 :
|
||||
optionsArg.searchDepth === 'advanced' ? 20000 : 20000;
|
||||
|
||||
// Add thinking configuration if enabled
|
||||
const thinkingConfig = this.getThinkingConfig();
|
||||
|
||||
// Create the research request
|
||||
// Note: When thinking is enabled, temperature must be 1 (or omitted)
|
||||
const requestParams: any = {
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
system: systemMessage,
|
||||
messages: [
|
||||
{
|
||||
role: 'user' as const,
|
||||
content: optionsArg.query
|
||||
}
|
||||
],
|
||||
max_tokens: maxTokens,
|
||||
// Only set temperature when thinking is NOT enabled
|
||||
...(thinkingConfig ? {} : { temperature: 0.7 })
|
||||
};
|
||||
|
||||
// Add tools if web search is enabled
|
||||
if (tools.length > 0) {
|
||||
requestParams.tools = tools;
|
||||
}
|
||||
|
||||
// Add thinking configuration if enabled
|
||||
if (thinkingConfig) {
|
||||
requestParams.thinking = thinkingConfig;
|
||||
}
|
||||
|
||||
// Execute the research request
|
||||
const result = await this.anthropicApiClient.messages.create(requestParams);
|
||||
|
||||
// Extract the answer from content blocks
|
||||
let answer = '';
|
||||
const sources: Array<{ url: string; title: string; snippet: string }> = [];
|
||||
const searchQueries: string[] = [];
|
||||
|
||||
// Process content blocks
|
||||
for (const block of result.content) {
|
||||
if ('text' in block) {
|
||||
// Accumulate text content
|
||||
answer += block.text;
|
||||
|
||||
// Extract citations if present
|
||||
if ('citations' in block && Array.isArray(block.citations)) {
|
||||
for (const citation of block.citations) {
|
||||
if (citation.type === 'web_search_result_location') {
|
||||
sources.push({
|
||||
title: citation.title || '',
|
||||
url: citation.url || '',
|
||||
snippet: citation.cited_text || ''
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ('type' in block && block.type === 'server_tool_use') {
|
||||
// Extract search queries from server tool use
|
||||
if (block.name === 'web_search' && block.input && typeof block.input === 'object' && 'query' in block.input) {
|
||||
searchQueries.push((block.input as any).query);
|
||||
}
|
||||
} else if ('type' in block && block.type === 'web_search_tool_result') {
|
||||
// Extract sources from web search results
|
||||
if (Array.isArray(block.content)) {
|
||||
for (const result of block.content) {
|
||||
if (result.type === 'web_search_result') {
|
||||
// Only add if not already in sources (avoid duplicates from citations)
|
||||
if (!sources.some(s => s.url === result.url)) {
|
||||
sources.push({
|
||||
title: result.title || '',
|
||||
url: result.url || '',
|
||||
snippet: '' // Search results don't include snippets, only citations do
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: Parse markdown-style links if no citations found
|
||||
if (sources.length === 0) {
|
||||
const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
|
||||
let match: RegExpExecArray | null;
|
||||
|
||||
while ((match = urlRegex.exec(answer)) !== null) {
|
||||
sources.push({
|
||||
title: match[1],
|
||||
url: match[2],
|
||||
snippet: ''
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Check if web search was used based on usage info
|
||||
const webSearchCount = result.usage?.server_tool_use?.web_search_requests || 0;
|
||||
|
||||
return {
|
||||
answer,
|
||||
sources,
|
||||
searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
|
||||
metadata: {
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
searchDepth: optionsArg.searchDepth || 'basic',
|
||||
tokensUsed: result.usage?.output_tokens,
|
||||
webSearchesPerformed: webSearchCount
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Anthropic research error:', error);
|
||||
throw new Error(`Failed to perform research: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Anthropic
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Anthropic. Claude can only analyze images, not generate them. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Anthropic
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Anthropic. Claude can only analyze images, not edit them. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,116 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
export interface IElevenLabsProviderOptions {
|
||||
elevenlabsToken: string;
|
||||
defaultVoiceId?: string;
|
||||
defaultModelId?: string;
|
||||
}
|
||||
|
||||
export interface IElevenLabsVoiceSettings {
|
||||
stability?: number;
|
||||
similarity_boost?: number;
|
||||
style?: number;
|
||||
use_speaker_boost?: boolean;
|
||||
}
|
||||
|
||||
export class ElevenLabsProvider extends MultiModalModel {
|
||||
private options: IElevenLabsProviderOptions;
|
||||
private baseUrl: string = 'https://api.elevenlabs.io/v1';
|
||||
|
||||
constructor(optionsArg: IElevenLabsProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
await super.start();
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
await super.stop();
|
||||
}
|
||||
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
throw new Error('ElevenLabs does not support chat functionality. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
throw new Error('ElevenLabs does not support chat streaming functionality. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
|
||||
public async audio(optionsArg: {
|
||||
message: string;
|
||||
voiceId?: string;
|
||||
modelId?: string;
|
||||
voiceSettings?: IElevenLabsVoiceSettings;
|
||||
}): Promise<NodeJS.ReadableStream> {
|
||||
// Use Samara voice as default fallback
|
||||
const voiceId = optionsArg.voiceId || this.options.defaultVoiceId || '19STyYD15bswVz51nqLf';
|
||||
|
||||
const modelId = optionsArg.modelId || this.options.defaultModelId || 'eleven_v3';
|
||||
|
||||
const url = `${this.baseUrl}/text-to-speech/${voiceId}`;
|
||||
|
||||
const requestBody: any = {
|
||||
text: optionsArg.message,
|
||||
model_id: modelId,
|
||||
};
|
||||
|
||||
if (optionsArg.voiceSettings) {
|
||||
requestBody.voice_settings = optionsArg.voiceSettings;
|
||||
}
|
||||
|
||||
const response = await plugins.smartrequest.SmartRequest.create()
|
||||
.url(url)
|
||||
.header('xi-api-key', this.options.elevenlabsToken)
|
||||
.json(requestBody)
|
||||
.autoDrain(false)
|
||||
.post();
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`ElevenLabs API error: ${response.status} ${response.statusText} - ${errorText}`);
|
||||
}
|
||||
|
||||
const webStream = response.stream();
|
||||
const nodeStream = Readable.fromWeb(webStream as any);
|
||||
return nodeStream;
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('ElevenLabs does not support vision functionality. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: any[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('ElevenLabs does not support document processing. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('ElevenLabs does not support research capabilities. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('ElevenLabs does not support image generation. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('ElevenLabs does not support image editing. This provider is specialized for text-to-speech only.');
|
||||
}
|
||||
}
|
||||
@@ -1,155 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
|
||||
|
||||
export interface IExoProviderOptions {
|
||||
exoBaseUrl?: string;
|
||||
apiKey?: string;
|
||||
}
|
||||
|
||||
export class ExoProvider extends MultiModalModel {
|
||||
private options: IExoProviderOptions;
|
||||
public openAiApiClient: plugins.openai.default;
|
||||
|
||||
constructor(optionsArg: IExoProviderOptions = {}) {
|
||||
super();
|
||||
this.options = {
|
||||
exoBaseUrl: 'http://localhost:8080/v1', // Default Exo API endpoint
|
||||
...optionsArg
|
||||
};
|
||||
}
|
||||
|
||||
public async start() {
|
||||
this.openAiApiClient = new plugins.openai.default({
|
||||
apiKey: this.options.apiKey || 'not-needed', // Exo might not require an API key for local deployment
|
||||
baseURL: this.options.exoBaseUrl,
|
||||
});
|
||||
}
|
||||
|
||||
public async stop() {}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
transform: async (chunk, controller) => {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = message;
|
||||
|
||||
// Process the message based on its type
|
||||
if (message.type === 'message') {
|
||||
const response = await this.chat({
|
||||
systemMessage: '',
|
||||
userMessage: message.content,
|
||||
messageHistory: [{ role: message.role as 'user' | 'assistant' | 'system', content: message.content }]
|
||||
});
|
||||
|
||||
controller.enqueue(JSON.stringify(response) + '\n');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error processing message:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
currentMessage = message;
|
||||
} catch (error) {
|
||||
console.error('Error processing remaining buffer:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
public async chat(options: ChatOptions): Promise<ChatResponse> {
|
||||
const messages: ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: options.systemMessage },
|
||||
...options.messageHistory,
|
||||
{ role: 'user', content: options.userMessage }
|
||||
];
|
||||
|
||||
try {
|
||||
const response = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'local-model', // Exo uses local models
|
||||
messages: messages,
|
||||
stream: false
|
||||
});
|
||||
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: response.choices[0]?.message?.content || ''
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Error in chat completion:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by Exo provider');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision processing is not supported by Exo provider');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('Document processing is not supported by Exo provider');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research capabilities are not yet supported by Exo provider.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Exo
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Exo. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Exo
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Exo. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,219 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
export interface IGroqProviderOptions {
|
||||
groqToken: string;
|
||||
model?: string;
|
||||
}
|
||||
|
||||
export class GroqProvider extends MultiModalModel {
|
||||
private options: IGroqProviderOptions;
|
||||
private baseUrl = 'https://api.groq.com/v1';
|
||||
|
||||
constructor(optionsArg: IGroqProviderOptions) {
|
||||
super();
|
||||
this.options = {
|
||||
...optionsArg,
|
||||
model: optionsArg.model || 'llama-3.3-70b-versatile', // Default model
|
||||
};
|
||||
}
|
||||
|
||||
async start() {}
|
||||
|
||||
async stop() {}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
transform: async (chunk, controller) => {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to Groq
|
||||
if (currentMessage) {
|
||||
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.groqToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.options.model,
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
stream: true,
|
||||
}),
|
||||
});
|
||||
|
||||
// Process each chunk from Groq
|
||||
const reader = response.body?.getReader();
|
||||
if (reader) {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = new TextDecoder().decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
const content = parsed.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to parse SSE data:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
const messages = [
|
||||
// System message
|
||||
{
|
||||
role: 'system',
|
||||
content: optionsArg.systemMessage,
|
||||
},
|
||||
// Message history
|
||||
...optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role,
|
||||
content: msg.content,
|
||||
})),
|
||||
// User message
|
||||
{
|
||||
role: 'user',
|
||||
content: optionsArg.userMessage,
|
||||
},
|
||||
];
|
||||
|
||||
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.groqToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.options.model,
|
||||
messages,
|
||||
temperature: 0.7,
|
||||
max_completion_tokens: 1024,
|
||||
stream: false,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.json();
|
||||
throw new Error(`Groq API error: ${error.message || response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: result.choices[0].message.content,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
// Groq does not provide an audio API, so this method is not implemented.
|
||||
throw new Error('Audio generation is not yet supported by Groq.');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision tasks are not yet supported by Groq.');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('Document processing is not yet supported by Groq.');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research capabilities are not yet supported by Groq provider.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Groq
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Groq. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Groq
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Groq. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,352 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
export interface IMistralProviderOptions {
|
||||
mistralToken: string;
|
||||
chatModel?: string; // default: 'mistral-large-latest'
|
||||
ocrModel?: string; // default: 'mistral-ocr-latest'
|
||||
tableFormat?: 'markdown' | 'html';
|
||||
}
|
||||
|
||||
export class MistralProvider extends MultiModalModel {
|
||||
private options: IMistralProviderOptions;
|
||||
public mistralClient: plugins.mistralai.Mistral;
|
||||
|
||||
constructor(optionsArg: IMistralProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
async start() {
|
||||
await super.start();
|
||||
this.mistralClient = new plugins.mistralai.Mistral({
|
||||
apiKey: this.options.mistralToken,
|
||||
});
|
||||
}
|
||||
|
||||
async stop() {
|
||||
await super.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronous chat interaction using Mistral's chat API
|
||||
*/
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
// Convert message history to Mistral format
|
||||
const messages: Array<{
|
||||
role: 'system' | 'user' | 'assistant';
|
||||
content: string;
|
||||
}> = [];
|
||||
|
||||
// Add system message first
|
||||
if (optionsArg.systemMessage) {
|
||||
messages.push({
|
||||
role: 'system',
|
||||
content: optionsArg.systemMessage
|
||||
});
|
||||
}
|
||||
|
||||
// Add message history
|
||||
for (const msg of optionsArg.messageHistory) {
|
||||
messages.push({
|
||||
role: msg.role === 'system' ? 'system' : msg.role === 'assistant' ? 'assistant' : 'user',
|
||||
content: msg.content
|
||||
});
|
||||
}
|
||||
|
||||
// Add current user message
|
||||
messages.push({
|
||||
role: 'user',
|
||||
content: optionsArg.userMessage
|
||||
});
|
||||
|
||||
const result = await this.mistralClient.chat.complete({
|
||||
model: this.options.chatModel || 'mistral-large-latest',
|
||||
messages: messages,
|
||||
});
|
||||
|
||||
// Extract content from response
|
||||
const choice = result.choices?.[0];
|
||||
let content = '';
|
||||
|
||||
if (choice?.message?.content) {
|
||||
if (typeof choice.message.content === 'string') {
|
||||
content = choice.message.content;
|
||||
} else if (Array.isArray(choice.message.content)) {
|
||||
// Handle array of content chunks
|
||||
content = choice.message.content
|
||||
.map((chunk: any) => {
|
||||
if (typeof chunk === 'string') return chunk;
|
||||
if (chunk && typeof chunk === 'object' && 'text' in chunk) return chunk.text;
|
||||
return '';
|
||||
})
|
||||
.join('');
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: content,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Streaming chat using Mistral's streaming API
|
||||
*/
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
const mistralClient = this.mistralClient;
|
||||
const chatModel = this.options.chatModel || 'mistral-large-latest';
|
||||
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
|
||||
// Build messages array
|
||||
const messages: Array<{
|
||||
role: 'system' | 'user' | 'assistant';
|
||||
content: string;
|
||||
}> = [];
|
||||
|
||||
if (message.systemMessage) {
|
||||
messages.push({
|
||||
role: 'system',
|
||||
content: message.systemMessage
|
||||
});
|
||||
}
|
||||
|
||||
messages.push({
|
||||
role: message.role === 'assistant' ? 'assistant' : 'user',
|
||||
content: message.content
|
||||
});
|
||||
|
||||
// Use Mistral streaming
|
||||
const stream = await mistralClient.chat.stream({
|
||||
model: chatModel,
|
||||
messages: messages,
|
||||
});
|
||||
|
||||
// Process streaming events
|
||||
for await (const event of stream) {
|
||||
const delta = event.data?.choices?.[0]?.delta;
|
||||
if (delta?.content) {
|
||||
if (typeof delta.content === 'string') {
|
||||
controller.enqueue(delta.content);
|
||||
} else if (Array.isArray(delta.content)) {
|
||||
for (const chunk of delta.content) {
|
||||
if (typeof chunk === 'string') {
|
||||
controller.enqueue(chunk);
|
||||
} else if (chunk && typeof chunk === 'object' && 'text' in chunk) {
|
||||
controller.enqueue((chunk as any).text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
/**
|
||||
* Audio generation is not supported by Mistral
|
||||
*/
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by Mistral. Please use ElevenLabs or OpenAI provider for audio generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Vision using Mistral's OCR API for image analysis
|
||||
*/
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
const base64Image = optionsArg.image.toString('base64');
|
||||
|
||||
// Detect image type from buffer header
|
||||
let mimeType = 'image/jpeg';
|
||||
if (optionsArg.image[0] === 0x89 && optionsArg.image[1] === 0x50) {
|
||||
mimeType = 'image/png';
|
||||
} else if (optionsArg.image[0] === 0x47 && optionsArg.image[1] === 0x49) {
|
||||
mimeType = 'image/gif';
|
||||
} else if (optionsArg.image[0] === 0x52 && optionsArg.image[1] === 0x49) {
|
||||
mimeType = 'image/webp';
|
||||
}
|
||||
|
||||
// Use OCR API with image data URL
|
||||
const ocrResult = await this.mistralClient.ocr.process({
|
||||
model: this.options.ocrModel || 'mistral-ocr-latest',
|
||||
document: {
|
||||
imageUrl: `data:${mimeType};base64,${base64Image}`,
|
||||
type: 'image_url',
|
||||
},
|
||||
});
|
||||
|
||||
// Combine markdown from all pages
|
||||
const extractedText = ocrResult.pages.map(page => page.markdown).join('\n\n');
|
||||
|
||||
// If a prompt is provided, use chat to analyze the extracted text
|
||||
if (optionsArg.prompt && optionsArg.prompt.trim()) {
|
||||
const chatResponse = await this.chat({
|
||||
systemMessage: 'You are an assistant analyzing image content. The following is text extracted from an image using OCR.',
|
||||
userMessage: `${optionsArg.prompt}\n\nExtracted content:\n${extractedText}`,
|
||||
messageHistory: [],
|
||||
});
|
||||
return chatResponse.message;
|
||||
}
|
||||
|
||||
return extractedText;
|
||||
}
|
||||
|
||||
  /**
   * Document processing using Mistral's OCR API.
   * PDFs are uploaded via the Files API first, then processed with OCR;
   * the extracted text is finally fed to the chat API together with the
   * user's question. Uploaded files are deleted afterwards (best effort).
   *
   * @param optionsArg.systemMessage  System prompt (falls back to a generic one).
   * @param optionsArg.userMessage    The user's question about the documents.
   * @param optionsArg.pdfDocuments   Raw PDF bytes, one entry per document.
   * @param optionsArg.messageHistory Prior conversation turns.
   * @returns An assistant message wrapping the chat answer.
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    const extractedTexts: string[] = [];
    // Track uploads so the finally block can clean them up even on error.
    const uploadedFileIds: string[] = [];

    try {
      // Process each PDF document using Mistral OCR
      for (let i = 0; i < optionsArg.pdfDocuments.length; i++) {
        const pdfDocument = optionsArg.pdfDocuments[i];

        // Upload the PDF to Mistral's Files API first
        const uploadResult = await this.mistralClient.files.upload({
          file: {
            fileName: `document_${i + 1}.pdf`,
            content: pdfDocument,
          },
          purpose: 'ocr',
        });

        uploadedFileIds.push(uploadResult.id);

        // Now use OCR with the uploaded file
        const ocrResult = await this.mistralClient.ocr.process({
          model: this.options.ocrModel || 'mistral-ocr-latest',
          document: {
            type: 'file',
            fileId: uploadResult.id,
          },
          tableFormat: this.options.tableFormat || 'markdown',
        });

        // Combine all page markdown with page separators
        const pageTexts = ocrResult.pages.map((page, index) => {
          let pageContent = `--- Page ${index + 1} ---\n${page.markdown}`;

          // Include tables if present
          if (page.tables && page.tables.length > 0) {
            pageContent += '\n\n**Tables:**\n' + page.tables.map((t: any) => t.markdown || t.html || '').join('\n');
          }

          // Include header/footer if present; header is prepended, footer appended.
          if (page.header) {
            pageContent = `Header: ${page.header}\n${pageContent}`;
          }
          if (page.footer) {
            pageContent += `\nFooter: ${page.footer}`;
          }

          return pageContent;
        }).join('\n\n');

        extractedTexts.push(pageTexts);
      }

      // Combine all document texts; multi-document input gets per-document headers.
      const allDocumentText = extractedTexts.length === 1
        ? extractedTexts[0]
        : extractedTexts.map((text, i) => `=== Document ${i + 1} ===\n${text}`).join('\n\n');

      // Use chat API to process the extracted text with the user's query
      const chatResponse = await this.chat({
        systemMessage: optionsArg.systemMessage || 'You are a helpful assistant analyzing document content.',
        userMessage: `${optionsArg.userMessage}\n\n---\nDocument Content:\n${allDocumentText}`,
        messageHistory: optionsArg.messageHistory,
      });

      return {
        message: {
          role: 'assistant',
          content: chatResponse.message
        }
      };
    } finally {
      // Clean up uploaded files
      for (const fileId of uploadedFileIds) {
        try {
          await this.mistralClient.files.delete({ fileId });
        } catch (cleanupError) {
          // Ignore cleanup errors - files may have already been auto-deleted
          console.warn(`Failed to delete temporary file ${fileId}:`, cleanupError);
        }
      }
    }
  }
|
||||
|
||||
/**
|
||||
* Research is not natively supported by Mistral
|
||||
*/
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research/web search is not supported by Mistral. Please use Perplexity or Anthropic provider for research capabilities.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Mistral
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Mistral. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Mistral
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Mistral. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,705 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse,
|
||||
StreamingChatOptions
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/**
 * Ollama model runtime options, passed through as the `options` field of
 * /api/chat requests. Defaults in the comments are Ollama's, not this library's.
 * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
 */
export interface IOllamaModelOptions {
  num_ctx?: number;       // Context window size in tokens (default: 2048)
  temperature?: number;   // Sampling temperature; 0 = deterministic (default: 0.8)
  top_k?: number;         // Top-k sampling (default: 40)
  top_p?: number;         // Nucleus sampling (default: 0.9)
  repeat_penalty?: number;// Penalty for repeated tokens (default: 1.1)
  num_predict?: number;   // Max tokens to predict
  stop?: string[];        // Stop sequences that end generation
  seed?: number;          // Random seed for reproducibility
  think?: boolean;        // Enable thinking/reasoning mode (for GPT-OSS, QwQ, etc.)
}
|
||||
|
||||
/**
 * JSON Schema tool definition for Ollama native tool calling.
 * Mirrors the OpenAI-style function-tool shape accepted by /api/chat.
 * @see https://docs.ollama.com/capabilities/tool-calling
 */
export interface IOllamaTool {
  type: 'function';
  function: {
    name: string;          // Tool name the model will reference in tool_calls
    description: string;   // Natural-language description shown to the model
    parameters: {
      type: 'object';
      // JSON-Schema-like property map; `enum` restricts allowed string values.
      properties: Record<string, {
        type: string;
        description?: string;
        enum?: string[];
      }>;
      required?: string[]; // Names of properties the model must supply
    };
  };
}
|
||||
|
||||
/**
 * Tool call returned by the model in native tool calling mode.
 * `arguments` is always an object here — string-encoded arguments are parsed
 * by the provider before being surfaced.
 */
export interface IOllamaToolCall {
  function: {
    name: string;
    arguments: Record<string, unknown>;
    index?: number; // Position of the call when the model emits several
  };
}
|
||||
|
||||
/** Construction options for {@link OllamaProvider}. All fields optional. */
export interface IOllamaProviderOptions {
  baseUrl?: string;       // Ollama server URL (default: http://localhost:11434)
  model?: string;         // Default chat model (default: 'llama2')
  visionModel?: string;   // Model to use for vision tasks (e.g. 'llava')
  defaultOptions?: IOllamaModelOptions; // Default model options merged into every request
  defaultTimeout?: number;// Default timeout in ms (default: 120000)
}
|
||||
|
||||
/**
 * Extended chat options with Ollama-specific settings.
 * Per-request values take precedence over the provider defaults.
 */
export interface IOllamaChatOptions extends ChatOptions {
  options?: IOllamaModelOptions; // Per-request model options (merged over defaults)
  timeout?: number;              // Per-request timeout in ms
  model?: string;                // Per-request model override
  tools?: IOllamaTool[];         // Available tools for native function calling
  // images is inherited from ChatOptions
}
|
||||
|
||||
/**
 * Chunk emitted during streaming. `stats` is only populated on the final
 * chunk (when `done` is true).
 */
export interface IOllamaStreamChunk {
  content: string;
  thinking?: string;             // For models with extended thinking
  toolCalls?: IOllamaToolCall[]; // Tool calls in streaming mode
  done: boolean;
  stats?: {
    totalDuration?: number;      // Total request duration reported by Ollama
    evalCount?: number;          // Number of tokens evaluated
  };
}
|
||||
|
||||
/**
 * Extended chat response with Ollama-specific fields
 * (thinking trace, native tool calls, and generation statistics).
 */
export interface IOllamaChatResponse extends ChatResponse {
  thinking?: string;
  toolCalls?: IOllamaToolCall[]; // Tool calls from model (native tool calling)
  stats?: {
    totalDuration?: number;
    evalCount?: number;
  };
}
|
||||
|
||||
/**
 * Multi-modal provider backed by a local/remote Ollama server.
 * Talks to the Ollama HTTP API directly via fetch (no SDK dependency).
 */
export class OllamaProvider extends MultiModalModel {
  private options: IOllamaProviderOptions;
  private baseUrl: string;                     // Ollama server URL
  private model: string;                       // Default chat model
  private visionModel: string;                 // Model for vision/document tasks
  private defaultOptions: IOllamaModelOptions; // Merged into every request
  private defaultTimeout: number;              // Request timeout in ms

  constructor(optionsArg: IOllamaProviderOptions = {}) {
    super();
    this.options = optionsArg;
    this.baseUrl = optionsArg.baseUrl || 'http://localhost:11434';
    this.model = optionsArg.model || 'llama2';
    this.visionModel = optionsArg.visionModel || 'llava';
    this.defaultOptions = optionsArg.defaultOptions || {};
    this.defaultTimeout = optionsArg.defaultTimeout || 120000;
  }

  /**
   * Starts the provider and verifies the Ollama server is reachable by
   * probing /api/tags.
   * @throws Error when the server cannot be contacted.
   */
  async start() {
    await super.start();
    // Verify Ollama is running
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`);
      if (!response.ok) {
        throw new Error('Failed to connect to Ollama server');
      }
    } catch (error) {
      throw new Error(`Failed to connect to Ollama server at ${this.baseUrl}: ${error.message}`);
    }
  }

  async stop() {
    await super.stop();
  }

  /**
   * Stream-to-stream chat: reads newline-delimited JSON messages from `input`,
   * forwards the most recent complete message to Ollama with streaming on,
   * and emits the response content tokens on the returned stream.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: { role: string; content: string; } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer (one per line).
        // NOTE(review): only the LAST complete message in a chunk is kept;
        // earlier ones in the same chunk are overwritten — confirm intended.
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: message.role || 'user',
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to Ollama
        if (currentMessage) {
          const response = await fetch(`${this.baseUrl}/api/chat`, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
            },
            body: JSON.stringify({
              model: this.model,
              messages: [{ role: currentMessage.role, content: currentMessage.content }],
              stream: true,
            }),
          });

          // Process each NDJSON chunk from Ollama's streaming response
          const reader = response.body?.getReader();
          if (reader) {
            try {
              while (true) {
                const { done, value } = await reader.read();
                if (done) break;

                // NOTE(review): decodes without { stream: true }; a multi-byte
                // character split across network chunks could be mangled.
                const chunk = new TextDecoder().decode(value);
                const lines = chunk.split('\n');

                for (const line of lines) {
                  if (line.trim()) {
                    try {
                      const parsed = JSON.parse(line);
                      const content = parsed.message?.content;
                      if (content) {
                        controller.enqueue(content);
                      }
                    } catch (e) {
                      console.error('Failed to parse Ollama response:', e);
                    }
                  }
                }
              }
            } finally {
              reader.releaseLock();
            }
          }

          currentMessage = null;
        }
      },

      // Flush any trailing (newline-less) input as a final message.
      flush(controller) {
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }

  /**
   * Synchronous (non-streaming) chat interaction using the provider defaults.
   * Forwards history images/reasoning when present and surfaces the model's
   * thinking trace as `reasoning`.
   */
  public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
    // Format messages for Ollama
    const historyMessages = optionsArg.messageHistory.map((msg) => {
      const formatted: { role: string; content: string; images?: string[]; reasoning?: string } = {
        role: msg.role,
        content: msg.content,
      };
      if (msg.images && msg.images.length > 0) {
        formatted.images = msg.images;
      }
      if (msg.reasoning) {
        formatted.reasoning = msg.reasoning;
      }
      return formatted;
    });

    // Build user message with optional images
    const userMessage: { role: string; content: string; images?: string[] } = {
      role: 'user',
      content: optionsArg.userMessage,
    };
    if (optionsArg.images && optionsArg.images.length > 0) {
      userMessage.images = optionsArg.images;
    }

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...historyMessages,
      userMessage,
    ];

    // Build request body - include think parameter if set
    const requestBody: Record<string, unknown> = {
      model: this.model,
      messages: messages,
      stream: false,
      options: this.defaultOptions,
    };

    // Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
    if (this.defaultOptions.think !== undefined) {
      requestBody.think = this.defaultOptions.think;
    }

    // Make API call to Ollama with defaultOptions and timeout
    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(requestBody),
      signal: AbortSignal.timeout(this.defaultTimeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();

    return {
      role: 'assistant' as const,
      message: result.message.content,
      // Newer Ollama returns `thinking`; fall back to `reasoning` for older shapes.
      reasoning: result.message.thinking || result.message.reasoning,
    };
  }

  /**
   * Streaming chat with token callback (implements MultiModalModel interface).
   * Calls onToken for each token generated during the response; thinking
   * tokens are forwarded through the same callback before content tokens.
   */
  public async chatStreaming(optionsArg: StreamingChatOptions): Promise<ChatResponse> {
    const onToken = optionsArg.onToken;

    // Use existing collectStreamResponse with callback, including images
    const response = await this.collectStreamResponse(
      {
        systemMessage: optionsArg.systemMessage,
        userMessage: optionsArg.userMessage,
        messageHistory: optionsArg.messageHistory,
        images: optionsArg.images,
      },
      (chunk) => {
        if (onToken) {
          if (chunk.thinking) onToken(chunk.thinking);
          if (chunk.content) onToken(chunk.content);
        }
      }
    );

    return {
      role: 'assistant' as const,
      message: response.message,
      reasoning: response.thinking,
    };
  }

  /**
   * Streaming chat with async iteration and full options support
   * (per-request model/timeout/options, native tools, think mode).
   * Yields one {@link IOllamaStreamChunk} per NDJSON line from Ollama.
   */
  public async chatStreamResponse(
    optionsArg: IOllamaChatOptions
  ): Promise<AsyncIterable<IOllamaStreamChunk>> {
    const model = optionsArg.model || this.model;
    const timeout = optionsArg.timeout || this.defaultTimeout;
    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };

    // Format history messages with optional images, reasoning, and tool_calls
    const historyMessages = optionsArg.messageHistory.map((msg) => {
      const formatted: { role: string; content: string; images?: string[]; reasoning?: string; tool_calls?: any[] } = {
        role: msg.role,
        content: msg.content,
      };
      if (msg.images && msg.images.length > 0) {
        formatted.images = msg.images;
      }
      if (msg.reasoning) {
        formatted.reasoning = msg.reasoning;
      }
      // CRITICAL: Include tool_calls in history for native tool calling
      // Without this, the model doesn't know it already called a tool and may call it again
      if ((msg as any).tool_calls && Array.isArray((msg as any).tool_calls)) {
        formatted.tool_calls = (msg as any).tool_calls;
      }
      return formatted;
    });

    // Build user message with optional images
    const userMessage: { role: string; content: string; images?: string[] } = {
      role: 'user',
      content: optionsArg.userMessage,
    };
    if (optionsArg.images && optionsArg.images.length > 0) {
      userMessage.images = optionsArg.images;
    }

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...historyMessages,
      userMessage,
    ];

    // Build request body with optional tools and think parameters
    const requestBody: Record<string, unknown> = {
      model,
      messages,
      stream: true,
      options: modelOptions,
    };

    // Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
    if (modelOptions.think !== undefined) {
      requestBody.think = modelOptions.think;
    }

    // Add tools for native function calling
    if (optionsArg.tools && optionsArg.tools.length > 0) {
      requestBody.tools = optionsArg.tools;
    }

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
      signal: AbortSignal.timeout(timeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }

    const reader = response.body!.getReader();
    const decoder = new TextDecoder();

    // Return a lazy async iterable so consumers can break early;
    // the reader lock is released in the finally block either way.
    return {
      [Symbol.asyncIterator]: async function* () {
        let buffer = '';
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split('\n');
            // Keep the trailing partial line in the buffer for the next read.
            buffer = lines.pop() || '';
            for (const line of lines) {
              if (!line.trim()) continue;
              try {
                const json = JSON.parse(line);

                // Parse tool_calls from response; arguments may arrive as a
                // JSON string or an object — normalize to an object.
                let toolCalls: IOllamaToolCall[] | undefined;
                if (json.message?.tool_calls && Array.isArray(json.message.tool_calls)) {
                  toolCalls = json.message.tool_calls.map((tc: any) => ({
                    function: {
                      name: tc.function?.name || '',
                      arguments: typeof tc.function?.arguments === 'string'
                        ? JSON.parse(tc.function.arguments)
                        : tc.function?.arguments || {},
                      index: tc.index,
                    },
                  }));
                }

                yield {
                  content: json.message?.content || '',
                  thinking: json.message?.thinking,
                  toolCalls,
                  done: json.done || false,
                  // Stats are only present on the terminal chunk.
                  stats: json.done ? {
                    totalDuration: json.total_duration,
                    evalCount: json.eval_count,
                  } : undefined,
                } as IOllamaStreamChunk;
              } catch { /* skip malformed */ }
            }
          }
        } finally {
          reader.releaseLock();
        }
      }
    };
  }

  /**
   * Stream and collect the full response with an optional progress callback.
   * Accumulates content/thinking/toolCalls across all chunks.
   */
  public async collectStreamResponse(
    optionsArg: IOllamaChatOptions,
    onChunk?: (chunk: IOllamaStreamChunk) => void
  ): Promise<IOllamaChatResponse> {
    const stream = await this.chatStreamResponse(optionsArg);
    let content = '';
    let thinking = '';
    let toolCalls: IOllamaToolCall[] = [];
    let stats: IOllamaChatResponse['stats'];

    for await (const chunk of stream) {
      if (chunk.content) content += chunk.content;
      if (chunk.thinking) thinking += chunk.thinking;
      if (chunk.toolCalls) toolCalls = toolCalls.concat(chunk.toolCalls);
      if (chunk.stats) stats = chunk.stats;
      if (onChunk) onChunk(chunk);
    }

    return {
      role: 'assistant' as const,
      message: content,
      thinking: thinking || undefined,
      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      stats,
    };
  }

  /**
   * Non-streaming chat with full options support (per-request model/timeout,
   * native tools, think mode, and `tool` role messages in history).
   */
  public async chatWithOptions(optionsArg: IOllamaChatOptions): Promise<IOllamaChatResponse> {
    const model = optionsArg.model || this.model;
    const timeout = optionsArg.timeout || this.defaultTimeout;
    const modelOptions = { ...this.defaultOptions, ...optionsArg.options };

    // Format history messages with optional images, reasoning, tool_calls, and tool role
    const historyMessages = optionsArg.messageHistory.map((msg) => {
      // Handle tool result messages (responses to earlier tool calls)
      if ((msg as any).role === 'tool') {
        return {
          role: 'tool',
          content: msg.content,
          tool_name: (msg as any).toolName,
        };
      }

      const formatted: { role: string; content: string; images?: string[]; reasoning?: string; tool_calls?: any[] } = {
        role: msg.role,
        content: msg.content,
      };
      if (msg.images && msg.images.length > 0) {
        formatted.images = msg.images;
      }
      if (msg.reasoning) {
        formatted.reasoning = msg.reasoning;
      }
      // CRITICAL: Include tool_calls in history for native tool calling
      // Without this, the model doesn't know it already called a tool and may call it again
      if ((msg as any).tool_calls && Array.isArray((msg as any).tool_calls)) {
        formatted.tool_calls = (msg as any).tool_calls;
      }
      return formatted;
    });

    // Build user message with optional images
    const userMessage: { role: string; content: string; images?: string[] } = {
      role: 'user',
      content: optionsArg.userMessage,
    };
    if (optionsArg.images && optionsArg.images.length > 0) {
      userMessage.images = optionsArg.images;
    }

    const messages = [
      { role: 'system', content: optionsArg.systemMessage },
      ...historyMessages,
      userMessage,
    ];

    // Build request body with optional tools and think parameters
    const requestBody: Record<string, unknown> = {
      model,
      messages,
      stream: false,
      options: modelOptions,
    };

    // Add think parameter for reasoning models (GPT-OSS, QwQ, etc.)
    if (modelOptions.think !== undefined) {
      requestBody.think = modelOptions.think;
    }

    // Add tools for native function calling
    if (optionsArg.tools && optionsArg.tools.length > 0) {
      requestBody.tools = optionsArg.tools;
    }

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(requestBody),
      signal: AbortSignal.timeout(timeout),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();

    // Parse tool_calls from response (arguments may be a JSON string or object)
    let toolCalls: IOllamaToolCall[] | undefined;
    if (result.message?.tool_calls && Array.isArray(result.message.tool_calls)) {
      toolCalls = result.message.tool_calls.map((tc: any) => ({
        function: {
          name: tc.function?.name || '',
          arguments: typeof tc.function?.arguments === 'string'
            ? JSON.parse(tc.function.arguments)
            : tc.function?.arguments || {},
          index: tc.index,
        },
      }));
    }

    return {
      role: 'assistant' as const,
      message: result.message.content || '',
      thinking: result.message.thinking,
      toolCalls,
      stats: {
        totalDuration: result.total_duration,
        evalCount: result.eval_count,
      },
    };
  }

  /** Audio generation is not supported by Ollama. @throws Error always. */
  public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
    throw new Error('Audio generation is not supported by Ollama.');
  }

  /**
   * Vision via the configured vision model (e.g. 'llava'): sends the image
   * as base64 alongside the prompt and returns the model's text answer.
   */
  public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
    const base64Image = optionsArg.image.toString('base64');

    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [{
          role: 'user',
          content: optionsArg.prompt,
          images: [base64Image]
        }],
        stream: false
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();
    return result.message.content;
  }

  /**
   * Document processing: renders each PDF to PNG pages via SmartPDF, then
   * sends all page images to the vision model with the user's question.
   */
  public async document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }> {
    // Ensure SmartPdf is initialized before processing documents
    await this.ensureSmartpdfReady();

    // Convert PDF documents to images using SmartPDF
    let documentImageBytesArray: Uint8Array[] = [];

    for (const pdfDocument of optionsArg.pdfDocuments) {
      const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
      documentImageBytesArray = documentImageBytesArray.concat(documentImageArray);
    }

    // Convert images to base64
    const base64Images = documentImageBytesArray.map(bytes => Buffer.from(bytes).toString('base64'));

    // Send request to Ollama with images
    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [
          { role: 'system', content: optionsArg.systemMessage },
          ...optionsArg.messageHistory,
          {
            role: 'user',
            content: optionsArg.userMessage,
            images: base64Images
          }
        ],
        stream: false
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.statusText}`);
    }

    const result = await response.json();
    return {
      message: {
        role: 'assistant',
        content: result.message.content
      }
    };
  }

  /** Research is not supported by Ollama. @throws Error always. */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    throw new Error('Research capabilities are not yet supported by Ollama provider.');
  }

  /**
   * Image generation is not supported by Ollama.
   * @throws Error always — use the OpenAI provider instead.
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    throw new Error('Image generation is not supported by Ollama. Please use OpenAI provider for image generation.');
  }

  /**
   * Image editing is not supported by Ollama.
   * @throws Error always — use the OpenAI provider instead.
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    throw new Error('Image editing is not supported by Ollama. Please use OpenAI provider for image editing.');
  }
}
|
||||
@@ -1,462 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { Readable } from 'stream';
|
||||
import { toFile } from 'openai';
|
||||
|
||||
// Custom type definition for chat completion messages sent to the
// OpenAI chat completions API (role + plain-text content only).
export type TChatCompletionRequestMessage = {
  role: "system" | "user" | "assistant";
  content: string;
};
|
||||
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
/** Construction options for the OpenAI provider. Only the API token is required. */
export interface IOpenaiProviderOptions {
  openaiToken: string;       // OpenAI API key
  chatModel?: string;        // Chat model override (default applied per call)
  audioModel?: string;       // TTS model override (default applied per call)
  visionModel?: string;      // Vision model override
  researchModel?: string;    // Research/web-search model override
  imageModel?: string;       // Image generation/editing model override
  enableWebSearch?: boolean; // Opt into web-search-capable requests
}
|
||||
|
||||
export class OpenAiProvider extends MultiModalModel {
|
||||
private options: IOpenaiProviderOptions;
|
||||
public openAiApiClient: plugins.openai.default;
|
||||
|
||||
  // Stores options only; the API client is created lazily in start().
  constructor(optionsArg: IOpenaiProviderOptions) {
    super();
    this.options = optionsArg;
  }
|
||||
|
||||
  /** Initializes the OpenAI SDK client with the configured token. */
  public async start() {
    await super.start();
    this.openAiApiClient = new plugins.openai.default({
      apiKey: this.options.openaiToken,
      // NOTE(review): allows running in browser contexts, which exposes the
      // API key to the client — confirm this is intentional for this package.
      dangerouslyAllowBrowser: true,
    });
  }
|
||||
|
||||
  /** Tears down base-class resources; the SDK client needs no explicit close. */
  public async stop() {
    await super.stop();
  }
|
||||
|
||||
  /**
   * Stream-to-stream chat: reads newline-delimited JSON messages from `input`,
   * sends the most recent complete message to OpenAI with streaming enabled,
   * and emits response delta tokens on the returned stream.
   */
  public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
    // Create a TextDecoder to handle incoming chunks
    const decoder = new TextDecoder();
    let buffer = '';
    let currentMessage: {
      role: "function" | "user" | "system" | "assistant" | "tool" | "developer";
      content: string;
    } | null = null;

    // Create a TransformStream to process the input
    const transform = new TransformStream<Uint8Array, string>({
      transform: async (chunk, controller) => {
        buffer += decoder.decode(chunk, { stream: true });

        // Try to parse complete JSON messages from the buffer (one per line).
        // NOTE(review): only the LAST complete message per chunk survives;
        // earlier ones are overwritten — confirm intended.
        while (true) {
          const newlineIndex = buffer.indexOf('\n');
          if (newlineIndex === -1) break;

          const line = buffer.slice(0, newlineIndex);
          buffer = buffer.slice(newlineIndex + 1);

          if (line.trim()) {
            try {
              const message = JSON.parse(line);
              currentMessage = {
                role: (message.role || 'user') as "function" | "user" | "system" | "assistant" | "tool" | "developer",
                content: message.content || '',
              };
            } catch (e) {
              console.error('Failed to parse message:', e);
            }
          }
        }

        // If we have a complete message, send it to OpenAI
        if (currentMessage) {
          // The parsed role is discarded here; the message is always sent as 'user'.
          const messageToSend = { role: "user" as const, content: currentMessage.content };
          const chatModel = this.options.chatModel ?? 'gpt-5-mini';
          const requestParams: any = {
            model: chatModel,
            messages: [messageToSend],
            stream: true,
          };
          // Temperature is omitted since the model does not support it.
          const stream = await this.openAiApiClient.chat.completions.create(requestParams);
          // Explicitly cast the stream as an async iterable to satisfy TypeScript.
          const streamAsyncIterable = stream as unknown as AsyncIterableIterator<any>;
          // Process each chunk from OpenAI
          for await (const chunk of streamAsyncIterable) {
            const content = chunk.choices[0]?.delta?.content;
            if (content) {
              controller.enqueue(content);
            }
          }
          currentMessage = null;
        }
      },

      // Flush any trailing (newline-less) input as a final message.
      flush(controller) {
        if (buffer) {
          try {
            const message = JSON.parse(buffer);
            controller.enqueue(message.content || '');
          } catch (e) {
            console.error('Failed to parse remaining buffer:', e);
          }
        }
      }
    });

    // Connect the input to our transform stream
    return input.pipeThrough(transform);
  }
|
||||
|
||||
  /**
   * Synchronous (non-streaming) chat interaction.
   *
   * @param optionsArg.systemMessage  System prompt prepended to the conversation.
   * @param optionsArg.userMessage    The new user turn.
   * @param optionsArg.messageHistory Prior assistant/user turns, in order.
   * @returns The assistant role and its message text.
   */
  public async chat(optionsArg: {
    systemMessage: string;
    userMessage: string;
    messageHistory: {
      role: 'assistant' | 'user';
      content: string;
    }[];
  }) {
    const chatModel = this.options.chatModel ?? 'gpt-5-mini';
    const requestParams: any = {
      model: chatModel,
      messages: [
        { role: 'system', content: optionsArg.systemMessage },
        ...optionsArg.messageHistory,
        { role: 'user', content: optionsArg.userMessage },
      ],
    };
    // Temperature parameter removed to avoid unsupported error.
    const result = await this.openAiApiClient.chat.completions.create(requestParams);
    return {
      role: result.choices[0].message.role as 'assistant',
      message: result.choices[0].message.content,
    };
  }
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
const done = plugins.smartpromise.defer<NodeJS.ReadableStream>();
|
||||
const result = await this.openAiApiClient.audio.speech.create({
|
||||
model: this.options.audioModel ?? 'tts-1-hd',
|
||||
input: optionsArg.message,
|
||||
voice: 'nova',
|
||||
response_format: 'mp3',
|
||||
speed: 1,
|
||||
});
|
||||
const stream = result.body;
|
||||
const nodeStream = Readable.fromWeb(stream as any);
|
||||
done.resolve(nodeStream);
|
||||
return done.promise;
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: {
|
||||
role: 'assistant' | 'user';
|
||||
content: any;
|
||||
}[];
|
||||
}) {
|
||||
// Ensure SmartPdf is initialized before processing documents
|
||||
await this.ensureSmartpdfReady();
|
||||
|
||||
let pdfDocumentImageBytesArray: Uint8Array[] = [];
|
||||
|
||||
// Convert each PDF into one or more image byte arrays.
|
||||
for (const pdfDocument of optionsArg.pdfDocuments) {
|
||||
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
|
||||
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
|
||||
}
|
||||
|
||||
console.log(`image smartfile array`);
|
||||
console.log(pdfDocumentImageBytesArray.map((smartfile) => smartfile.length));
|
||||
|
||||
// Filter out any empty buffers to avoid sending invalid image URLs.
|
||||
const validImageBytesArray = pdfDocumentImageBytesArray.filter(imageBytes => imageBytes && imageBytes.length > 0);
|
||||
const imageAttachments = validImageBytesArray.map(imageBytes => ({
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: 'data:image/png;base64,' + Buffer.from(imageBytes).toString('base64'),
|
||||
},
|
||||
}));
|
||||
|
||||
const chatModel = this.options.chatModel ?? 'gpt-5-mini';
|
||||
const requestParams: any = {
|
||||
model: chatModel,
|
||||
messages: [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory,
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: optionsArg.userMessage },
|
||||
...imageAttachments,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
// Temperature parameter removed.
|
||||
const result = await this.openAiApiClient.chat.completions.create(requestParams);
|
||||
return {
|
||||
message: result.choices[0].message,
|
||||
};
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
const visionModel = this.options.visionModel ?? '04-mini';
|
||||
const requestParams: any = {
|
||||
model: visionModel,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: optionsArg.prompt },
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: `data:image/jpeg;base64,${optionsArg.image.toString('base64')}`
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
max_tokens: 300
|
||||
};
|
||||
const result = await this.openAiApiClient.chat.completions.create(requestParams);
|
||||
return result.choices[0].message.content || '';
|
||||
}
|
||||
|
||||
  /**
   * Research query via OpenAI's Responses API (Deep Research).
   *
   * Model selection: 'deep' searches — and any search requesting web access —
   * use the configured researchModel (default 'o4-mini-deep-research-2025-06-26');
   * otherwise the regular chat model is used.
   *
   * @param optionsArg - query text plus searchDepth / includeWebSearch / background flags
   * @returns answer text, sources parsed from markdown links, the web search
   *          queries the model issued (if any), and model/usage metadata
   * @throws Error wrapping any failure from the underlying API call
   */
  public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
    // Determine which model to use - Deep Research API requires specific models
    let model: string;
    if (optionsArg.searchDepth === 'deep') {
      model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
    } else {
      // For basic/advanced, still use deep research models if web search is needed
      if (optionsArg.includeWebSearch) {
        model = this.options.researchModel || 'o4-mini-deep-research-2025-06-26';
      } else {
        model = this.options.chatModel || 'gpt-5-mini';
      }
    }

    const systemMessage = 'You are a research assistant. Provide comprehensive answers with citations and sources when available.';

    // Prepare request parameters using Deep Research API format
    const requestParams: any = {
      model,
      instructions: systemMessage,
      input: optionsArg.query
    };

    // Add web search tool if requested; context size scales with search depth.
    if (optionsArg.includeWebSearch || optionsArg.searchDepth === 'deep') {
      requestParams.tools = [
        {
          type: 'web_search_preview',
          search_context_size: optionsArg.searchDepth === 'deep' ? 'high' :
                               optionsArg.searchDepth === 'advanced' ? 'medium' : 'low'
        }
      ];
    }

    // Add background flag for deep research
    if (optionsArg.background && optionsArg.searchDepth === 'deep') {
      requestParams.background = true;
    }

    try {
      // Execute the research request using Deep Research API
      const result = await this.openAiApiClient.responses.create(requestParams);

      // Extract the answer from output items
      let answer = '';
      const sources: Array<{ url: string; title: string; snippet: string }> = [];
      const searchQueries: string[] = [];

      // Process output items: the Responses API returns a heterogeneous list
      // of typed items ('message', 'web_search_call', ...).
      for (const item of result.output || []) {
        // Extract message content (concatenate all output_text fragments)
        if (item.type === 'message' && 'content' in item) {
          const messageItem = item as any;
          for (const contentItem of messageItem.content || []) {
            if (contentItem.type === 'output_text' && 'text' in contentItem) {
              answer += contentItem.text;
            }
          }
        }

        // Extract web search queries the model issued during the run
        if (item.type === 'web_search_call' && 'action' in item) {
          const searchItem = item as any;
          if (searchItem.action && searchItem.action.type === 'search' && 'query' in searchItem.action) {
            searchQueries.push(searchItem.action.query);
          }
        }
      }

      // Parse sources from markdown links in the answer: [title](url)
      const urlRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
      let match: RegExpExecArray | null;

      while ((match = urlRegex.exec(answer)) !== null) {
        sources.push({
          title: match[1],
          url: match[2],
          snippet: ''
        });
      }

      return {
        answer,
        sources,
        searchQueries: searchQueries.length > 0 ? searchQueries : undefined,
        metadata: {
          model,
          searchDepth: optionsArg.searchDepth || 'basic',
          tokensUsed: result.usage?.total_tokens
        }
      };
    } catch (error) {
      console.error('Research API error:', error);
      throw new Error(`Failed to perform research: ${error.message}`);
    }
  }
|
||||
|
||||
  /**
   * Image generation using OpenAI's gpt-image-1 or DALL-E models.
   *
   * Request parameters are mapped per model family: gpt-image-1 supports the
   * full option set; dall-e-3 additionally takes 'style'; dall-e-2 only takes
   * 'size'. DALL-E responses are forced to b64_json so every model family
   * returns images in the same shape.
   *
   * @param optionsArg - prompt plus optional model/quality/size/etc. overrides
   * @returns generated images (base64 and/or URL) with model/usage metadata
   * @throws Error wrapping any failure from the images API
   */
  public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
    // Precedence: per-call model > provider-configured model > gpt-image-1.
    const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';

    try {
      const requestParams: any = {
        model,
        prompt: optionsArg.prompt,
        n: optionsArg.n || 1,
      };

      // Add gpt-image-1 specific parameters (only when explicitly set)
      if (model === 'gpt-image-1') {
        if (optionsArg.quality) requestParams.quality = optionsArg.quality;
        if (optionsArg.size) requestParams.size = optionsArg.size;
        if (optionsArg.background) requestParams.background = optionsArg.background;
        if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
        if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
        if (optionsArg.moderation) requestParams.moderation = optionsArg.moderation;
        if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
        if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
      } else if (model === 'dall-e-3') {
        // DALL-E 3 specific parameters
        if (optionsArg.quality) requestParams.quality = optionsArg.quality;
        if (optionsArg.size) requestParams.size = optionsArg.size;
        if (optionsArg.style) requestParams.style = optionsArg.style;
        requestParams.response_format = 'b64_json'; // Always use base64 for consistency
      } else if (model === 'dall-e-2') {
        // DALL-E 2 specific parameters
        if (optionsArg.size) requestParams.size = optionsArg.size;
        requestParams.response_format = 'b64_json';
      }

      const result = await this.openAiApiClient.images.generate(requestParams);

      // Normalize API field names (snake_case -> camelCase for revisedPrompt).
      const images = (result.data || []).map(img => ({
        b64_json: img.b64_json,
        url: img.url,
        revisedPrompt: img.revised_prompt
      }));

      return {
        images,
        metadata: {
          model,
          quality: result.quality,
          size: result.size,
          outputFormat: result.output_format,
          tokensUsed: result.usage?.total_tokens
        }
      };
    } catch (error) {
      console.error('Image generation error:', error);
      throw new Error(`Failed to generate image: ${error.message}`);
    }
  }
|
||||
|
||||
  /**
   * Image editing using OpenAI's gpt-image-1 or DALL-E 2 models.
   *
   * The input image (and optional mask) are Buffers; they are converted to
   * uploadable file objects before the API call. Parameter mapping mirrors
   * imageGenerate(): gpt-image-1 gets the full option set, dall-e-2 only
   * 'size' (with a forced b64_json response for a consistent return shape).
   *
   * @param optionsArg - image bytes, prompt, optional mask and model overrides
   * @returns edited images (base64 and/or URL) with model/usage metadata
   * @throws Error wrapping any failure from the images API
   */
  public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
    // Precedence: per-call model > provider-configured model > gpt-image-1.
    const model = optionsArg.model || this.options.imageModel || 'gpt-image-1';

    try {
      // Convert Buffer to uploadable file format for OpenAI API
      const imageFile = await toFile(optionsArg.image, 'image.png', { type: 'image/png' });

      const requestParams: any = {
        model,
        image: imageFile,
        prompt: optionsArg.prompt,
        n: optionsArg.n || 1,
      };

      // Add mask if provided (also convert to file format)
      if (optionsArg.mask) {
        requestParams.mask = await toFile(optionsArg.mask, 'mask.png', { type: 'image/png' });
      }

      // Add gpt-image-1 specific parameters (only when explicitly set)
      if (model === 'gpt-image-1') {
        if (optionsArg.quality) requestParams.quality = optionsArg.quality;
        if (optionsArg.size) requestParams.size = optionsArg.size;
        if (optionsArg.background) requestParams.background = optionsArg.background;
        if (optionsArg.outputFormat) requestParams.output_format = optionsArg.outputFormat;
        if (optionsArg.outputCompression !== undefined) requestParams.output_compression = optionsArg.outputCompression;
        if (optionsArg.stream !== undefined) requestParams.stream = optionsArg.stream;
        if (optionsArg.partialImages !== undefined) requestParams.partial_images = optionsArg.partialImages;
      } else if (model === 'dall-e-2') {
        // DALL-E 2 specific parameters
        if (optionsArg.size) requestParams.size = optionsArg.size;
        requestParams.response_format = 'b64_json';
      }

      const result = await this.openAiApiClient.images.edit(requestParams);

      // Normalize API field names (snake_case -> camelCase for revisedPrompt).
      const images = (result.data || []).map(img => ({
        b64_json: img.b64_json,
        url: img.url,
        revisedPrompt: img.revised_prompt
      }));

      return {
        images,
        metadata: {
          model,
          quality: result.quality,
          size: result.size,
          outputFormat: result.output_format,
          tokensUsed: result.usage?.total_tokens
        }
      };
    } catch (error) {
      console.error('Image edit error:', error);
      throw new Error(`Failed to edit image: ${error.message}`);
    }
  }
|
||||
}
|
||||
@@ -1,259 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
|
||||
export interface IPerplexityProviderOptions {
|
||||
perplexityToken: string;
|
||||
}
|
||||
|
||||
export class PerplexityProvider extends MultiModalModel {
|
||||
private options: IPerplexityProviderOptions;
|
||||
|
||||
constructor(optionsArg: IPerplexityProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
async start() {
|
||||
// Initialize any necessary clients or resources
|
||||
}
|
||||
|
||||
async stop() {}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to Perplexity
|
||||
if (currentMessage) {
|
||||
const response = await fetch('https://api.perplexity.ai/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.perplexityToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'mixtral-8x7b-instruct',
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
stream: true,
|
||||
}),
|
||||
});
|
||||
|
||||
// Process each chunk from Perplexity
|
||||
const reader = response.body?.getReader();
|
||||
if (reader) {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = new TextDecoder().decode(value);
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') break;
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
const content = parsed.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to parse SSE data:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
// Implementing the synchronous chat interaction
|
||||
public async chat(optionsArg: ChatOptions): Promise<ChatResponse> {
|
||||
// Make API call to Perplexity
|
||||
const response = await fetch('https://api.perplexity.ai/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.perplexityToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'mixtral-8x7b-instruct', // Using Mixtral model
|
||||
messages: [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory,
|
||||
{ role: 'user', content: optionsArg.userMessage }
|
||||
],
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Perplexity API error: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
|
||||
return {
|
||||
role: 'assistant' as const,
|
||||
message: result.choices[0].message.content,
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by Perplexity.');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision tasks are not supported by Perplexity.');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: ChatMessage[];
|
||||
}): Promise<{ message: any }> {
|
||||
throw new Error('Document processing is not supported by Perplexity.');
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
// Perplexity has Sonar models that are optimized for search
|
||||
// sonar models: sonar, sonar-pro
|
||||
const model = optionsArg.searchDepth === 'deep' ? 'sonar-pro' : 'sonar';
|
||||
|
||||
try {
|
||||
const response = await fetch('https://api.perplexity.ai/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.options.perplexityToken}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: 'You are a helpful research assistant. Provide accurate information with sources.'
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: optionsArg.query
|
||||
}
|
||||
],
|
||||
temperature: 0.7,
|
||||
max_tokens: 4000
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Perplexity API error: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
const answer = result.choices[0].message.content;
|
||||
|
||||
// Parse citations from the response
|
||||
const sources: Array<{ url: string; title: string; snippet: string }> = [];
|
||||
|
||||
// Perplexity includes citations in the format [1], [2], etc. with sources listed
|
||||
// This is a simplified parser - could be enhanced based on actual Perplexity response format
|
||||
if (result.citations) {
|
||||
for (const citation of result.citations) {
|
||||
sources.push({
|
||||
url: citation.url || '',
|
||||
title: citation.title || '',
|
||||
snippet: citation.snippet || ''
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
answer,
|
||||
sources,
|
||||
metadata: {
|
||||
model,
|
||||
searchDepth: optionsArg.searchDepth || 'basic'
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Perplexity research error:', error);
|
||||
throw new Error(`Failed to perform research: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by Perplexity
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by Perplexity. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by Perplexity
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by Perplexity. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
@@ -1,214 +0,0 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
import { MultiModalModel } from './abstract.classes.multimodal.js';
|
||||
import type {
|
||||
ChatOptions,
|
||||
ChatResponse,
|
||||
ChatMessage,
|
||||
ResearchOptions,
|
||||
ResearchResponse,
|
||||
ImageGenerateOptions,
|
||||
ImageEditOptions,
|
||||
ImageResponse
|
||||
} from './abstract.classes.multimodal.js';
|
||||
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
|
||||
|
||||
export interface IXAIProviderOptions {
|
||||
xaiToken: string;
|
||||
}
|
||||
|
||||
export class XAIProvider extends MultiModalModel {
|
||||
private options: IXAIProviderOptions;
|
||||
public openAiApiClient: plugins.openai.default;
|
||||
|
||||
constructor(optionsArg: IXAIProviderOptions) {
|
||||
super();
|
||||
this.options = optionsArg;
|
||||
}
|
||||
|
||||
public async start() {
|
||||
await super.start();
|
||||
this.openAiApiClient = new plugins.openai.default({
|
||||
apiKey: this.options.xaiToken,
|
||||
baseURL: 'https://api.x.ai/v1',
|
||||
});
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
await super.stop();
|
||||
}
|
||||
|
||||
public async chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>> {
|
||||
// Create a TextDecoder to handle incoming chunks
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let currentMessage: { role: string; content: string; } | null = null;
|
||||
|
||||
// Create a TransformStream to process the input
|
||||
const transform = new TransformStream<Uint8Array, string>({
|
||||
async transform(chunk, controller) {
|
||||
buffer += decoder.decode(chunk, { stream: true });
|
||||
|
||||
// Try to parse complete JSON messages from the buffer
|
||||
while (true) {
|
||||
const newlineIndex = buffer.indexOf('\n');
|
||||
if (newlineIndex === -1) break;
|
||||
|
||||
const line = buffer.slice(0, newlineIndex);
|
||||
buffer = buffer.slice(newlineIndex + 1);
|
||||
|
||||
if (line.trim()) {
|
||||
try {
|
||||
const message = JSON.parse(line);
|
||||
currentMessage = {
|
||||
role: message.role || 'user',
|
||||
content: message.content || '',
|
||||
};
|
||||
} catch (e) {
|
||||
console.error('Failed to parse message:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a complete message, send it to X.AI
|
||||
if (currentMessage) {
|
||||
const stream = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'grok-2-latest',
|
||||
messages: [{ role: currentMessage.role, content: currentMessage.content }],
|
||||
stream: true,
|
||||
});
|
||||
|
||||
// Process each chunk from X.AI
|
||||
for await (const chunk of stream) {
|
||||
const content = chunk.choices[0]?.delta?.content;
|
||||
if (content) {
|
||||
controller.enqueue(content);
|
||||
}
|
||||
}
|
||||
|
||||
currentMessage = null;
|
||||
}
|
||||
},
|
||||
|
||||
flush(controller) {
|
||||
if (buffer) {
|
||||
try {
|
||||
const message = JSON.parse(buffer);
|
||||
controller.enqueue(message.content || '');
|
||||
} catch (e) {
|
||||
console.error('Failed to parse remaining buffer:', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the input to our transform stream
|
||||
return input.pipeThrough(transform);
|
||||
}
|
||||
|
||||
public async chat(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
messageHistory: { role: string; content: string; }[];
|
||||
}): Promise<{ role: 'assistant'; message: string; }> {
|
||||
// Prepare messages array with system message, history, and user message
|
||||
const messages: ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role as 'system' | 'user' | 'assistant',
|
||||
content: msg.content
|
||||
})),
|
||||
{ role: 'user', content: optionsArg.userMessage }
|
||||
];
|
||||
|
||||
// Call X.AI's chat completion API
|
||||
const completion = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'grok-2-latest',
|
||||
messages: messages,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
// Return the assistant's response
|
||||
return {
|
||||
role: 'assistant',
|
||||
message: completion.choices[0]?.message?.content || ''
|
||||
};
|
||||
}
|
||||
|
||||
public async audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream> {
|
||||
throw new Error('Audio generation is not supported by X.AI');
|
||||
}
|
||||
|
||||
public async vision(optionsArg: { image: Buffer; prompt: string }): Promise<string> {
|
||||
throw new Error('Vision tasks are not supported by X.AI');
|
||||
}
|
||||
|
||||
public async document(optionsArg: {
|
||||
systemMessage: string;
|
||||
userMessage: string;
|
||||
pdfDocuments: Uint8Array[];
|
||||
messageHistory: { role: string; content: string; }[];
|
||||
}): Promise<{ message: any }> {
|
||||
// Ensure SmartPdf is initialized before processing documents
|
||||
await this.ensureSmartpdfReady();
|
||||
|
||||
// First convert PDF documents to images
|
||||
let pdfDocumentImageBytesArray: Uint8Array[] = [];
|
||||
|
||||
for (const pdfDocument of optionsArg.pdfDocuments) {
|
||||
const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument);
|
||||
pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray);
|
||||
}
|
||||
|
||||
// Convert images to base64 for inclusion in the message
|
||||
const imageBase64Array = pdfDocumentImageBytesArray.map(bytes =>
|
||||
Buffer.from(bytes).toString('base64')
|
||||
);
|
||||
|
||||
// Combine document images into the user message
|
||||
const enhancedUserMessage = `
|
||||
${optionsArg.userMessage}
|
||||
|
||||
Document contents (as images):
|
||||
${imageBase64Array.map((img, i) => `Image ${i + 1}: <image data>`).join('\n')}
|
||||
`;
|
||||
|
||||
// Use chat completion to analyze the documents
|
||||
const messages: ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: optionsArg.systemMessage },
|
||||
...optionsArg.messageHistory.map(msg => ({
|
||||
role: msg.role as 'system' | 'user' | 'assistant',
|
||||
content: msg.content
|
||||
})),
|
||||
{ role: 'user', content: enhancedUserMessage }
|
||||
];
|
||||
|
||||
const completion = await this.openAiApiClient.chat.completions.create({
|
||||
model: 'grok-2-latest',
|
||||
messages: messages,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
return {
|
||||
message: completion.choices[0]?.message?.content || ''
|
||||
};
|
||||
}
|
||||
|
||||
public async research(optionsArg: ResearchOptions): Promise<ResearchResponse> {
|
||||
throw new Error('Research capabilities are not yet supported by xAI provider.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image generation is not supported by xAI
|
||||
*/
|
||||
public async imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image generation is not supported by xAI. Please use OpenAI provider for image generation.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Image editing is not supported by xAI
|
||||
*/
|
||||
public async imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse> {
|
||||
throw new Error('Image editing is not supported by xAI. Please use OpenAI provider for image editing.');
|
||||
}
|
||||
}
|
||||
51
ts/smartai.classes.smartai.ts
Normal file
51
ts/smartai.classes.smartai.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import * as plugins from './plugins.js';
|
||||
import type { ISmartAiOptions, LanguageModelV3 } from './smartai.interfaces.js';
|
||||
import { createOllamaModel } from './smartai.provider.ollama.js';
|
||||
import { createAnthropicCachingMiddleware } from './smartai.middleware.anthropic.js';
|
||||
|
||||
/**
|
||||
* Returns a LanguageModelV3 for the given provider and model.
|
||||
* This is the primary API — consumers use the returned model with AI SDK's
|
||||
* generateText(), streamText(), etc.
|
||||
*/
|
||||
export function getModel(options: ISmartAiOptions): LanguageModelV3 {
|
||||
switch (options.provider) {
|
||||
case 'anthropic': {
|
||||
const p = plugins.createAnthropic({ apiKey: options.apiKey });
|
||||
const base = p(options.model) as LanguageModelV3;
|
||||
if (options.promptCaching === false) return base;
|
||||
return plugins.wrapLanguageModel({
|
||||
model: base,
|
||||
middleware: createAnthropicCachingMiddleware(),
|
||||
}) as unknown as LanguageModelV3;
|
||||
}
|
||||
case 'openai': {
|
||||
const p = plugins.createOpenAI({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'google': {
|
||||
const p = plugins.createGoogleGenerativeAI({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'groq': {
|
||||
const p = plugins.createGroq({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'mistral': {
|
||||
const p = plugins.createMistral({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'xai': {
|
||||
const p = plugins.createXai({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'perplexity': {
|
||||
const p = plugins.createPerplexity({ apiKey: options.apiKey });
|
||||
return p(options.model) as LanguageModelV3;
|
||||
}
|
||||
case 'ollama':
|
||||
return createOllamaModel(options);
|
||||
default:
|
||||
throw new Error(`Unknown provider: ${(options as ISmartAiOptions).provider}`);
|
||||
}
|
||||
}
|
||||
53
ts/smartai.interfaces.ts
Normal file
53
ts/smartai.interfaces.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import type { LanguageModelV3 } from '@ai-sdk/provider';
|
||||
|
||||
/** Union of all providers supported by getModel(). */
export type TProvider =
  | 'anthropic'
  | 'openai'
  | 'google'
  | 'groq'
  | 'mistral'
  | 'xai'
  | 'perplexity'
  | 'ollama';

/** Options accepted by getModel() when resolving a provider/model pair. */
export interface ISmartAiOptions {
  /** Which backing provider to instantiate. */
  provider: TProvider;
  /** Model identifier passed verbatim to the provider factory. */
  model: string;
  /** API key forwarded to the hosted-provider factory (unused by Ollama). */
  apiKey?: string;
  /** For Ollama: base URL of the local server. Default: http://localhost:11434 */
  baseUrl?: string;
  /**
   * Ollama-specific model runtime options.
   * Only used when provider === 'ollama'.
   */
  ollamaOptions?: IOllamaModelOptions;
  /**
   * Enable Anthropic prompt caching on system + recent messages.
   * Only used when provider === 'anthropic'. Default: true.
   */
  promptCaching?: boolean;
}

/**
 * Ollama model runtime options passed in the request body `options` field.
 * @see https://github.com/ollama/ollama/blob/main/docs/modelfile.md
 */
export interface IOllamaModelOptions {
  /** Context window size. Default: 2048. */
  num_ctx?: number;
  /** 0 = deterministic. Default: 0.8. For Qwen models use 0.55. */
  temperature?: number;
  /** Sampling: restrict to the k most likely tokens. */
  top_k?: number;
  /** Sampling: nucleus probability mass cutoff. */
  top_p?: number;
  /** Penalty applied to repeated tokens. */
  repeat_penalty?: number;
  /** Maximum number of tokens to generate. */
  num_predict?: number;
  /** Stop sequences that end generation. */
  stop?: string[];
  /** RNG seed for reproducible sampling. */
  seed?: number;
  /**
   * Enable thinking/reasoning mode (Qwen3, QwQ, DeepSeek-R1 etc.).
   * The custom Ollama provider handles this directly.
   */
  think?: boolean;
}

// Re-exported so consumers can type the return of getModel() without
// importing from @ai-sdk/provider themselves.
export type { LanguageModelV3 };
|
||||
38
ts/smartai.middleware.anthropic.ts
Normal file
38
ts/smartai.middleware.anthropic.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import type { LanguageModelV3Middleware, LanguageModelV3Prompt } from '@ai-sdk/provider';
|
||||
|
||||
/**
|
||||
* Creates middleware that adds Anthropic prompt caching directives.
|
||||
* Marks the last system message and last user message with ephemeral cache control,
|
||||
* reducing input token cost and latency on repeated calls.
|
||||
*/
|
||||
export function createAnthropicCachingMiddleware(): LanguageModelV3Middleware {
|
||||
return {
|
||||
specificationVersion: 'v3',
|
||||
transformParams: async ({ params }) => {
|
||||
const messages = [...params.prompt] as Array<Record<string, unknown>>;
|
||||
|
||||
// Find the last system message and last user message
|
||||
let lastSystemIdx = -1;
|
||||
let lastUserIdx = -1;
|
||||
for (let i = 0; i < messages.length; i++) {
|
||||
if (messages[i].role === 'system') lastSystemIdx = i;
|
||||
if (messages[i].role === 'user') lastUserIdx = i;
|
||||
}
|
||||
|
||||
const targets = [lastSystemIdx, lastUserIdx].filter(i => i >= 0);
|
||||
for (const idx of targets) {
|
||||
const msg = { ...messages[idx] };
|
||||
msg.providerOptions = {
|
||||
...(msg.providerOptions as Record<string, unknown> || {}),
|
||||
anthropic: {
|
||||
...((msg.providerOptions as Record<string, unknown>)?.anthropic as Record<string, unknown> || {}),
|
||||
cacheControl: { type: 'ephemeral' },
|
||||
},
|
||||
};
|
||||
messages[idx] = msg;
|
||||
}
|
||||
|
||||
return { ...params, prompt: messages as unknown as LanguageModelV3Prompt };
|
||||
},
|
||||
};
|
||||
}
|
||||
426
ts/smartai.provider.ollama.ts
Normal file
426
ts/smartai.provider.ollama.ts
Normal file
@@ -0,0 +1,426 @@
|
||||
import type {
|
||||
LanguageModelV3,
|
||||
LanguageModelV3CallOptions,
|
||||
LanguageModelV3GenerateResult,
|
||||
LanguageModelV3StreamResult,
|
||||
LanguageModelV3StreamPart,
|
||||
LanguageModelV3Prompt,
|
||||
LanguageModelV3Content,
|
||||
LanguageModelV3Usage,
|
||||
LanguageModelV3FinishReason,
|
||||
} from '@ai-sdk/provider';
|
||||
import type { ISmartAiOptions, IOllamaModelOptions } from './smartai.interfaces.js';
|
||||
|
||||
/** Message shape sent to and received from Ollama's /api/chat endpoint. */
interface IOllamaMessage {
  // One of 'system' | 'user' | 'assistant' | 'tool'.
  role: string;
  content: string;
  /** Base64-encoded images for vision-capable models. */
  images?: string[];
  /** Tool invocations emitted by the model. */
  tool_calls?: Array<{
    function: { name: string; arguments: Record<string, unknown> };
  }>;
  /** Chain-of-thought text produced when `think` mode is enabled. */
  thinking?: string;
}
|
||||
|
||||
/** Tool definition in the format expected by Ollama's /api/chat `tools` field. */
interface IOllamaTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    /** JSON Schema describing the tool's arguments. */
    parameters: Record<string, unknown>;
  };
}
|
||||
|
||||
/**
|
||||
* Convert AI SDK V3 prompt messages to Ollama's message format.
|
||||
*/
|
||||
function convertPromptToOllamaMessages(prompt: LanguageModelV3Prompt): IOllamaMessage[] {
|
||||
const messages: IOllamaMessage[] = [];
|
||||
|
||||
for (const msg of prompt) {
|
||||
if (msg.role === 'system') {
|
||||
// System message content is a plain string in V3
|
||||
messages.push({ role: 'system', content: msg.content });
|
||||
} else if (msg.role === 'user') {
|
||||
let text = '';
|
||||
const images: string[] = [];
|
||||
for (const part of msg.content) {
|
||||
if (part.type === 'text') {
|
||||
text += part.text;
|
||||
} else if (part.type === 'file' && part.mediaType?.startsWith('image/')) {
|
||||
// Handle image files — Ollama expects base64 images
|
||||
if (typeof part.data === 'string') {
|
||||
images.push(part.data);
|
||||
} else if (part.data instanceof Uint8Array) {
|
||||
images.push(Buffer.from(part.data).toString('base64'));
|
||||
}
|
||||
}
|
||||
}
|
||||
const m: IOllamaMessage = { role: 'user', content: text };
|
||||
if (images.length > 0) m.images = images;
|
||||
messages.push(m);
|
||||
} else if (msg.role === 'assistant') {
|
||||
let text = '';
|
||||
let thinking = '';
|
||||
const toolCalls: IOllamaMessage['tool_calls'] = [];
|
||||
for (const part of msg.content) {
|
||||
if (part.type === 'text') {
|
||||
text += part.text;
|
||||
} else if (part.type === 'reasoning') {
|
||||
thinking += part.text;
|
||||
} else if (part.type === 'tool-call') {
|
||||
const args = typeof part.input === 'string'
|
||||
? JSON.parse(part.input as string)
|
||||
: (part.input as Record<string, unknown>);
|
||||
toolCalls.push({
|
||||
function: {
|
||||
name: part.toolName,
|
||||
arguments: args,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
const m: IOllamaMessage = { role: 'assistant', content: text };
|
||||
if (toolCalls.length > 0) m.tool_calls = toolCalls;
|
||||
if (thinking) m.thinking = thinking;
|
||||
messages.push(m);
|
||||
} else if (msg.role === 'tool') {
|
||||
for (const part of msg.content) {
|
||||
if (part.type === 'tool-result') {
|
||||
let resultContent = '';
|
||||
if (part.output) {
|
||||
if (part.output.type === 'text') {
|
||||
resultContent = part.output.value;
|
||||
} else if (part.output.type === 'json') {
|
||||
resultContent = JSON.stringify(part.output.value);
|
||||
}
|
||||
}
|
||||
messages.push({ role: 'tool', content: resultContent });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return messages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert AI SDK V3 tools to Ollama's tool format.
|
||||
*/
|
||||
function convertToolsToOllamaTools(tools: LanguageModelV3CallOptions['tools']): IOllamaTool[] | undefined {
|
||||
if (!tools || tools.length === 0) return undefined;
|
||||
|
||||
return tools
|
||||
.filter((t): t is Extract<typeof t, { type: 'function' }> => t.type === 'function')
|
||||
.map(t => ({
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
name: t.name,
|
||||
description: t.description ?? '',
|
||||
parameters: t.inputSchema as Record<string, unknown>,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function makeUsage(promptTokens?: number, completionTokens?: number): LanguageModelV3Usage {
|
||||
return {
|
||||
inputTokens: {
|
||||
total: promptTokens,
|
||||
noCache: undefined,
|
||||
cacheRead: undefined,
|
||||
cacheWrite: undefined,
|
||||
},
|
||||
outputTokens: {
|
||||
total: completionTokens,
|
||||
text: completionTokens,
|
||||
reasoning: undefined,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function makeFinishReason(reason?: string): LanguageModelV3FinishReason {
|
||||
if (reason === 'tool_calls' || reason === 'tool-calls') {
|
||||
return { unified: 'tool-calls', raw: reason };
|
||||
}
|
||||
return { unified: 'stop', raw: reason ?? 'stop' };
|
||||
}
|
||||
|
||||
let idCounter = 0;
|
||||
function generateId(): string {
|
||||
return `ollama-${Date.now()}-${idCounter++}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Custom LanguageModelV3 implementation for Ollama.
|
||||
* Calls Ollama's native /api/chat endpoint directly to support
|
||||
* think, num_ctx, temperature, and other model options.
|
||||
*/
|
||||
export function createOllamaModel(options: ISmartAiOptions): LanguageModelV3 {
|
||||
const baseUrl = options.baseUrl ?? 'http://localhost:11434';
|
||||
const modelId = options.model;
|
||||
const ollamaOpts: IOllamaModelOptions = { ...options.ollamaOptions };
|
||||
|
||||
// Apply default temperature of 0.55 for Qwen models
|
||||
if (modelId.toLowerCase().includes('qwen') && ollamaOpts.temperature === undefined) {
|
||||
ollamaOpts.temperature = 0.55;
|
||||
}
|
||||
|
||||
const model: LanguageModelV3 = {
|
||||
specificationVersion: 'v3',
|
||||
provider: 'ollama',
|
||||
modelId,
|
||||
supportedUrls: {},
|
||||
|
||||
async doGenerate(callOptions: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult> {
|
||||
const messages = convertPromptToOllamaMessages(callOptions.prompt);
|
||||
const tools = convertToolsToOllamaTools(callOptions.tools);
|
||||
|
||||
const ollamaModelOptions: Record<string, unknown> = { ...ollamaOpts };
|
||||
// Override with call-level options if provided
|
||||
if (callOptions.temperature !== undefined) ollamaModelOptions.temperature = callOptions.temperature;
|
||||
if (callOptions.topP !== undefined) ollamaModelOptions.top_p = callOptions.topP;
|
||||
if (callOptions.topK !== undefined) ollamaModelOptions.top_k = callOptions.topK;
|
||||
if (callOptions.maxOutputTokens !== undefined) ollamaModelOptions.num_predict = callOptions.maxOutputTokens;
|
||||
if (callOptions.seed !== undefined) ollamaModelOptions.seed = callOptions.seed;
|
||||
if (callOptions.stopSequences) ollamaModelOptions.stop = callOptions.stopSequences;
|
||||
// Remove think from options — it goes at the top level
|
||||
const { think, ...modelOpts } = ollamaModelOptions;
|
||||
|
||||
const requestBody: Record<string, unknown> = {
|
||||
model: modelId,
|
||||
messages,
|
||||
stream: false,
|
||||
options: modelOpts,
|
||||
};
|
||||
|
||||
// Add think parameter at the top level (Ollama API requirement)
|
||||
if (ollamaOpts.think !== undefined) {
|
||||
requestBody.think = ollamaOpts.think;
|
||||
}
|
||||
|
||||
if (tools) requestBody.tools = tools;
|
||||
|
||||
const response = await fetch(`${baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(requestBody),
|
||||
signal: callOptions.abortSignal,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const body = await response.text();
|
||||
throw new Error(`Ollama API error ${response.status}: ${body}`);
|
||||
}
|
||||
|
||||
const result = await response.json() as Record<string, unknown>;
|
||||
const message = result.message as Record<string, unknown>;
|
||||
|
||||
// Build content array
|
||||
const content: LanguageModelV3Content[] = [];
|
||||
|
||||
// Add reasoning if present
|
||||
if (message.thinking && typeof message.thinking === 'string') {
|
||||
content.push({ type: 'reasoning', text: message.thinking });
|
||||
}
|
||||
|
||||
// Add text content
|
||||
if (message.content && typeof message.content === 'string') {
|
||||
content.push({ type: 'text', text: message.content });
|
||||
}
|
||||
|
||||
// Add tool calls if present
|
||||
if (Array.isArray(message.tool_calls)) {
|
||||
for (const tc of message.tool_calls as Array<Record<string, unknown>>) {
|
||||
const fn = tc.function as Record<string, unknown>;
|
||||
content.push({
|
||||
type: 'tool-call',
|
||||
toolCallId: generateId(),
|
||||
toolName: fn.name as string,
|
||||
input: JSON.stringify(fn.arguments),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const finishReason = Array.isArray(message.tool_calls) && (message.tool_calls as unknown[]).length > 0
|
||||
? makeFinishReason('tool_calls')
|
||||
: makeFinishReason('stop');
|
||||
|
||||
return {
|
||||
content,
|
||||
finishReason,
|
||||
usage: makeUsage(
|
||||
(result.prompt_eval_count as number) ?? undefined,
|
||||
(result.eval_count as number) ?? undefined,
|
||||
),
|
||||
warnings: [],
|
||||
request: { body: requestBody },
|
||||
};
|
||||
},
|
||||
|
||||
async doStream(callOptions: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult> {
|
||||
const messages = convertPromptToOllamaMessages(callOptions.prompt);
|
||||
const tools = convertToolsToOllamaTools(callOptions.tools);
|
||||
|
||||
const ollamaModelOptions: Record<string, unknown> = { ...ollamaOpts };
|
||||
if (callOptions.temperature !== undefined) ollamaModelOptions.temperature = callOptions.temperature;
|
||||
if (callOptions.topP !== undefined) ollamaModelOptions.top_p = callOptions.topP;
|
||||
if (callOptions.topK !== undefined) ollamaModelOptions.top_k = callOptions.topK;
|
||||
if (callOptions.maxOutputTokens !== undefined) ollamaModelOptions.num_predict = callOptions.maxOutputTokens;
|
||||
if (callOptions.seed !== undefined) ollamaModelOptions.seed = callOptions.seed;
|
||||
if (callOptions.stopSequences) ollamaModelOptions.stop = callOptions.stopSequences;
|
||||
const { think, ...modelOpts } = ollamaModelOptions;
|
||||
|
||||
const requestBody: Record<string, unknown> = {
|
||||
model: modelId,
|
||||
messages,
|
||||
stream: true,
|
||||
options: modelOpts,
|
||||
};
|
||||
|
||||
if (ollamaOpts.think !== undefined) {
|
||||
requestBody.think = ollamaOpts.think;
|
||||
}
|
||||
|
||||
if (tools) requestBody.tools = tools;
|
||||
|
||||
const response = await fetch(`${baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(requestBody),
|
||||
signal: callOptions.abortSignal,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const body = await response.text();
|
||||
throw new Error(`Ollama API error ${response.status}: ${body}`);
|
||||
}
|
||||
|
||||
const reader = response.body!.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
const textId = generateId();
|
||||
const reasoningId = generateId();
|
||||
let textStarted = false;
|
||||
let reasoningStarted = false;
|
||||
let hasToolCalls = false;
|
||||
let closed = false;
|
||||
|
||||
const stream = new ReadableStream<LanguageModelV3StreamPart>({
|
||||
async pull(controller) {
|
||||
if (closed) return;
|
||||
|
||||
const processLine = (line: string) => {
|
||||
if (!line.trim()) return;
|
||||
let json: Record<string, unknown>;
|
||||
try {
|
||||
json = JSON.parse(line);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
const msg = json.message as Record<string, unknown> | undefined;
|
||||
|
||||
// Handle thinking/reasoning content
|
||||
if (msg?.thinking && typeof msg.thinking === 'string') {
|
||||
if (!reasoningStarted) {
|
||||
reasoningStarted = true;
|
||||
controller.enqueue({ type: 'reasoning-start', id: reasoningId });
|
||||
}
|
||||
controller.enqueue({ type: 'reasoning-delta', id: reasoningId, delta: msg.thinking });
|
||||
}
|
||||
|
||||
// Handle text content
|
||||
if (msg?.content && typeof msg.content === 'string') {
|
||||
if (reasoningStarted && !textStarted) {
|
||||
controller.enqueue({ type: 'reasoning-end', id: reasoningId });
|
||||
}
|
||||
if (!textStarted) {
|
||||
textStarted = true;
|
||||
controller.enqueue({ type: 'text-start', id: textId });
|
||||
}
|
||||
controller.enqueue({ type: 'text-delta', id: textId, delta: msg.content });
|
||||
}
|
||||
|
||||
// Handle tool calls
|
||||
if (Array.isArray(msg?.tool_calls)) {
|
||||
hasToolCalls = true;
|
||||
for (const tc of msg!.tool_calls as Array<Record<string, unknown>>) {
|
||||
const fn = tc.function as Record<string, unknown>;
|
||||
const callId = generateId();
|
||||
controller.enqueue({
|
||||
type: 'tool-call',
|
||||
toolCallId: callId,
|
||||
toolName: fn.name as string,
|
||||
input: JSON.stringify(fn.arguments),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Handle done
|
||||
if (json.done) {
|
||||
if (reasoningStarted && !textStarted) {
|
||||
controller.enqueue({ type: 'reasoning-end', id: reasoningId });
|
||||
}
|
||||
if (textStarted) {
|
||||
controller.enqueue({ type: 'text-end', id: textId });
|
||||
}
|
||||
controller.enqueue({
|
||||
type: 'finish',
|
||||
finishReason: hasToolCalls
|
||||
? makeFinishReason('tool_calls')
|
||||
: makeFinishReason('stop'),
|
||||
usage: makeUsage(
|
||||
(json.prompt_eval_count as number) ?? undefined,
|
||||
(json.eval_count as number) ?? undefined,
|
||||
),
|
||||
});
|
||||
closed = true;
|
||||
controller.close();
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
let buffer = '';
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) {
|
||||
if (buffer.trim()) processLine(buffer);
|
||||
if (!closed) {
|
||||
controller.enqueue({
|
||||
type: 'finish',
|
||||
finishReason: makeFinishReason('stop'),
|
||||
usage: makeUsage(undefined, undefined),
|
||||
});
|
||||
closed = true;
|
||||
controller.close();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() || '';
|
||||
for (const line of lines) {
|
||||
processLine(line);
|
||||
if (closed) return;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (!closed) {
|
||||
controller.error(error);
|
||||
closed = true;
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
return {
|
||||
stream,
|
||||
request: { body: requestBody },
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
return model;
|
||||
}
|
||||
Reference in New Issue
Block a user