import * as plugins from './plugins.js';

/**
 * Message format for chat interactions
 */
export interface ChatMessage {
  /** Which party authored the message. */
  role: 'assistant' | 'user' | 'system';
  /** Text content of the message. */
  content: string;
}

/**
 * Options for chat interactions
 */
export interface ChatOptions {
  /** System message for the conversation. */
  systemMessage: string;
  /** The user message for this interaction. */
  userMessage: string;
  /** Earlier messages of the conversation. */
  messageHistory: ChatMessage[];
}

/**
 * Response format for chat interactions
 */
export interface ChatResponse {
  /** Always 'assistant'. */
  role: 'assistant';
  /** Text of the assistant's reply. */
  message: string;
}

/**
 * Options for research interactions
 */
export interface ResearchOptions {
  /** The research query. */
  query: string;
  searchDepth?: 'basic' | 'advanced' | 'deep';
  maxSources?: number;
  includeWebSearch?: boolean;
  background?: boolean;
}

/**
 * Response format for research interactions
 */
export interface ResearchResponse {
  /** The answer produced for the query. */
  answer: string;
  /** Sources backing the answer. */
  sources: Array<{
    url: string;
    title: string;
    snippet: string;
  }>;
  searchQueries?: string[];
  // Intentionally left as `any` to stay backward-compatible with existing consumers.
  metadata?: any;
}

/**
 * Options for image generation
 *
 * Note: per-member notes are written as block comments on purpose. A trailing
 * `//` comment comments out every following member if the file's line breaks
 * are ever lost.
 */
export interface ImageGenerateOptions {
  /** Text prompt to generate the image(s) from. */
  prompt: string;
  model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
  size?:
    | '256x256'
    | '512x512'
    | '1024x1024'
    | '1536x1024'
    | '1024x1536'
    | '1792x1024'
    | '1024x1792'
    | 'auto';
  style?: 'vivid' | 'natural';
  background?: 'transparent' | 'opaque' | 'auto';
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** 0-100 for webp/jpeg */
  outputCompression?: number;
  moderation?: 'low' | 'auto';
  /** Number of images to generate */
  n?: number;
  stream?: boolean;
  /** 0-3 for streaming */
  partialImages?: number;
}

/**
 * Options for image editing
 */
export interface ImageEditOptions {
  /** The image to edit. */
  image: Buffer;
  /** Text prompt describing the edit. */
  prompt: string;
  /** Optional mask buffer accompanying the image. */
  mask?: Buffer;
  model?: 'gpt-image-1' | 'dall-e-2';
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
  background?: 'transparent' | 'opaque' | 'auto';
  outputFormat?: 'png' | 'jpeg' | 'webp';
  outputCompression?: number;
  n?: number;
  stream?: boolean;
  partialImages?: number;
}

/**
 * Response format for image operations
 */
export interface ImageResponse {
  /** One entry per returned image. */
  images: Array<{
    b64_json?: string;
    url?: string;
    revisedPrompt?: string;
  }>;
  /** Optional details about the request that produced the images. */
  metadata?: {
    model: string;
    quality?: string;
    size?: string;
    outputFormat?: string;
    tokensUsed?: number;
  };
}

/**
 * Abstract base class for multi-modal AI models.
 * Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama)
 *
 * Every method returns a Promise with an explicit type argument; a bare
 * `Promise` (or a dangling `Promise>`) does not compile.
 */
export abstract class MultiModalModel {
  /**
   * SmartPdf instance for document processing
   * Shared across all methods that need PDF functionality
   * Assigned in start(), so it is unset until start() has run.
   */
  protected smartpdfInstance: plugins.smartpdf.SmartPdf;

  /**
   * Initializes the model and any necessary resources
   * Should be called before using any other methods
   * @returns Promise that resolves once the SmartPdf instance has been started
   */
  public async start(): Promise<void> {
    this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
    await this.smartpdfInstance.start();
  }

  /**
   * Cleans up any resources used by the model
   * Should be called when the model is no longer needed
   * @returns Promise that resolves once cleanup is done
   */
  public async stop(): Promise<void> {
    // start() may never have been called, in which case there is nothing to stop.
    if (this.smartpdfInstance) {
      await this.smartpdfInstance.stop();
    }
  }

  /**
   * Synchronous chat interaction with the model
   * @param optionsArg Options containing system message, user message, and message history
   * @returns Promise resolving to the assistant's response
   */
  public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;

  /**
   * Streaming interface for chat interactions
   * Allows for real-time responses from the model
   * @param input Stream of user messages
   * @returns Stream of model responses
   */
  // NOTE(review): the stream element types (Uint8Array in, string out) are reconstructed;
  // confirm against the provider implementations.
  public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;

  /**
   * Text-to-speech conversion
   * @param optionsArg Options containing the message to convert to speech
   * @returns Promise resolving to a readable stream of audio data
   * @throws Error if the provider doesn't support audio generation
   */
  // NOTE(review): a Node.js readable stream is assumed here; confirm against the providers.
  public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;

  /**
   * Vision-language processing
   * @param optionsArg Options containing the image and prompt for analysis
   * @returns Promise resolving to the model's description or analysis of the image
   * @throws Error if the provider doesn't support vision tasks
   */
  // NOTE(review): a plain-text result is assumed here; confirm against the providers.
  public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;

  /**
   * Document analysis and processing
   * @param optionsArg Options containing system message, user message, PDF documents, and message history
   * @returns Promise resolving to the model's analysis of the documents
   * @throws Error if the provider doesn't support document processing
   */
  public abstract document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }>;

  /**
   * Research and web search capabilities
   * @param optionsArg Options containing the research query and configuration
   * @returns Promise resolving to the research results with sources
   * @throws Error if the provider doesn't support research capabilities
   */
  public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;

  /**
   * Image generation from text prompts
   * @param optionsArg Options containing the prompt and generation parameters
   * @returns Promise resolving to the generated image(s)
   * @throws Error if the provider doesn't support image generation
   */
  public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;

  /**
   * Image editing and inpainting
   * @param optionsArg Options containing the image, prompt, and editing parameters
   * @returns Promise resolving to the edited image(s)
   * @throws Error if the provider doesn't support image editing
   */
  public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
}