217 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
			
		
		
	
	
			217 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			TypeScript
		
	
	
	
	
	
import * as plugins from './plugins.js';
 | 
						|
 | 
						|
/**
 * A single message exchanged in a chat conversation.
 */
export interface ChatMessage {
  /** Who authored the message. */
  role: 'assistant' | 'user' | 'system';
  /** Text body of the message. */
  content: string;
}
 | 
						|
 | 
						|
/**
 * Input for a chat request: the system message, the new user message,
 * and the messages exchanged so far.
 */
export interface ChatOptions {
  /** System-level instruction text for the model. */
  systemMessage: string;
  /** The user's message for this request. */
  userMessage: string;
  /** Earlier messages of the conversation. */
  messageHistory: ChatMessage[];
}
 | 
						|
 | 
						|
/**
 * Result of a chat request.
 */
export interface ChatResponse {
  /** Always `'assistant'`: a response is authored by the model. */
  role: 'assistant';
  /** Text of the assistant's reply. */
  message: string;
}
 | 
						|
 | 
						|
/**
 * Input for a research request. Only `query` is required; how the optional
 * settings are interpreted is up to the provider implementation.
 */
export interface ResearchOptions {
  /** The research question or search query. */
  query: string;
  /** Requested thoroughness of the research. */
  searchDepth?: 'basic' | 'advanced' | 'deep';
  /** Upper bound on the number of sources (presumably those returned; confirm per provider). */
  maxSources?: number;
  /** Whether web search should be part of the research. */
  includeWebSearch?: boolean;
  /** NOTE(review): presumably requests background (non-blocking) execution; confirm per provider. */
  background?: boolean;
}
 | 
						|
 | 
						|
/**
 * Result of a research request: the answer plus the sources backing it.
 */
export interface ResearchResponse {
  /** The answer text. */
  answer: string;
  /** Sources associated with the answer. */
  sources: Array<{
    /** Location of the source. */
    url: string;
    /** Title of the source. */
    title: string;
    /** Excerpt taken from the source. */
    snippet: string;
  }>;
  /** Search queries associated with the research, when reported. */
  searchQueries?: string[];
  /** Free-form extra data; its shape is not constrained by this type. */
  metadata?: any;
}
 | 
						|
 | 
						|
/**
 * Input for image generation. Only `prompt` is required.
 *
 * NOTE(review): the option unions cover several models; which values a
 * given model accepts is not expressed by this type. Confirm against the
 * provider's API.
 */
export interface ImageGenerateOptions {
  /** Text description of the image to generate. */
  prompt: string;
  /** Image model to use. */
  model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
  /** Requested rendering quality. */
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
  /** Requested output dimensions (`<width>x<height>`) or `'auto'`. */
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
  /** Requested visual style. */
  style?: 'vivid' | 'natural';
  /** Requested background handling. */
  background?: 'transparent' | 'opaque' | 'auto';
  /** Encoding of the returned image. */
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** Compression level, 0-100, for webp/jpeg output. */
  outputCompression?: number;
  /** Moderation setting. */
  moderation?: 'low' | 'auto';
  /** Number of images to generate. */
  n?: number;
  /** Whether to stream the result. */
  stream?: boolean;
  /** Number of partial images (0-3) when streaming. */
  partialImages?: number;
}
 | 
						|
 | 
						|
/**
 * Input for image editing. `image` and `prompt` are required.
 */
export interface ImageEditOptions {
  /** Binary data of the image to edit. */
  image: Buffer;
  /** Text description of the desired edit. */
  prompt: string;
  /** Optional mask image (presumably marks the region to edit; confirm per provider). */
  mask?: Buffer;
  /** Image model to use. */
  model?: 'gpt-image-1' | 'dall-e-2';
  /** Requested rendering quality. */
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
  /** Requested output dimensions (`<width>x<height>`) or `'auto'`. */
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
  /** Requested background handling. */
  background?: 'transparent' | 'opaque' | 'auto';
  /** Encoding of the returned image. */
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** Compression level for the output (presumably 0-100 for webp/jpeg, as in generation). */
  outputCompression?: number;
  /** Number of images to produce. */
  n?: number;
  /** Whether to stream the result. */
  stream?: boolean;
  /** Number of partial images when streaming (presumably 0-3, as in generation). */
  partialImages?: number;
}
 | 
						|
 | 
						|
/**
 * Result of an image generation or editing request.
 */
export interface ImageResponse {
  /** The produced images; each entry may carry base64 data, a URL, or both. */
  images: Array<{
    /** Image data as a base64 string, when provided. */
    b64_json?: string;
    /** URL of the image, when provided. */
    url?: string;
    /** Revised version of the prompt, when one is reported. */
    revisedPrompt?: string;
  }>;
  /** Details about how the images were produced, when reported. */
  metadata?: {
    /** Name of the model that produced the images. */
    model: string;
    /** Quality setting that was applied. */
    quality?: string;
    /** Size of the produced images. */
    size?: string;
    /** Encoding of the produced images. */
    outputFormat?: string;
    /** Token usage for the request. */
    tokensUsed?: number;
  };
}
 | 
						|
 | 
						|
/**
 * Abstract base class for multi-modal AI models.
 * Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama).
 *
 * The base class only manages the lifecycle of the lazily created SmartPdf
 * helper; every capability method is abstract and supplied by the provider.
 */
export abstract class MultiModalModel {
  /**
   * SmartPdf instance for document processing.
   * Lazy-loaded only when PDF processing is needed, to avoid starting a browser unnecessarily.
   * `null` until `ensureSmartpdfReady()` has created it, and again after `stop()`.
   */
  protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;

  /**
   * Ensures the SmartPdf instance is created and started.
   * Call this before using `smartpdfInstance` in document processing methods.
   *
   * If starting fails, the instance is discarded and the error rethrown, so a
   * later call retries instead of treating a never-started instance as ready.
   *
   * NOTE: the instance is stored before `start()` resolves, so a caller that
   * arrives while a start is still pending returns immediately.
   *
   * @throws whatever `SmartPdf.start()` rejects with
   */
  protected async ensureSmartpdfReady(): Promise<void> {
    if (this.smartpdfInstance) {
      return;
    }
    const instance = new plugins.smartpdf.SmartPdf();
    this.smartpdfInstance = instance;
    try {
      await instance.start();
    } catch (error) {
      // Only clear the field if it still refers to the instance that failed.
      if (this.smartpdfInstance === instance) {
        this.smartpdfInstance = null;
      }
      throw error;
    }
  }

  /**
   * Initializes the model and any necessary resources.
   * Should be called before using any other methods.
   */
  public async start(): Promise<void> {
    // SmartPdf is now lazy-loaded only when needed for PDF processing.
    // This avoids starting a browser unless document() method is actually used.
  }

  /**
   * Cleans up any resources used by the model.
   * Should be called when the model is no longer needed.
   * Stops the SmartPdf instance if one was created; otherwise does nothing.
   */
  public async stop(): Promise<void> {
    if (this.smartpdfInstance) {
      await this.smartpdfInstance.stop();
      this.smartpdfInstance = null;
    }
  }

  /**
   * One-shot (non-streaming) chat interaction with the model.
   * @param optionsArg Options containing system message, user message, and message history
   * @returns Promise resolving to the assistant's response
   */
  public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;

  /**
   * Streaming interface for chat interactions.
   * Allows for real-time responses from the model.
   * @param input Stream of user messages
   * @returns Stream of model responses
   */
  public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;

  /**
   * Text-to-speech conversion.
   * @param optionsArg Options containing the message to convert to speech
   * @returns Promise resolving to a readable stream of audio data
   * @throws Error if the provider doesn't support audio generation
   */
  public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;

  /**
   * Vision-language processing.
   * @param optionsArg Options containing the image and prompt for analysis
   * @returns Promise resolving to the model's description or analysis of the image
   * @throws Error if the provider doesn't support vision tasks
   */
  public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;

  /**
   * Document analysis and processing.
   * @param optionsArg Options containing system message, user message, PDF documents, and message history
   * @returns Promise resolving to the model's analysis of the documents
   * @throws Error if the provider doesn't support document processing
   */
  public abstract document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }>;

  /**
   * Research and web search capabilities.
   * @param optionsArg Options containing the research query and configuration
   * @returns Promise resolving to the research results with sources
   * @throws Error if the provider doesn't support research capabilities
   */
  public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;

  /**
   * Image generation from text prompts.
   * @param optionsArg Options containing the prompt and generation parameters
   * @returns Promise resolving to the generated image(s)
   * @throws Error if the provider doesn't support image generation
   */
  public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;

  /**
   * Image editing and inpainting.
   * @param optionsArg Options containing the image, prompt, and editing parameters
   * @returns Promise resolving to the edited image(s)
   * @throws Error if the provider doesn't support image editing
   */
  public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
}
 |