// Shared chat, research and image option/response types, plus the abstract MultiModalModel base class.
import * as plugins from './plugins.js';

/**
 * Message format for chat interactions.
 */
export interface ChatMessage {
  /** Role the message is attributed to. */
  role: 'assistant' | 'user' | 'system';
  /** Text content of the message. */
  content: string;
}

/**
 * Options for chat interactions.
 */
export interface ChatOptions {
  /** System message for the interaction. */
  systemMessage: string;
  /** User message for the interaction. */
  userMessage: string;
  /** Message history passed along with the system and user message. */
  messageHistory: ChatMessage[];
}

/**
 * Response format for chat interactions.
 */
export interface ChatResponse {
  /** Always `'assistant'` for a chat response. */
  role: 'assistant';
  /** Text of the response. */
  message: string;
}

/**
 * Options for research interactions.
 *
 * Only `query` is required; every other field is optional.
 */
export interface ResearchOptions {
  /** The research query. */
  query: string;
  /** Requested search depth. */
  searchDepth?: 'basic' | 'advanced' | 'deep';
  /** NOTE(review): presumably an upper bound on the number of sources returned; confirm with the provider implementations. */
  maxSources?: number;
  /** Whether web search should be included. */
  includeWebSearch?: boolean;
  /** NOTE(review): semantics are not visible in this file (possibly background/asynchronous execution); confirm before relying on it. */
  background?: boolean;
}

/**
 * Response format for research interactions.
 */
export interface ResearchResponse {
  /** The research answer text. */
  answer: string;
  /** Sources accompanying the answer. */
  sources: Array<{
    url: string;
    title: string;
    snippet: string;
  }>;
  /** Search queries associated with the response, when provided. */
  searchQueries?: string[];
  /** Untyped, provider-specific extra data; callers must narrow it before use. */
  metadata?: any;
}

/**
 * Options for image generation.
 *
 * Only `prompt` is required; every other field is optional.
 */
export interface ImageGenerateOptions {
  /** Text prompt to generate the image(s) from. */
  prompt: string;
  /** Model identifier. */
  model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
  /** Requested quality level. */
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
  /** Requested output dimensions, or `'auto'`. */
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
  /** Requested image style. */
  style?: 'vivid' | 'natural';
  /** Requested background handling. */
  background?: 'transparent' | 'opaque' | 'auto';
  /** Requested output file format. */
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** Compression level, 0-100, for webp/jpeg output. */
  outputCompression?: number;
  /** Moderation setting. */
  moderation?: 'low' | 'auto';
  /** Number of images to generate. */
  n?: number;
  /** Whether streaming is requested. */
  stream?: boolean;
  /** Partial image count, 0-3, for streaming. */
  partialImages?: number;
}

/**
 * Options for image editing.
 *
 * `image` and `prompt` are required; every other field is optional.
 */
export interface ImageEditOptions {
  /** Image data to edit. */
  image: Buffer;
  /** Text prompt for the edit. */
  prompt: string;
  /** Optional mask data. NOTE(review): presumably marks the region to edit; confirm with the provider implementations. */
  mask?: Buffer;
  /** Model identifier. */
  model?: 'gpt-image-1' | 'dall-e-2';
  /** Requested quality level. */
  quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
  /** Requested output dimensions, or `'auto'`. */
  size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
  /** Requested background handling. */
  background?: 'transparent' | 'opaque' | 'auto';
  /** Requested output file format. */
  outputFormat?: 'png' | 'jpeg' | 'webp';
  /** Compression level for the output. */
  outputCompression?: number;
  /** Number of images. */
  n?: number;
  /** Whether streaming is requested. */
  stream?: boolean;
  /** Partial image count when streaming. */
  partialImages?: number;
}

/**
 * Response format for image operations.
 */
export interface ImageResponse {
  /** Resulting images; each entry may carry base64 data, a URL, and/or a revised prompt. */
  images: Array<{
    b64_json?: string;
    url?: string;
    revisedPrompt?: string;
  }>;
  /** Optional details about the operation; `model` is required whenever metadata is present. */
  metadata?: {
    model: string;
    quality?: string;
    size?: string;
    outputFormat?: string;
    tokensUsed?: number;
  };
}

/**
 * Abstract base class for multi-modal AI models.
 * Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama).
 *
 * The base class owns only the SmartPdf lifecycle (`start` / `stop`); every
 * model capability is declared abstract and must be supplied by the subclass.
 */
export abstract class MultiModalModel {
  /**
   * SmartPdf instance for document processing.
   * Shared across all methods that need PDF functionality.
   *
   * Assigned in `start()`; it is unset before `start()` has run, which is why
   * `stop()` checks it before use.
   */
  protected smartpdfInstance: plugins.smartpdf.SmartPdf;

  /**
   * Initializes the model and any necessary resources.
   * Should be called before using any other methods.
   *
   * Each call creates and starts a new SmartPdf instance and overwrites the
   * stored reference; a previously stored instance is not stopped here.
   */
  public async start(): Promise<void> {
    this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
    await this.smartpdfInstance.start();
  }

  /**
   * Cleans up any resources used by the model.
   * Should be called when the model is no longer needed.
   *
   * Does nothing when no SmartPdf instance has been created. The stored
   * reference is not cleared afterwards.
   */
  public async stop(): Promise<void> {
    if (this.smartpdfInstance) {
      await this.smartpdfInstance.stop();
    }
  }

  /**
   * Non-streaming chat interaction with the model.
   * @param optionsArg Options containing system message, user message, and message history
   * @returns Promise resolving to the assistant's response
   */
  public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;

  /**
   * Streaming interface for chat interactions.
   * Allows for real-time responses from the model.
   * @param input Stream of user messages, delivered as bytes
   * @returns Promise resolving to a stream of model responses, delivered as strings
   */
  public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;

  /**
   * Text-to-speech conversion.
   * @param optionsArg Options containing the message to convert to speech
   * @returns Promise resolving to a readable stream of audio data
   * @throws Error if the provider doesn't support audio generation
   */
  public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;

  /**
   * Vision-language processing.
   * @param optionsArg Options containing the image and prompt for analysis
   * @returns Promise resolving to the model's description or analysis of the image
   * @throws Error if the provider doesn't support vision tasks
   */
  public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;

  /**
   * Document analysis and processing.
   * @param optionsArg Options containing system message, user message, PDF documents, and message history
   * @returns Promise resolving to the model's analysis of the documents (the `message` payload is untyped)
   * @throws Error if the provider doesn't support document processing
   */
  public abstract document(optionsArg: {
    systemMessage: string;
    userMessage: string;
    pdfDocuments: Uint8Array[];
    messageHistory: ChatMessage[];
  }): Promise<{ message: any }>;

  /**
   * Research and web search capabilities.
   * @param optionsArg Options containing the research query and configuration
   * @returns Promise resolving to the research results with sources
   * @throws Error if the provider doesn't support research capabilities
   */
  public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;

  /**
   * Image generation from text prompts.
   * @param optionsArg Options containing the prompt and generation parameters
   * @returns Promise resolving to the generated image(s)
   * @throws Error if the provider doesn't support image generation
   */
  public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;

  /**
   * Image editing and inpainting.
   * @param optionsArg Options containing the image, prompt, and editing parameters
   * @returns Promise resolving to the edited image(s)
   * @throws Error if the provider doesn't support image editing
   */
  public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
}