Files
smartai/ts/abstract.classes.multimodal.ts

217 lines
6.4 KiB
TypeScript
Raw Permalink Normal View History

import * as plugins from './plugins.js';
/**
* Message format for chat interactions
*/
export interface ChatMessage {
role: 'assistant' | 'user' | 'system';
content: string;
}
/**
* Options for chat interactions
*/
export interface ChatOptions {
systemMessage: string;
userMessage: string;
messageHistory: ChatMessage[];
}
/**
* Response format for chat interactions
*/
export interface ChatResponse {
role: 'assistant';
message: string;
}
/**
* Options for research interactions
*/
export interface ResearchOptions {
query: string;
searchDepth?: 'basic' | 'advanced' | 'deep';
maxSources?: number;
includeWebSearch?: boolean;
background?: boolean;
}
/**
* Response format for research interactions
*/
export interface ResearchResponse {
answer: string;
sources: Array<{
url: string;
title: string;
snippet: string;
}>;
searchQueries?: string[];
metadata?: any;
}
/**
* Options for image generation
*/
export interface ImageGenerateOptions {
prompt: string;
model?: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2';
quality?: 'low' | 'medium' | 'high' | 'standard' | 'hd' | 'auto';
size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | '1792x1024' | '1024x1792' | 'auto';
style?: 'vivid' | 'natural';
background?: 'transparent' | 'opaque' | 'auto';
outputFormat?: 'png' | 'jpeg' | 'webp';
outputCompression?: number; // 0-100 for webp/jpeg
moderation?: 'low' | 'auto';
n?: number; // Number of images to generate
stream?: boolean;
partialImages?: number; // 0-3 for streaming
}
/**
* Options for image editing
*/
export interface ImageEditOptions {
image: Buffer;
prompt: string;
mask?: Buffer;
model?: 'gpt-image-1' | 'dall-e-2';
quality?: 'low' | 'medium' | 'high' | 'standard' | 'auto';
size?: '256x256' | '512x512' | '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
background?: 'transparent' | 'opaque' | 'auto';
outputFormat?: 'png' | 'jpeg' | 'webp';
outputCompression?: number;
n?: number;
stream?: boolean;
partialImages?: number;
}
/**
* Response format for image operations
*/
export interface ImageResponse {
images: Array<{
b64_json?: string;
url?: string;
revisedPrompt?: string;
}>;
metadata?: {
model: string;
quality?: string;
size?: string;
outputFormat?: string;
tokensUsed?: number;
};
}
/**
* Abstract base class for multi-modal AI models.
* Provides a common interface for different AI providers (OpenAI, Anthropic, Perplexity, Ollama)
*/
2024-04-04 02:47:44 +02:00
export abstract class MultiModalModel {
/**
* SmartPdf instance for document processing
* Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily
*/
protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null;
/**
* Ensures SmartPdf instance is initialized and ready
* Call this before using smartpdfInstance in document processing methods
*/
protected async ensureSmartpdfReady(): Promise<void> {
if (!this.smartpdfInstance) {
this.smartpdfInstance = new plugins.smartpdf.SmartPdf();
await this.smartpdfInstance.start();
}
}
2024-04-04 02:47:44 +02:00
/**
* Initializes the model and any necessary resources
* Should be called before using any other methods
2024-04-04 02:47:44 +02:00
*/
public async start(): Promise<void> {
// SmartPdf is now lazy-loaded only when needed for PDF processing
// This avoids starting a browser unless document() method is actually used
}
2024-04-04 02:47:44 +02:00
/**
* Cleans up any resources used by the model
* Should be called when the model is no longer needed
2024-04-04 02:47:44 +02:00
*/
public async stop(): Promise<void> {
if (this.smartpdfInstance) {
await this.smartpdfInstance.stop();
this.smartpdfInstance = null;
}
}
2024-04-25 10:49:07 +02:00
/**
* Synchronous chat interaction with the model
* @param optionsArg Options containing system message, user message, and message history
* @returns Promise resolving to the assistant's response
*/
public abstract chat(optionsArg: ChatOptions): Promise<ChatResponse>;
2024-03-31 01:32:37 +01:00
2024-04-25 10:49:07 +02:00
/**
* Streaming interface for chat interactions
* Allows for real-time responses from the model
* @param input Stream of user messages
* @returns Stream of model responses
2024-04-25 10:49:07 +02:00
*/
public abstract chatStream(input: ReadableStream<Uint8Array>): Promise<ReadableStream<string>>;
2024-04-25 10:49:07 +02:00
/**
* Text-to-speech conversion
* @param optionsArg Options containing the message to convert to speech
* @returns Promise resolving to a readable stream of audio data
* @throws Error if the provider doesn't support audio generation
*/
public abstract audio(optionsArg: { message: string }): Promise<NodeJS.ReadableStream>;
/**
* Vision-language processing
* @param optionsArg Options containing the image and prompt for analysis
* @returns Promise resolving to the model's description or analysis of the image
* @throws Error if the provider doesn't support vision tasks
*/
public abstract vision(optionsArg: { image: Buffer; prompt: string }): Promise<string>;
/**
* Document analysis and processing
* @param optionsArg Options containing system message, user message, PDF documents, and message history
* @returns Promise resolving to the model's analysis of the documents
* @throws Error if the provider doesn't support document processing
*/
public abstract document(optionsArg: {
systemMessage: string;
userMessage: string;
pdfDocuments: Uint8Array[];
messageHistory: ChatMessage[];
}): Promise<{ message: any }>;
/**
* Research and web search capabilities
* @param optionsArg Options containing the research query and configuration
* @returns Promise resolving to the research results with sources
* @throws Error if the provider doesn't support research capabilities
*/
public abstract research(optionsArg: ResearchOptions): Promise<ResearchResponse>;
/**
* Image generation from text prompts
* @param optionsArg Options containing the prompt and generation parameters
* @returns Promise resolving to the generated image(s)
* @throws Error if the provider doesn't support image generation
*/
public abstract imageGenerate(optionsArg: ImageGenerateOptions): Promise<ImageResponse>;
/**
* Image editing and inpainting
* @param optionsArg Options containing the image, prompt, and editing parameters
* @returns Promise resolving to the edited image(s)
* @throws Error if the provider doesn't support image editing
*/
public abstract imageEdit(optionsArg: ImageEditOptions): Promise<ImageResponse>;
2024-03-31 01:32:37 +01:00
}