From b9745a1869e663b104807428e1cf1f99979952e1 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Fri, 10 Oct 2025 07:32:21 +0000 Subject: [PATCH] fix(MultiModalModel): Lazy-load SmartPdf and guard document processing across providers; ensure SmartPdf is initialized only when needed --- changelog.md | 10 ++++++++++ ts/00_commitinfo_data.ts | 2 +- ts/abstract.classes.multimodal.ts | 22 +++++++++++++++++----- ts/provider.anthropic.ts | 5 ++++- ts/provider.ollama.ts | 5 ++++- ts/provider.openai.ts | 5 ++++- ts/provider.xai.ts | 7 +++++-- 7 files changed, 45 insertions(+), 11 deletions(-) diff --git a/changelog.md b/changelog.md index 655df75..3cba9d0 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,15 @@ # Changelog +## 2025-10-10 - 0.7.7 - fix(MultiModalModel) +Lazy-load SmartPdf and guard document processing across providers; ensure SmartPdf is initialized only when needed + +- Make SmartPdf lazy-loaded: smartpdfInstance is now nullable and no longer started automatically in start() +- Add ensureSmartpdfReady() to initialize and start SmartPdf on demand before document processing +- Providers updated (OpenAI, Anthropic, Ollama, xAI) to call ensureSmartpdfReady() and use the smartpdfInstance for PDF -> image conversion +- stop() now cleans up and nullifies smartpdfInstance to release resources +- Avoids starting a browser/process unless document() is actually used (reduces unnecessary resource usage) +- Add local Claude permissions file (.claude/settings.local.json) for tooling/configuration + ## 2025-10-09 - 0.7.6 - fix(provider.elevenlabs) Provide default ElevenLabs TTS voice fallback and add local tool/project configs diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index ce7364d..545f843 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartai', - version: '0.7.6', + version: '0.7.7', description: 'SmartAi is a versatile TypeScript library designed to 
facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.' } diff --git a/ts/abstract.classes.multimodal.ts b/ts/abstract.classes.multimodal.ts index 990205c..3b80d35 100644 --- a/ts/abstract.classes.multimodal.ts +++ b/ts/abstract.classes.multimodal.ts @@ -111,19 +111,30 @@ export interface ImageResponse { export abstract class MultiModalModel { /** * SmartPdf instance for document processing - * Shared across all methods that need PDF functionality + * Lazy-loaded only when PDF processing is needed to avoid starting browser unnecessarily */ - protected smartpdfInstance: plugins.smartpdf.SmartPdf; + protected smartpdfInstance: plugins.smartpdf.SmartPdf | null = null; + + /** + * Ensures SmartPdf instance is initialized and ready + * Call this before using smartpdfInstance in document processing methods + */ + protected async ensureSmartpdfReady(): Promise<void> { + if (!this.smartpdfInstance) { + this.smartpdfInstance = new plugins.smartpdf.SmartPdf(); + await this.smartpdfInstance.start(); + } + } /** * Initializes the model and any necessary resources * Should be called before using any other methods */ public async start(): Promise<void> { - this.smartpdfInstance = new plugins.smartpdf.SmartPdf(); - await this.smartpdfInstance.start(); + // SmartPdf is now lazy-loaded only when needed for PDF processing + // This avoids starting a browser unless document() method is actually used } - + /** * Cleans up any resources used by the model * Should be called when the model is no longer needed @@ -131,6 +142,7 @@ export abstract class MultiModalModel { public async stop(): Promise<void> { if (this.smartpdfInstance) { await this.smartpdfInstance.stop(); + this.smartpdfInstance = null; } } diff --git a/ts/provider.anthropic.ts b/ts/provider.anthropic.ts index fb85d3a..288b0fe 100644 --- a/ts/provider.anthropic.ts +++ b/ts/provider.anthropic.ts @@ -192,11 +192,14 @@ export class
AnthropicProvider extends MultiModalModel { pdfDocuments: Uint8Array[]; messageHistory: ChatMessage[]; }): Promise<{ message: any }> { + // Ensure SmartPdf is initialized before processing documents + await this.ensureSmartpdfReady(); + // Convert PDF documents to images using SmartPDF let documentImageBytesArray: Uint8Array[] = []; for (const pdfDocument of optionsArg.pdfDocuments) { - const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); + const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument); documentImageBytesArray = documentImageBytesArray.concat(documentImageArray); } diff --git a/ts/provider.ollama.ts b/ts/provider.ollama.ts index e1f588e..e270305 100644 --- a/ts/provider.ollama.ts +++ b/ts/provider.ollama.ts @@ -216,11 +216,14 @@ export class OllamaProvider extends MultiModalModel { pdfDocuments: Uint8Array[]; messageHistory: ChatMessage[]; }): Promise<{ message: any }> { + // Ensure SmartPdf is initialized before processing documents + await this.ensureSmartpdfReady(); + // Convert PDF documents to images using SmartPDF let documentImageBytesArray: Uint8Array[] = []; for (const pdfDocument of optionsArg.pdfDocuments) { - const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); + const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument); documentImageBytesArray = documentImageBytesArray.concat(documentImageArray); } diff --git a/ts/provider.openai.ts b/ts/provider.openai.ts index 47a1614..4b2984a 100644 --- a/ts/provider.openai.ts +++ b/ts/provider.openai.ts @@ -173,11 +173,14 @@ export class OpenAiProvider extends MultiModalModel { content: any; }[]; }) { + // Ensure SmartPdf is initialized before processing documents + await this.ensureSmartpdfReady(); + let pdfDocumentImageBytesArray: Uint8Array[] = []; // Convert each PDF into one or more image byte arrays. 
for (const pdfDocument of optionsArg.pdfDocuments) { - const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); + const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument); pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray); } diff --git a/ts/provider.xai.ts b/ts/provider.xai.ts index ea6c0f3..4b8e148 100644 --- a/ts/provider.xai.ts +++ b/ts/provider.xai.ts @@ -149,11 +149,14 @@ export class XAIProvider extends MultiModalModel { pdfDocuments: Uint8Array[]; messageHistory: { role: string; content: string; }[]; }): Promise<{ message: any }> { + // Ensure SmartPdf is initialized before processing documents + await this.ensureSmartpdfReady(); + // First convert PDF documents to images let pdfDocumentImageBytesArray: Uint8Array[] = []; - + for (const pdfDocument of optionsArg.pdfDocuments) { - const documentImageArray = await this.smartpdfInstance.convertPDFToPngBytes(pdfDocument); + const documentImageArray = await this.smartpdfInstance!.convertPDFToPngBytes(pdfDocument); pdfDocumentImageBytesArray = pdfDocumentImageBytesArray.concat(documentImageArray); }