import * as plugins from './smartocr.plugins.js';
import * as paths from './smartocr.paths.js';
import type {
  ISmartOcrConstructorOptions,
  ISmartOcrImageAiOptions,
  ISmartOcrPdfAiOptions,
  TSmartOcrAiResult,
} from './smartocr.interfaces.js';

/** System prompt used for PDF text extraction when the caller does not supply one. */
const defaultSmartAiPdfOcrSystemMessage =
  'You are a precise OCR engine. Extract text faithfully and do not summarize.';

/** User prompt used for PDF text extraction when the caller does not supply one. */
const defaultSmartAiPdfOcrUserMessage =
  'Extract all readable text from this PDF. Preserve page order, line breaks, table structure, and obvious headings. Return only the extracted text.';

/**
 * Wraps a value in single quotes for safe interpolation into a bash command line.
 * Embedded single quotes are emitted as `'\''` (close quote, escaped quote, reopen).
 */
const quoteShellArg = (valueArg: string): string => `'${valueArg.replace(/'/g, `'\\''`)}'`;

/**
 * OCR helper that can run PDFs through the `ocrmypdf` command line tool and
 * can delegate image / PDF text recognition to SmartAI based engines.
 */
export class SmartOcr {
  // STATIC

  /**
   * Constructs a SmartOcr instance and awaits its `init()` before returning it.
   *
   * @param optionsArg - constructor options; defaults to an empty object
   * @returns the initialized instance
   */
  public static async createAndInit(
    optionsArg: ISmartOcrConstructorOptions = {},
  ): Promise<SmartOcr> {
    const smartocrInstance = new SmartOcr(optionsArg);
    await smartocrInstance.init();
    return smartocrInstance;
  }

  // INSTANCE
  private readonly options: ISmartOcrConstructorOptions;

  /** Resolved once `init()` has completed successfully. */
  public readyDeferred = plugins.smartpromise.defer();
  public smartfileFactory = plugins.smartfile.SmartFileFactory.nodeFs();
  public smartshellInstance: plugins.smartshell.Smartshell;

  constructor(optionsArg: ISmartOcrConstructorOptions = {}) {
    this.options = optionsArg;
    this.smartshellInstance = new plugins.smartshell.Smartshell({ executor: 'bash' });
  }

  /**
   * Creates the working directory (`paths.noGitDir`, recursively) and looks up
   * the `ocrmypdf` binary.
   *
   * The lookup result itself is not used; the call only has an effect if the
   * lookup rejects (presumably when the binary is missing - confirm against
   * the smartshell `which` implementation).
   */
  public async init(): Promise<void> {
    await this.smartfileFactory.getSmartFs().directory(paths.noGitDir).recursive().create();
    await plugins.smartshell.which('ocrmypdf');
    // Signal readiness to anyone awaiting readyDeferred.promise.
    this.readyDeferred.resolve();
  }

  /**
   * Runs `ocrmypdf --rotate-pages` over the given PDF and returns the processed PDF.
   *
   * The input is written to a uniquely named temporary file inside
   * `paths.noGitDir`, processed into a second temporary file, and both files
   * are deleted before the method returns.
   *
   * @param pdfBufferArg - contents of the PDF to process
   * @returns contents of the PDF produced by ocrmypdf
   */
  public async processPdfBuffer(pdfBufferArg: Buffer): Promise<Buffer> {
    const uniqueString = plugins.smartunique.uni('doc_');
    const originalPath = plugins.path.join(paths.noGitDir, `${uniqueString}.pdf`);
    const processedPath = plugins.path.join(paths.noGitDir, `${uniqueString}_processed.pdf`);

    const originalSmartfile = this.smartfileFactory.fromBuffer(
      originalPath,
      pdfBufferArg,
      paths.noGitDir,
    );
    await originalSmartfile.write();

    try {
      // Paths are quoted so a working directory containing spaces or shell
      // metacharacters does not break (or alter) the command.
      await this.smartshellInstance.exec(
        `ocrmypdf --rotate-pages ${quoteShellArg(originalPath)} ${quoteShellArg(processedPath)}`,
      );
      const processedSmartfile = await this.smartfileFactory.fromFilePath(
        processedPath,
        paths.noGitDir,
      );
      // The returned buffer is taken from the already loaded smartfile object,
      // so the file on disk can be removed first.
      await processedSmartfile.delete();
      return processedSmartfile.contentBuffer;
    } finally {
      // Remove the temporary input even when ocrmypdf or the read-back failed.
      await originalSmartfile.delete();
    }
  }

  // NOTE(review): the type argument of the declared return type is not
  // recoverable from the source under review, so the return type is inferred.
  // TSmartOcrAiResult is presumably the intended annotation - confirm.
  /**
   * Recognizes an image through a SmartAI OCR engine.
   *
   * The image is passed to the engine base64 encoded together with
   * `optionsArg.mimeType`; `optionsArg.recognizeOptions` is forwarded unchanged.
   *
   * @param imageBufferArg - raw image bytes
   * @param optionsArg - mime type, recognize options and engine selection
   * @returns whatever the selected engine's `recognizeImage` resolves to
   * @throws Error when no engine and no Mistral API key can be resolved
   */
  public async recognizeImageBufferWithSmartAi(
    imageBufferArg: Buffer,
    optionsArg: ISmartOcrImageAiOptions,
  ) {
    const smartAiOcrEngine = this.getSmartAiOcrEngine(optionsArg);
    return smartAiOcrEngine.recognizeImage(
      {
        dataBase64: imageBufferArg.toString('base64'),
        mimeType: optionsArg.mimeType,
      },
      optionsArg.recognizeOptions,
    );
  }

  // NOTE(review): the type argument of the declared return type is not
  // recoverable from the source under review, so the return type is inferred
  // from analyzeDocuments - confirm the intended annotation.
  /**
   * Sends a PDF to `smartaiDocument.analyzeDocuments` for text extraction.
   *
   * The module level default system / user messages are used whenever the
   * corresponding option is null or undefined.
   *
   * @param pdfBufferArg - contents of the PDF to analyze
   * @param optionsArg - model, optional prompts and optional message history
   * @returns whatever `analyzeDocuments` resolves to
   */
  public async extractTextFromPdfBufferWithSmartAi(
    pdfBufferArg: Buffer,
    optionsArg: ISmartOcrPdfAiOptions,
  ) {
    return plugins.smartaiDocument.analyzeDocuments({
      model: optionsArg.model,
      systemMessage: optionsArg.systemMessage ?? defaultSmartAiPdfOcrSystemMessage,
      userMessage: optionsArg.userMessage ?? defaultSmartAiPdfOcrUserMessage,
      pdfDocuments: [pdfBufferArg],
      messageHistory: optionsArg.messageHistory,
    });
  }

  /**
   * Resolves the OCR engine to use for a call.
   *
   * Precedence: engine on the call options, then engine on the constructor
   * options, then a new Mistral engine. For the Mistral engine the
   * constructor level `mistralOcrOptions` are overridden by the call level
   * ones, and the API key falls back to the `MISTRAL_API_KEY` environment
   * variable.
   *
   * @throws Error when neither an engine nor an API key is available
   */
  private getSmartAiOcrEngine(
    optionsArg: ISmartOcrImageAiOptions,
  ): plugins.smartaiOcr.ISmartAiOcrEngine {
    if (optionsArg.smartAiOcrEngine) {
      return optionsArg.smartAiOcrEngine;
    }
    if (this.options.smartAiOcrEngine) {
      return this.options.smartAiOcrEngine;
    }

    const mistralOcrOptions = {
      ...this.options.mistralOcrOptions,
      ...optionsArg.mistralOcrOptions,
    };
    const apiKey = mistralOcrOptions.apiKey ?? process.env.MISTRAL_API_KEY;
    if (!apiKey) {
      throw new Error(
        'SmartAI OCR requires smartAiOcrEngine, mistralOcrOptions.apiKey, or MISTRAL_API_KEY.',
      );
    }

    return plugins.smartaiOcr.createMistralOcrEngine({
      ...mistralOcrOptions,
      apiKey,
    });
  }
}