// File: smartocr/ts/smartocr.classes.smartocr.ts
// (TypeScript · 103 lines · 3.8 KiB)

import * as plugins from './smartocr.plugins.js';
import * as paths from './smartocr.paths.js';
import type {
ISmartOcrConstructorOptions,
ISmartOcrImageAiOptions,
ISmartOcrPdfAiOptions,
TSmartOcrAiResult,
} from './smartocr.interfaces.js';
/**
 * Fallback system prompt for PDF text extraction; used by
 * `SmartOcr.extractTextFromPdfBufferWithSmartAi` when the caller supplies no `systemMessage`.
 */
const defaultSmartAiPdfOcrSystemMessage =
  'You are a precise OCR engine. Extract text faithfully and do not summarize.';

/**
 * Fallback user prompt for PDF text extraction; used by
 * `SmartOcr.extractTextFromPdfBufferWithSmartAi` when the caller supplies no `userMessage`.
 */
const defaultSmartAiPdfOcrUserMessage =
  'Extract all readable text from this PDF. Preserve page order, line breaks, table structure, and obvious headings. Return only the extracted text.';
/**
 * OCR helper that combines two strategies:
 *
 * - local PDF processing by shelling out to the `ocrmypdf` command line tool, and
 * - AI-backed recognition through the SmartAI OCR / document plugins.
 *
 * Use {@link SmartOcr.createAndInit} to obtain an instance whose working directory
 * has been created and for which `ocrmypdf` has been looked up.
 */
export class SmartOcr {
  // STATIC
  /**
   * Constructs a `SmartOcr` and awaits {@link SmartOcr.init} before returning it.
   *
   * @param optionsArg instance-wide defaults (OCR engine, Mistral OCR options)
   * @returns the initialized instance
   */
  public static async createAndInit(
    optionsArg: ISmartOcrConstructorOptions = {},
  ): Promise<SmartOcr> {
    const smartocrInstance = new SmartOcr(optionsArg);
    await smartocrInstance.init();
    return smartocrInstance;
  }

  // INSTANCE
  /** Instance-wide defaults; consulted when a call does not bring its own settings. */
  private readonly options: ISmartOcrConstructorOptions;

  /** Resolved once {@link SmartOcr.init} has completed successfully. */
  public readyDeferred = plugins.smartpromise.defer();

  /** File factory used to write and read the temporary PDFs. */
  public smartfileFactory = plugins.smartfile.SmartFileFactory.nodeFs();

  /** Shell (bash executor) used to invoke `ocrmypdf`. */
  public smartshellInstance: plugins.smartshell.Smartshell;

  /**
   * @param optionsArg instance-wide defaults (OCR engine, Mistral OCR options)
   */
  constructor(optionsArg: ISmartOcrConstructorOptions = {}) {
    this.options = optionsArg;
    this.smartshellInstance = new plugins.smartshell.Smartshell({
      executor: 'bash',
    });
  }

  /**
   * Prepares the instance: creates the working directory (`paths.noGitDir`),
   * looks up the `ocrmypdf` binary and finally resolves {@link SmartOcr.readyDeferred}.
   */
  public async init(): Promise<void> {
    await this.smartfileFactory.getSmartFs().directory(paths.noGitDir).recursive().create();
    // The lookup result itself is not needed; it is awaited so that a failing lookup
    // surfaces here. NOTE(review): presumably rejects when ocrmypdf is missing — confirm.
    await plugins.smartshell.which('ocrmypdf');
    // Nothing else in this class settles the deferred, so consumers awaiting
    // `readyDeferred.promise` would otherwise wait forever.
    this.readyDeferred.resolve();
  }

  /**
   * Runs `ocrmypdf --rotate-pages` over the given PDF and returns the processed PDF.
   *
   * The input is written to a uniquely named temporary file inside `paths.noGitDir`;
   * both the temporary input and the processed output are removed again before returning.
   *
   * @param pdfBufferArg contents of the PDF to process
   * @returns contents of the PDF produced by `ocrmypdf`
   */
  public async processPdfBuffer(pdfBufferArg: Buffer): Promise<Buffer> {
    const uniqueString = plugins.smartunique.uni('doc_');
    const originalPath = plugins.path.join(paths.noGitDir, `${uniqueString}.pdf`);
    const processedPath = plugins.path.join(paths.noGitDir, `${uniqueString}_processed.pdf`);
    const originalSmartfile = this.smartfileFactory.fromBuffer(
      originalPath,
      pdfBufferArg,
      paths.noGitDir,
    );
    await originalSmartfile.write();
    try {
      await this.smartshellInstance.exec(`ocrmypdf --rotate-pages ${originalPath} ${processedPath}`);
      const processedSmartfile = await this.smartfileFactory.fromFilePath(
        processedPath,
        paths.noGitDir,
      );
      // Capture the contents before the backing file is removed.
      const processedBuffer = processedSmartfile.contentBuffer;
      await processedSmartfile.delete();
      return processedBuffer;
    } finally {
      // Always remove the temporary input, even when ocrmypdf or the read-back failed;
      // otherwise failed runs pile up files in the working directory.
      await originalSmartfile.delete();
    }
  }

  /**
   * Recognizes an image through a SmartAI OCR engine.
   *
   * The image is handed to the engine base64-encoded together with its MIME type.
   * See {@link SmartOcr.getSmartAiOcrEngine} for how the engine is chosen.
   *
   * @param imageBufferArg raw image bytes
   * @param optionsArg MIME type, recognize options and optional engine overrides
   * @returns whatever the engine's `recognizeImage` yields
   * @throws Error when no engine can be resolved (no engine and no API key available)
   */
  public async recognizeImageBufferWithSmartAi(
    imageBufferArg: Buffer,
    optionsArg: ISmartOcrImageAiOptions,
  ): Promise<TSmartOcrAiResult> {
    const smartAiOcrEngine = this.getSmartAiOcrEngine(optionsArg);
    return smartAiOcrEngine.recognizeImage(
      {
        dataBase64: imageBufferArg.toString('base64'),
        mimeType: optionsArg.mimeType,
      },
      optionsArg.recognizeOptions,
    );
  }

  /**
   * Extracts text from a PDF by passing it to `smartaiDocument.analyzeDocuments`.
   *
   * `systemMessage` and `userMessage` fall back to the module-level default OCR
   * prompts when they are not provided.
   *
   * @param pdfBufferArg contents of the PDF, sent as the single document of the request
   * @param optionsArg model, optional prompts and optional message history
   * @returns the result of `analyzeDocuments`
   */
  public async extractTextFromPdfBufferWithSmartAi(
    pdfBufferArg: Buffer,
    optionsArg: ISmartOcrPdfAiOptions,
  ): Promise<string> {
    return plugins.smartaiDocument.analyzeDocuments({
      model: optionsArg.model,
      systemMessage: optionsArg.systemMessage ?? defaultSmartAiPdfOcrSystemMessage,
      userMessage: optionsArg.userMessage ?? defaultSmartAiPdfOcrUserMessage,
      pdfDocuments: [pdfBufferArg],
      messageHistory: optionsArg.messageHistory,
    });
  }

  /**
   * Picks the OCR engine for a call, in this order of precedence:
   *
   * 1. the engine passed with the call,
   * 2. the engine passed to the constructor,
   * 3. a new Mistral OCR engine built from the constructor-level Mistral options
   *    overlaid with the per-call ones.
   *
   * For case 3 the API key is taken from the per-call options, then the constructor
   * options, then the `MISTRAL_API_KEY` environment variable.
   *
   * @param optionsArg per-call options that may carry an engine or Mistral options
   * @returns the engine to use
   * @throws Error when neither an engine nor an API key is available
   */
  private getSmartAiOcrEngine(
    optionsArg: ISmartOcrImageAiOptions,
  ): plugins.smartaiOcr.ISmartAiOcrEngine {
    if (optionsArg.smartAiOcrEngine) {
      return optionsArg.smartAiOcrEngine;
    }
    if (this.options.smartAiOcrEngine) {
      return this.options.smartAiOcrEngine;
    }
    const mistralOcrOptions = {
      ...this.options.mistralOcrOptions,
      ...optionsArg.mistralOcrOptions,
    };
    // Resolve the key explicitly instead of reading it from the merged object: a per-call
    // `apiKey: undefined` would otherwise overwrite a key configured on the instance.
    const apiKey =
      optionsArg.mistralOcrOptions?.apiKey ??
      this.options.mistralOcrOptions?.apiKey ??
      process.env.MISTRAL_API_KEY;
    if (!apiKey) {
      throw new Error('SmartAI OCR requires smartAiOcrEngine, mistralOcrOptions.apiKey, or MISTRAL_API_KEY.');
    }
    return plugins.smartaiOcr.createMistralOcrEngine({
      ...mistralOcrOptions,
      apiKey,
    });
  }
}