feat(ocr): add smartai extraction support

This commit is contained in:
2026-05-19 06:42:42 +00:00
parent d86a83d515
commit 30780e7514
8 changed files with 9864 additions and 3355 deletions
+1
View File
@@ -1 +1,2 @@
export * from './smartocr.classes.smartocr.js';
export type * from './smartocr.interfaces.js';
+69 -7
View File
@@ -1,40 +1,102 @@
import * as plugins from './smartocr.plugins.js';
import * as paths from './smartocr.paths.js';
import type {
ISmartOcrConstructorOptions,
ISmartOcrImageAiOptions,
ISmartOcrPdfAiOptions,
TSmartOcrAiResult,
} from './smartocr.interfaces.js';
// System prompt used by extractTextFromPdfBufferWithSmartAi when the caller supplies no `systemMessage`.
const defaultSmartAiPdfOcrSystemMessage = 'You are a precise OCR engine. Extract text faithfully and do not summarize.';
// User prompt used by extractTextFromPdfBufferWithSmartAi when the caller supplies no `userMessage`.
const defaultSmartAiPdfOcrUserMessage = 'Extract all readable text from this PDF. Preserve page order, line breaks, table structure, and obvious headings. Return only the extracted text.';
export class SmartOcr {
// STATIC
public static async createAndInit() {
const smartocrInstance = new SmartOcr();
/**
 * Constructs a SmartOcr with the given options and awaits `init()` before handing it back.
 *
 * @param optionsArg constructor options; an empty object is used when omitted
 * @returns the instance, after `init()` has completed
 */
public static async createAndInit(optionsArg: ISmartOcrConstructorOptions = {}) {
  const instance = new SmartOcr(optionsArg);
  await instance.init();
  return instance;
}
// INSTANCE
// Options handed to the constructor; read when resolving the SmartAI OCR engine.
private options: ISmartOcrConstructorOptions;
// Deferred created via smartpromise.
// NOTE(review): nothing in the visible code resolves or awaits it — confirm the intended use.
public readyDeferred = plugins.smartpromise.defer();
// SmartFile factory obtained from nodeFs(); used for the temporary PDF files and for directory creation.
public smartfileFactory = plugins.smartfile.SmartFileFactory.nodeFs();
// Shell wrapper used to run the ocrmypdf command; assigned in the constructor.
public smartshellInstance: plugins.smartshell.Smartshell;
/**
 * Runs a PDF through the `ocrmypdf` command line tool and returns the processed PDF bytes.
 *
 * The input is written to a uniquely named file inside `paths.noGitDir`, `ocrmypdf --rotate-pages`
 * writes its output to a sibling `_processed.pdf` file, that output is loaded, and both files are
 * deleted before the loaded content is returned.
 *
 * NOTE(review): there is no try/finally, so if any awaited step rejects the temporary files stay behind.
 * NOTE(review): `originalSmartfile` and `processedSmartfile` are each declared twice in this view —
 * this looks like pre- and post-change lines of a diff shown together; confirm which pair is current.
 *
 * @param pdfBufferArg the PDF to process
 * @returns the `contentBuffer` of the processed file
 */
public async processPdfBuffer (pdfBufferArg: Buffer): Promise<Buffer> {
// unique base name so both temp files share one prefix
const uniqueString = plugins.smartunique.uni('doc_');
const originalPath = plugins.path.join(paths.noGitDir, `${uniqueString}.pdf`);
const processedPath = plugins.path.join(paths.noGitDir, `${uniqueString}_processed.pdf`);
const originalSmartfile = await plugins.smartfile.SmartFile.fromBuffer(originalPath, pdfBufferArg);
const originalSmartfile = this.smartfileFactory.fromBuffer(originalPath, pdfBufferArg, paths.noGitDir);
await originalSmartfile.write();
// both paths are interpolated unquoted into the shell command
// NOTE(review): assumes paths.noGitDir contains no spaces or shell metacharacters — confirm
await this.smartshellInstance.exec(`ocrmypdf --rotate-pages ${originalPath} ${processedPath}`);
const processedSmartfile = await plugins.smartfile.SmartFile.fromFilePath(processedPath);
const processedSmartfile = await this.smartfileFactory.fromFilePath(processedPath, paths.noGitDir);
// remove both temp files; contentBuffer is read after delete() — presumably it stays in memory, confirm
await originalSmartfile.delete();
await processedSmartfile.delete();
return processedSmartfile.contentBuffer;
}
constructor() {
/**
 * Runs OCR on an in-memory image through a SmartAI OCR engine.
 *
 * The engine is resolved via `getSmartAiOcrEngine`; the image is handed to it base64-encoded
 * together with the caller-supplied MIME type.
 *
 * @param imageBufferArg raw image bytes
 * @param optionsArg MIME type plus optional engine, Mistral options and recognize options
 * @returns whatever the engine's `recognizeImage` call resolves to
 */
public async recognizeImageBufferWithSmartAi(
  imageBufferArg: Buffer,
  optionsArg: ISmartOcrImageAiOptions,
): Promise<TSmartOcrAiResult> {
  // Resolve the engine first so a missing engine/API key surfaces before any encoding work.
  const engine = this.getSmartAiOcrEngine(optionsArg);
  const imagePayload = {
    dataBase64: imageBufferArg.toString('base64'),
    mimeType: optionsArg.mimeType,
  };
  return engine.recognizeImage(imagePayload, optionsArg.recognizeOptions);
}
/**
 * Sends a PDF to `smartaiDocument.analyzeDocuments` with OCR-style prompts and returns its result.
 *
 * @param pdfBufferArg the PDF, passed on as the only entry of `pdfDocuments`
 * @param optionsArg model to use, optional prompt overrides and optional message history
 * @returns the value produced by `analyzeDocuments`
 */
public async extractTextFromPdfBufferWithSmartAi(
  pdfBufferArg: Buffer,
  optionsArg: ISmartOcrPdfAiOptions,
): Promise<string> {
  const { model, messageHistory } = optionsArg;
  // Fall back to the module-level default prompts when the caller leaves them unset.
  const systemMessage = optionsArg.systemMessage ?? defaultSmartAiPdfOcrSystemMessage;
  const userMessage = optionsArg.userMessage ?? defaultSmartAiPdfOcrUserMessage;
  return plugins.smartaiDocument.analyzeDocuments({
    model,
    systemMessage,
    userMessage,
    pdfDocuments: [pdfBufferArg],
    messageHistory,
  });
}
/**
 * Stores the options and creates the bash-backed Smartshell used for shell commands.
 *
 * @param optionsArg instance-wide options; an empty object is used when omitted
 */
constructor(optionsArg: ISmartOcrConstructorOptions = {}) {
  const bashShell = new plugins.smartshell.Smartshell({ executor: 'bash' });
  this.smartshellInstance = bashShell;
  this.options = optionsArg;
}
/**
 * Prepares the working directory `paths.noGitDir` and looks up the `ocrmypdf` binary.
 *
 * NOTE(review): two directory-creation calls appear here — this looks like the pre- and
 * post-change line of a diff shown together; confirm which one is current.
 * NOTE(review): the `which` result is assigned but never read, so a missing binary is only
 * reported here if `which` itself rejects — confirm.
 */
public async init() {
await plugins.smartfile.fs.ensureDir(paths.noGitDir);
await this.smartfileFactory.getSmartFs().directory(paths.noGitDir).recursive().create();
const result = await plugins.smartshell.which('ocrmypdf');
}
}
/**
 * Picks the OCR engine for one image-recognition call.
 *
 * Resolution order: the engine passed with the call, then the engine given to the constructor,
 * then a Mistral engine built from the merged Mistral options (call-level values are spread over
 * constructor-level ones). The API key comes from the merged options or, failing that, from the
 * `MISTRAL_API_KEY` environment variable.
 *
 * @param optionsArg per-call image OCR options
 * @returns the engine to use
 * @throws Error when no engine is supplied and no API key can be found
 */
private getSmartAiOcrEngine(optionsArg: ISmartOcrImageAiOptions): plugins.smartaiOcr.ISmartAiOcrEngine {
  // A ready-made engine wins: per-call first, instance-level second.
  const presetEngine = optionsArg.smartAiOcrEngine || this.options.smartAiOcrEngine;
  if (presetEngine) {
    return presetEngine;
  }

  const mergedMistralOptions = {
    ...this.options.mistralOcrOptions,
    ...optionsArg.mistralOcrOptions,
  };
  const resolvedApiKey = mergedMistralOptions.apiKey ?? process.env.MISTRAL_API_KEY;
  if (resolvedApiKey) {
    return plugins.smartaiOcr.createMistralOcrEngine({
      ...mergedMistralOptions,
      apiKey: resolvedApiKey,
    });
  }
  throw new Error('SmartAI OCR requires smartAiOcrEngine, mistralOcrOptions.apiKey, or MISTRAL_API_KEY.');
}
}
+29
View File
@@ -0,0 +1,29 @@
import type { ModelMessage, LanguageModelV3 } from '@push.rocks/smartai';
import type {
ISmartAiMistralOcrOptions,
ISmartAiMistralOcrRecognizeOptions,
ISmartAiOcrEngine,
ISmartAiOcrResult,
TSmartAiOcrImageMimeType,
} from '@push.rocks/smartai/ocr';
/**
 * Instance-wide defaults for SmartAI-based image OCR, passed to the SmartOcr constructor.
 */
export interface ISmartOcrConstructorOptions {
/** Engine used for image OCR when a call does not pass its own engine. */
smartAiOcrEngine?: ISmartAiOcrEngine;
/** Base Mistral OCR options; a call's own `mistralOcrOptions` are spread on top of these. */
mistralOcrOptions?: ISmartAiMistralOcrOptions;
}
/**
 * Per-call options for `recognizeImageBufferWithSmartAi`.
 */
export interface ISmartOcrImageAiOptions {
/** MIME type of the image; forwarded to the engine alongside the base64 image data. */
mimeType: TSmartAiOcrImageMimeType;
/** Engine to use for this call; takes precedence over the constructor-level engine. */
smartAiOcrEngine?: ISmartAiOcrEngine;
/** Mistral OCR options for this call; only used when no engine is supplied, and spread over the constructor-level ones. */
mistralOcrOptions?: ISmartAiMistralOcrOptions;
/** Passed unchanged as the second argument to the engine's `recognizeImage`. */
recognizeOptions?: ISmartAiMistralOcrRecognizeOptions;
}
/**
 * Options for `extractTextFromPdfBufferWithSmartAi`.
 */
export interface ISmartOcrPdfAiOptions {
/** Language model forwarded to `analyzeDocuments`. */
model: LanguageModelV3;
/** System prompt override; a built-in OCR prompt is used when this is unset. */
systemMessage?: string;
/** User prompt override; a built-in extraction prompt is used when this is unset. */
userMessage?: string;
/** Forwarded unchanged to `analyzeDocuments` as `messageHistory`. */
messageHistory?: ModelMessage[];
}
/** Result type of SmartAI image OCR: an alias of `ISmartAiOcrResult`, with `TRaw` passed through and defaulting to `unknown`. */
export type TSmartOcrAiResult<TRaw = unknown> = ISmartAiOcrResult<TRaw>;
+4
View File
@@ -7,6 +7,8 @@ export {
// @pushrocks scope
import * as smartfile from '@push.rocks/smartfile';
import * as smartaiDocument from '@push.rocks/smartai/document';
import * as smartaiOcr from '@push.rocks/smartai/ocr';
import * as smartshell from '@push.rocks/smartshell';
import * as smartunique from '@push.rocks/smartunique';
import * as smartpath from '@push.rocks/smartpath';
@@ -14,6 +16,8 @@ import * as smartpromise from '@push.rocks/smartpromise';
export {
smartfile,
smartaiDocument,
smartaiOcr,
smartshell,
smartunique,
smartpath,