feat(ocr): add Mistral OCR engine with package export, tests, and documentation
This commit is contained in:
@@ -479,6 +479,38 @@ console.log(analysis);
|
||||
await stopSmartpdf();
|
||||
```
|
||||
|
||||
### 🔎 OCR — `@push.rocks/smartai/ocr`
|
||||
|
||||
Extract text from images using Mistral Document AI OCR. By default, this calls the documented `https://api.mistral.ai/v1/ocr` endpoint with the `mistral-ocr-latest` model and returns normalized text, plus page-level confidence when requested.
|
||||
|
||||
```typescript
|
||||
import { createMistralOcrEngine } from '@push.rocks/smartai/ocr';
|
||||
import * as fs from 'fs';
|
||||
|
||||
const ocr = createMistralOcrEngine({
|
||||
apiKey: process.env.MISTRAL_API_KEY,
|
||||
confidenceScoresGranularity: 'page',
|
||||
});
|
||||
|
||||
const result = await ocr.recognizeImage({
|
||||
dataBase64: fs.readFileSync('screenshot.png').toString('base64'),
|
||||
mimeType: 'image/png',
|
||||
});
|
||||
|
||||
console.log(result.text);
|
||||
console.log(result.confidence);
|
||||
```
|
||||
|
||||
**`createMistralOcrEngine(options)`** accepts:
|
||||
|
||||
- `apiKey` — Mistral API key, required unless a custom `transport` is supplied
|
||||
- `model` — defaults to `mistral-ocr-latest`
|
||||
- `endpointUrl` — defaults to `https://api.mistral.ai/v1/ocr`
|
||||
- `confidenceScoresGranularity` — `'page'` | `'word'`
|
||||
- `tableFormat` — `'markdown'` | `'html'`
|
||||
- `extractHeader` / `extractFooter` — optional document OCR flags
|
||||
- `transport` — injectable transport for tests or custom HTTP clients
|
||||
|
||||
### 🔬 Research — `@push.rocks/smartai/research`
|
||||
|
||||
Perform web-search-powered research using Anthropic's `web_search_20250305` tool.
|
||||
@@ -514,6 +546,7 @@ tstest test/test.image.ts --verbose # Image generation
|
||||
tstest test/test.research.ts --verbose # Web research
|
||||
tstest test/test.audio.ts --verbose # Text-to-speech
|
||||
tstest test/test.document.ts --verbose # Document analysis (needs Chromium)
|
||||
tstest test/test.ocr.ts --verbose # Mistral OCR transport (mocked)
|
||||
```
|
||||
|
||||
Most tests skip gracefully when API keys are not set. The Ollama and Mistral OCR tests are fully mocked and require no external services.
|
||||
@@ -533,6 +566,7 @@ Most tests skip gracefully when API keys are not set. The Ollama tests are fully
|
||||
├── ts_audio/ # @push.rocks/smartai/audio
|
||||
├── ts_image/ # @push.rocks/smartai/image
|
||||
├── ts_document/ # @push.rocks/smartai/document
|
||||
├── ts_ocr/ # @push.rocks/smartai/ocr
|
||||
└── ts_research/ # @push.rocks/smartai/research
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user