feat(docker-images): add vLLM-based Nanonets-OCR2-3B image and Qwen3-VL Ollama image; refactor build, docs, and tests to use the new runtime/layout
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Invoice extraction using Nanonets-OCR-s + GPT-OSS 20B (sequential two-stage pipeline)
|
||||
* Invoice extraction using Nanonets-OCR2-3B + GPT-OSS 20B (sequential two-stage pipeline)
|
||||
*
|
||||
* Stage 1: Nanonets-OCR-s converts ALL document pages to markdown (stop after completion)
|
||||
* Stage 1: Nanonets-OCR2-3B converts ALL document pages to markdown (stop after completion)
|
||||
* Stage 2: GPT-OSS 20B extracts structured JSON from saved markdown (after Nanonets stops)
|
||||
*
|
||||
* This approach avoids GPU contention by running services sequentially.
|
||||
@@ -14,7 +14,7 @@ import * as os from 'os';
|
||||
import { ensureNanonetsOcr, ensureMiniCpm, isContainerRunning } from './helpers/docker.js';
|
||||
|
||||
const NANONETS_URL = 'http://localhost:8000/v1';
|
||||
const NANONETS_MODEL = 'nanonets/Nanonets-OCR-s';
|
||||
const NANONETS_MODEL = 'nanonets/Nanonets-OCR2-3B';
|
||||
|
||||
const OLLAMA_URL = 'http://localhost:11434';
|
||||
const EXTRACTION_MODEL = 'gpt-oss:20b';
|
||||
@@ -92,28 +92,11 @@ function estimateVisualTokens(width: number, height: number): number {
|
||||
}
|
||||
|
||||
/**
|
||||
* Batch images to fit within context window
|
||||
* Process images one page at a time for reliability
|
||||
*/
|
||||
function batchImages(images: IImageData[]): IImageData[][] {
|
||||
const batches: IImageData[][] = [];
|
||||
let currentBatch: IImageData[] = [];
|
||||
let currentTokens = 0;
|
||||
|
||||
for (const img of images) {
|
||||
const imgTokens = estimateVisualTokens(img.width, img.height);
|
||||
|
||||
if (currentTokens + imgTokens > MAX_VISUAL_TOKENS && currentBatch.length > 0) {
|
||||
batches.push(currentBatch);
|
||||
currentBatch = [img];
|
||||
currentTokens = imgTokens;
|
||||
} else {
|
||||
currentBatch.push(img);
|
||||
currentTokens += imgTokens;
|
||||
}
|
||||
}
|
||||
if (currentBatch.length > 0) batches.push(currentBatch);
|
||||
|
||||
return batches;
|
||||
// One page per batch for reliable processing
|
||||
return images.map(img => [img]);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -194,6 +177,7 @@ async function convertBatchToMarkdown(batch: IImageData[]): Promise<string> {
|
||||
max_tokens: 4096 * batch.length, // Scale output tokens with batch size
|
||||
temperature: 0.0,
|
||||
}),
|
||||
signal: AbortSignal.timeout(600000), // 10 minute timeout for OCR
|
||||
});
|
||||
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
|
||||
Reference in New Issue
Block a user