feat(tests): integrate SmartAi/DualAgentOrchestrator into extraction tests and add JSON self-validation
This commit is contained in:
@@ -13,7 +13,7 @@ import { execSync } from 'child_process';
|
||||
import * as os from 'os';
|
||||
import { ensureNanonetsOcr, ensureMiniCpm, isContainerRunning } from './helpers/docker.js';
|
||||
import { SmartAi } from '@push.rocks/smartai';
|
||||
import { DualAgentOrchestrator } from '@push.rocks/smartagent';
|
||||
import { DualAgentOrchestrator, JsonValidatorTool } from '@push.rocks/smartagent';
|
||||
|
||||
const NANONETS_URL = 'http://localhost:8000/v1';
|
||||
const NANONETS_MODEL = 'nanonets/Nanonets-OCR2-3B';
|
||||
@@ -76,11 +76,11 @@ Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number>.`;
|
||||
const JSON_EXTRACTION_PROMPT = `Extract key fields from the invoice. Return ONLY valid JSON.
|
||||
|
||||
WHERE TO FIND DATA:
|
||||
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer")
|
||||
- invoice_number, invoice_date, vendor_name: Look in the HEADER section at the TOP of PAGE 1 (near "Invoice no.", "Invoice date:", "Rechnungsnummer"). Use common sense. Btw. an invoice number might start on INV* .
|
||||
- net_amount, vat_amount, total_amount: Look in the SUMMARY section at the BOTTOM (look for "Total", "Amount due", "Gesamtbetrag")
|
||||
|
||||
RULES:
|
||||
1. invoice_number: Extract ONLY the value (e.g., "R0015632540"), NOT the label "Invoice no."
|
||||
1. Use common sense.
|
||||
2. invoice_date: Convert to YYYY-MM-DD format (e.g., "14/04/2022" → "2022-04-14")
|
||||
3. vendor_name: The company issuing the invoice
|
||||
4. currency: EUR, USD, or GBP
|
||||
@@ -643,18 +643,27 @@ tap.test('Stage 2: Setup Ollama + GPT-OSS 20B', async () => {
|
||||
guardianPolicyPrompt: `
|
||||
JSON EXTRACTION POLICY:
|
||||
- APPROVE all JSON extraction tasks
|
||||
- APPROVE all json.validate tool calls
|
||||
- This is a read-only operation - no file system or network access needed
|
||||
- The task is to extract structured data from document text
|
||||
`,
|
||||
driverSystemMessage: `You are a precise JSON extraction assistant. Your only job is to extract invoice data from documents.
|
||||
|
||||
CRITICAL RULES:
|
||||
1. Output ONLY valid JSON - no markdown, no explanations, no thinking
|
||||
2. Use the exact format requested
|
||||
3. If you cannot find a value, use empty string "" or 0 for numbers
|
||||
1. Output valid JSON with the exact format requested
|
||||
2. If you cannot find a value, use empty string "" or 0 for numbers
|
||||
3. IMPORTANT: Before completing, validate your JSON using the json.validate tool:
|
||||
|
||||
<tool_call>
|
||||
<tool>json</tool>
|
||||
<action>validate</action>
|
||||
<params>{"jsonString": "YOUR_JSON", "requiredFields": ["invoice_number", "invoice_date", "vendor_name", "currency", "net_amount", "vat_amount", "total_amount"]}</params>
|
||||
</tool_call>
|
||||
|
||||
4. Only complete after validation passes
|
||||
|
||||
When done, wrap your JSON in <task_complete></task_complete> tags.`,
|
||||
maxIterations: 3,
|
||||
maxIterations: 5,
|
||||
// Enable streaming for real-time progress visibility
|
||||
onToken: (token, source) => {
|
||||
if (source === 'driver') {
|
||||
@@ -663,7 +672,9 @@ When done, wrap your JSON in <task_complete></task_complete> tags.`,
|
||||
},
|
||||
});
|
||||
|
||||
// No tools needed for JSON extraction
|
||||
// Register JsonValidatorTool for self-validation
|
||||
orchestrator.registerTool(new JsonValidatorTool());
|
||||
|
||||
console.log(' [SmartAgent] Starting orchestrator...');
|
||||
await orchestrator.start();
|
||||
console.log(' [SmartAgent] Ready for extraction');
|
||||
|
||||
Reference in New Issue
Block a user