feat(tests): integrate smartagent DualAgentOrchestrator with streaming support

- Update test.invoices.nanonets.ts to use DualAgentOrchestrator for JSON extraction - Enable streaming token callback for real-time progress visibility - Add markdown caching to avoid re-running Nanonets OCR for cached files - Update test.bankstatements.minicpm.ts and test.invoices.minicpm.ts with streaming - Update dependencies to @push.rocks/smartai@0.11.1 and @push.rocks/smartagent@1.2.8
2026-01-20 00:39:36 +00:00
parent 6bd672da61
commit d384c1d79b
5 changed files with 1405 additions and 204 deletions
@@ -14,7 +14,9 @@
  },
  "devDependencies": {
    "@git.zone/tsrun": "^2.0.1",
-    "@git.zone/tstest": "^3.1.5"
+    "@git.zone/tstest": "^3.1.5",
+    "@push.rocks/smartagent": "^1.2.8",
+    "@push.rocks/smartai": "^0.11.1"
  },
  "repository": {
    "type": "git",
@@ -1,9 +1,9 @@
 /**
 * Bank statement extraction using MiniCPM-V (visual extraction)
 *
- * JSON per-page approach:
+ * JSON per-page approach with streaming output:
 * 1. Ask for structured JSON of all transactions per page
- * 2. Consensus: extract twice, compare, retry if mismatch
+ * 2. Single pass extraction (no consensus)
 */
 import { tap, expect } from '@git.zone/tstest/tapbundle';
 import * as fs from 'fs';
@@ -66,11 +66,11 @@ function convertPdfToImages(pdfPath: string): string[] {
 }

 /**
- * Query for JSON extraction
+ * Query for JSON extraction with streaming output
 */
 async function queryJson(image: string, queryId: string): Promise<string> {
-  console.log(`      [${queryId}] Sending request to ${MODEL}...`);
  const startTime = Date.now();
+  process.stdout.write(`      [${queryId}] `);

  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
@@ -82,25 +82,50 @@ async function queryJson(image: string, queryId: string): Promise<string> {
        content: JSON_PROMPT,
        images: [image],
      }],
-      stream: false,
+      stream: true,
      options: {
+        num_ctx: 32768,
        num_predict: 4000,
        temperature: 0.1,
      },
    }),
  });

-  const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
-
  if (!response.ok) {
-    console.log(`      [${queryId}] ERROR: ${response.status} (${elapsed}s)`);
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+    process.stdout.write(`ERROR: ${response.status} (${elapsed}s)\n`);
    throw new Error(`Ollama API error: ${response.status}`);
  }

-  const data = await response.json();
-  const content = (data.message?.content || '').trim();
-  console.log(`      [${queryId}] Response received (${elapsed}s, ${content.length} chars)`);
-  return content;
+  let content = '';
+  const reader = response.body!.getReader();
+  const decoder = new TextDecoder();
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      const chunk = decoder.decode(value, { stream: true });
+      for (const line of chunk.split('\n').filter(l => l.trim())) {
+        try {
+          const json = JSON.parse(line);
+          const token = json.message?.content || '';
+          if (token) {
+            process.stdout.write(token);
+            content += token;
+          }
+        } catch {
+          // Ignore parse errors for partial chunks
+        }
+      }
+    }
+  } finally {
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+    process.stdout.write(` (${elapsed}s)\n`);
+  }
+
+  return content.trim();
 }

 /**
@@ -284,102 +309,29 @@ function parseAmount(value: unknown): number {
 }

 /**
- * Compare two transaction arrays for consensus
- */
-function transactionArraysMatch(a: ITransaction[], b: ITransaction[]): boolean {
-  if (a.length !== b.length) return false;
-
-  for (let i = 0; i < a.length; i++) {
-    const dateMatch = a[i].date === b[i].date;
-    const amountMatch = Math.abs(a[i].amount - b[i].amount) < 0.01;
-    if (!dateMatch || !amountMatch) return false;
-  }
-
-  return true;
-}
-
-/**
- * Compare two transaction arrays and log differences
- */
-function compareAndLogDifferences(txs1: ITransaction[], txs2: ITransaction[], pageNum: number): void {
-  if (txs1.length !== txs2.length) {
-    console.log(`    [Page ${pageNum}] Length mismatch: Q1=${txs1.length}, Q2=${txs2.length}`);
-    return;
-  }
-
-  for (let i = 0; i < txs1.length; i++) {
-    const dateMatch = txs1[i].date === txs2[i].date;
-    const amountMatch = Math.abs(txs1[i].amount - txs2[i].amount) < 0.01;
-
-    if (!dateMatch || !amountMatch) {
-      console.log(`    [Page ${pageNum}] Tx ${i + 1} differs:`);
-      console.log(`      Q1: ${txs1[i].date} | ${txs1[i].amount}`);
-      console.log(`      Q2: ${txs2[i].date} | ${txs2[i].amount}`);
-    }
-  }
-}
-
-/**
- * Extract transactions from a single page with consensus
+ * Extract transactions from a single page (single pass)
 */
 async function extractTransactionsFromPage(image: string, pageNum: number): Promise<ITransaction[]> {
-  const MAX_ATTEMPTS = 5;
  console.log(`\n    ======== Page ${pageNum} ========`);
-  console.log(`    [Page ${pageNum}] Starting JSON extraction...`);

-  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
-    console.log(`\n    [Page ${pageNum}] --- Attempt ${attempt}/${MAX_ATTEMPTS} ---`);
+  const queryId = `P${pageNum}`;
+  const response = await queryJson(image, queryId);
+  const transactions = parseJsonResponse(response, queryId);

-    // Extract twice in parallel
-    const q1Id = `P${pageNum}A${attempt}Q1`;
-    const q2Id = `P${pageNum}A${attempt}Q2`;
-
-    const [response1, response2] = await Promise.all([
-      queryJson(image, q1Id),
-      queryJson(image, q2Id),
-    ]);
-
-    const txs1 = parseJsonResponse(response1, q1Id);
-    const txs2 = parseJsonResponse(response2, q2Id);
-
-    console.log(`    [Page ${pageNum}] Results: Q1=${txs1.length} txs, Q2=${txs2.length} txs`);
-
-    if (txs1.length > 0 && transactionArraysMatch(txs1, txs2)) {
-      console.log(`    [Page ${pageNum}] ✓ CONSENSUS REACHED: ${txs1.length} transactions`);
-      console.log(`    [Page ${pageNum}] Transactions:`);
-      for (let i = 0; i < txs1.length; i++) {
-        const tx = txs1[i];
+  console.log(`    [Page ${pageNum}] Extracted ${transactions.length} transactions:`);
+  for (let i = 0; i < transactions.length; i++) {
+    const tx = transactions[i];
    console.log(`      ${(i + 1).toString().padStart(2)}. ${tx.date} | ${tx.counterparty.substring(0, 30).padEnd(30)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`);
  }
-      return txs1;
-    }

-    console.log(`    [Page ${pageNum}] ✗ NO CONSENSUS`);
-    compareAndLogDifferences(txs1, txs2, pageNum);
-
-    if (attempt < MAX_ATTEMPTS) {
-      console.log(`    [Page ${pageNum}] Retrying...`);
-    }
-  }
-
-  // Fallback: use last response
-  console.log(`\n    [Page ${pageNum}] === FALLBACK (no consensus after ${MAX_ATTEMPTS} attempts) ===`);
-  const fallbackId = `P${pageNum}FALLBACK`;
-  const fallbackResponse = await queryJson(image, fallbackId);
-  const fallback = parseJsonResponse(fallbackResponse, fallbackId);
-  console.log(`    [Page ${pageNum}] ~ FALLBACK RESULT: ${fallback.length} transactions`);
-  for (let i = 0; i < fallback.length; i++) {
-    const tx = fallback[i];
-    console.log(`      ${(i + 1).toString().padStart(2)}. ${tx.date} | ${tx.counterparty.substring(0, 30).padEnd(30)} | ${tx.amount >= 0 ? '+' : ''}${tx.amount.toFixed(2)}`);
-  }
-  return fallback;
+  return transactions;
 }

 /**
 * Extract all transactions from bank statement
 */
 async function extractTransactions(images: string[]): Promise<ITransaction[]> {
-  console.log(`    [Vision] Processing ${images.length} page(s) with ${MODEL} (JSON consensus)`);
+  console.log(`    [Vision] Processing ${images.length} page(s) with ${MODEL} (single pass)`);

  const allTransactions: ITransaction[] = [];

@@ -527,7 +479,7 @@ tap.test('summary', async () => {
  console.log(`\n======================================================`);
  console.log(`   Bank Statement Summary (${MODEL})`);
  console.log(`======================================================`);
-  console.log(`  Method:   JSON per-page + consensus`);
+  console.log(`  Method:   JSON per-page (single pass)`);
  console.log(`  Passed:   ${passedCount}/${total}`);
  console.log(`  Failed:   ${failedCount}/${total}`);
  console.log(`======================================================\n`);
@@ -67,9 +67,12 @@ const JSON_PROMPT = `Extract invoice data from this image. Return ONLY a JSON ob
 Return only the JSON, no explanation.`;

 /**
- * Query MiniCPM-V for JSON output (fast, no thinking)
+ * Query MiniCPM-V for JSON output (fast, no thinking) with streaming
 */
 async function queryJsonFast(images: string[]): Promise<string> {
+  const startTime = Date.now();
+  process.stdout.write(`      [Fast] `);
+
  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
@@ -80,8 +83,9 @@ async function queryJsonFast(images: string[]): Promise<string> {
        content: JSON_PROMPT,
        images: images,
      }],
-      stream: false,
+      stream: true,
      options: {
+        num_ctx: 32768,
        num_predict: 1000,
        temperature: 0.1,
      },
@@ -92,14 +96,44 @@ async function queryJsonFast(images: string[]): Promise<string> {
    throw new Error(`Ollama API error: ${response.status}`);
  }

-  const data = await response.json();
-  return (data.message?.content || '').trim();
+  let content = '';
+  const reader = response.body!.getReader();
+  const decoder = new TextDecoder();
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      const chunk = decoder.decode(value, { stream: true });
+      for (const line of chunk.split('\n').filter(l => l.trim())) {
+        try {
+          const json = JSON.parse(line);
+          const token = json.message?.content || '';
+          if (token) {
+            process.stdout.write(token);
+            content += token;
+          }
+        } catch {
+          // Ignore parse errors for partial chunks
+        }
+      }
+    }
+  } finally {
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+    process.stdout.write(` (${elapsed}s)\n`);
+  }
+
+  return content.trim();
 }

 /**
- * Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate)
+ * Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate) with streaming
 */
 async function queryJsonWithThinking(images: string[]): Promise<string> {
+  const startTime = Date.now();
+  process.stdout.write(`      [Think] `);
+
  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
@@ -110,8 +144,9 @@ async function queryJsonWithThinking(images: string[]): Promise<string> {
        content: `Think carefully about this invoice image, then ${JSON_PROMPT}`,
        images: images,
      }],
-      stream: false,
+      stream: true,
      options: {
+        num_ctx: 32768,
        num_predict: 2000,
        temperature: 0.1,
      },
@@ -122,8 +157,56 @@ async function queryJsonWithThinking(images: string[]): Promise<string> {
    throw new Error(`Ollama API error: ${response.status}`);
  }

-  const data = await response.json();
-  return (data.message?.content || '').trim();
+  let content = '';
+  let thinkingContent = '';
+  let thinkingStarted = false;
+  let outputStarted = false;
+  const reader = response.body!.getReader();
+  const decoder = new TextDecoder();
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      const chunk = decoder.decode(value, { stream: true });
+      for (const line of chunk.split('\n').filter(l => l.trim())) {
+        try {
+          const json = JSON.parse(line);
+
+          // Stream thinking tokens
+          const thinking = json.message?.thinking || '';
+          if (thinking) {
+            if (!thinkingStarted) {
+              process.stdout.write(`THINKING: `);
+              thinkingStarted = true;
+            }
+            process.stdout.write(thinking);
+            thinkingContent += thinking;
+          }
+
+          // Stream content tokens
+          const token = json.message?.content || '';
+          if (token) {
+            if (!outputStarted) {
+              if (thinkingStarted) process.stdout.write('\n      [Think] ');
+              process.stdout.write(`OUTPUT: `);
+              outputStarted = true;
+            }
+            process.stdout.write(token);
+            content += token;
+          }
+        } catch {
+          // Ignore parse errors for partial chunks
+        }
+      }
+    }
+  } finally {
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+    process.stdout.write(` (${elapsed}s)\n`);
+  }
+
+  return content.trim();
 }

 /**
@@ -12,6 +12,8 @@ import * as path from 'path';
 import { execSync } from 'child_process';
 import * as os from 'os';
 import { ensureNanonetsOcr, ensureMiniCpm, isContainerRunning } from './helpers/docker.js';
+import { SmartAi } from '@push.rocks/smartai';
+import { DualAgentOrchestrator } from '@push.rocks/smartagent';

 const NANONETS_URL = 'http://localhost:8000/v1';
 const NANONETS_MODEL = 'nanonets/Nanonets-OCR2-3B';
@@ -19,8 +21,24 @@ const NANONETS_MODEL = 'nanonets/Nanonets-OCR2-3B';
 const OLLAMA_URL = 'http://localhost:11434';
 const EXTRACTION_MODEL = 'gpt-oss:20b';

-// Temp directory for storing markdown between stages
-const TEMP_MD_DIR = path.join(os.tmpdir(), 'nanonets-invoices-markdown');
+// Persistent cache directory for storing markdown between runs
+const MD_CACHE_DIR = path.join(process.cwd(), '.nogit/invoices-md');
+
+// SmartAi instance for Ollama with optimized settings
+const smartAi = new SmartAi({
+  ollama: {
+    baseUrl: OLLAMA_URL,
+    model: EXTRACTION_MODEL,
+    defaultOptions: {
+      num_ctx: 32768,   // Larger context for long invoices + thinking
+      temperature: 0,   // Deterministic for JSON extraction
+    },
+    defaultTimeout: 600000,  // 10 minute timeout for large documents
+  },
+});
+
+// DualAgentOrchestrator for structured task execution
+let orchestrator: DualAgentOrchestrator;

 interface IInvoice {
  invoice_number: string;
@@ -71,10 +89,13 @@ RULES:
 7. total_amount: Final total with tax

 JSON only:
-{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}`;
+{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}
+
+Double check for valid JSON syntax.
+
+`;

 // Constants for smart batching
-const MAX_VISUAL_TOKENS = 28000;  // ~32K context minus prompt/output headroom
 const PATCH_SIZE = 14;  // Qwen2.5-VL uses 14x14 patches

 /**
@@ -359,95 +380,45 @@ function parseJsonToInvoice(response: string): IInvoice | null {
 }

 /**
- * Extract invoice from markdown using GPT-OSS 20B (streaming)
+ * Extract invoice from markdown using smartagent DualAgentOrchestrator
 */
 async function extractInvoiceFromMarkdown(markdown: string, queryId: string): Promise<IInvoice | null> {
  const startTime = Date.now();

-  console.log(`      [${queryId}] Invoice: ${markdown.length} chars, Prompt: ${JSON_EXTRACTION_PROMPT.length} chars`);
+  console.log(`      [${queryId}] Invoice: ${markdown.length} chars`);

-  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
-    method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify({
-      model: EXTRACTION_MODEL,
-      messages: [
-        { role: 'user', content: 'Hi there, how are you?' },
-        { role: 'assistant', content: 'Good, how can I help you today?' },
-        { role: 'user', content: `Here is an invoice document:\n\n${markdown}` },
-        { role: 'assistant', content: 'I have read the invoice document you provided. I can see all the text content. What would you like me to do with it?' },
-        { role: 'user', content: JSON_EXTRACTION_PROMPT },
-      ],
-      stream: true,
-      options: {
-        num_ctx: 32768,  // Larger context for long invoices + thinking
-        temperature: 0,  // Deterministic for JSON extraction
-      },
-    }),
-    signal: AbortSignal.timeout(600000), // 10 minute timeout for large documents
-  });
+  // Build the extraction task with document context
+  const taskPrompt = `Extract the invoice data from this document and output ONLY the JSON:

-  if (!response.ok) {
-    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
-    console.log(`      [${queryId}] ERROR: ${response.status} (${elapsed}s)`);
-    throw new Error(`Ollama API error: ${response.status}`);
-  }
+${markdown}

-  // Stream the response
-  let content = '';
-  let thinkingContent = '';
-  let thinkingStarted = false;
-  let outputStarted = false;
-  const reader = response.body!.getReader();
-  const decoder = new TextDecoder();
+${JSON_EXTRACTION_PROMPT}`;

  try {
-    while (true) {
-      const { done, value } = await reader.read();
-      if (done) break;
-
-      const chunk = decoder.decode(value, { stream: true });
-
-      // Each line is a JSON object
-      for (const line of chunk.split('\n').filter(l => l.trim())) {
-        try {
-          const json = JSON.parse(line);
-
-          // Stream thinking tokens
-          const thinking = json.message?.thinking || '';
-          if (thinking) {
-            if (!thinkingStarted) {
-              process.stdout.write(`      [${queryId}] THINKING: `);
-              thinkingStarted = true;
-            }
-            process.stdout.write(thinking);
-            thinkingContent += thinking;
-          }
-
-          // Stream content tokens
-          const token = json.message?.content || '';
-          if (token) {
-            if (!outputStarted) {
-              if (thinkingStarted) process.stdout.write('\n');
-              process.stdout.write(`      [${queryId}] OUTPUT: `);
-              outputStarted = true;
-            }
-            process.stdout.write(token);
-            content += token;
-          }
-        } catch {
-          // Ignore parse errors for partial chunks
-        }
-      }
-    }
-  } finally {
-    if (thinkingStarted || outputStarted) process.stdout.write('\n');
-  }
+    const result = await orchestrator.run(taskPrompt);

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
-  console.log(`      [${queryId}] Done: ${thinkingContent.length} thinking chars, ${content.length} output chars (${elapsed}s)`);
+    console.log(`      [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`);

-  return parseJsonToInvoice(content);
+    if (result.success && result.result) {
+      console.log(`      [${queryId}] Result: ${result.result.substring(0, 100)}...`);
+      return parseJsonToInvoice(result.result);
+    }
+
+    // Fallback: try parsing from history
+    if (result.history?.length > 0) {
+      const lastMessage = result.history[result.history.length - 1];
+      if (lastMessage?.content) {
+        return parseJsonToInvoice(lastMessage.content);
+      }
+    }
+
+    return null;
+  } catch (error) {
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+    console.log(`      [${queryId}] ERROR: ${error} (${elapsed}s)`);
+    throw error;
+  }
 }

 /**
@@ -556,23 +527,45 @@ function findTestCases(): ITestCase[] {
 const testCases = findTestCases();
 console.log(`\nFound ${testCases.length} invoice test cases\n`);

-// Ensure temp directory exists
-if (!fs.existsSync(TEMP_MD_DIR)) {
-  fs.mkdirSync(TEMP_MD_DIR, { recursive: true });
+// Ensure cache directory exists
+if (!fs.existsSync(MD_CACHE_DIR)) {
+  fs.mkdirSync(MD_CACHE_DIR, { recursive: true });
 }

 // -------- STAGE 1: OCR with Nanonets --------

-tap.test('Stage 1: Setup Nanonets', async () => {
+tap.test('Stage 1: Convert invoices to markdown (with caching)', async () => {
  console.log('\n========== STAGE 1: Nanonets OCR ==========\n');
-  const ok = await ensureNanonetsOcr();
-  expect(ok).toBeTrue();
-});

-tap.test('Stage 1: Convert all invoices to markdown', async () => {
-  console.log('\n  Converting all invoice PDFs to markdown with Nanonets-OCR-s...\n');
+  // Check which invoices need OCR conversion
+  const needsConversion: ITestCase[] = [];
+  let cachedCount = 0;

  for (const tc of testCases) {
+    const mdPath = path.join(MD_CACHE_DIR, `${tc.name}.md`);
+    if (fs.existsSync(mdPath)) {
+      cachedCount++;
+      tc.markdownPath = mdPath;
+      console.log(`  [CACHED] ${tc.name} - using cached markdown`);
+    } else {
+      needsConversion.push(tc);
+    }
+  }
+
+  console.log(`\n  Summary: ${cachedCount} cached, ${needsConversion.length} need conversion\n`);
+
+  if (needsConversion.length === 0) {
+    console.log('  All invoices already cached, skipping Nanonets OCR\n');
+    return;
+  }
+
+  // Start Nanonets only if there are files to convert
+  console.log('  Starting Nanonets for OCR conversion...\n');
+  const ok = await ensureNanonetsOcr();
+  expect(ok).toBeTrue();
+
+  // Convert only the invoices that need conversion
+  for (const tc of needsConversion) {
    console.log(`\n    === ${tc.name} ===`);

    const images = convertPdfToImages(tc.pdfPath);
@@ -580,13 +573,13 @@ tap.test('Stage 1: Convert all invoices to markdown', async () => {

    const markdown = await convertDocumentToMarkdown(images, tc.name);

-    const mdPath = path.join(TEMP_MD_DIR, `${tc.name}.md`);
+    const mdPath = path.join(MD_CACHE_DIR, `${tc.name}.md`);
    fs.writeFileSync(mdPath, markdown);
    tc.markdownPath = mdPath;
    console.log(`      Saved: ${mdPath}`);
  }

-  console.log('\n  Stage 1 complete: All invoices converted to markdown\n');
+  console.log(`\n  Stage 1 complete: ${needsConversion.length} invoices converted to markdown\n`);
 });

 tap.test('Stage 1: Stop Nanonets', async () => {
@@ -605,6 +598,42 @@ tap.test('Stage 2: Setup Ollama + GPT-OSS 20B', async () => {

  const extractionOk = await ensureExtractionModel();
  expect(extractionOk).toBeTrue();
+
+  // Initialize SmartAi and DualAgentOrchestrator
+  console.log('  [SmartAgent] Starting SmartAi...');
+  await smartAi.start();
+
+  console.log('  [SmartAgent] Creating DualAgentOrchestrator...');
+  orchestrator = new DualAgentOrchestrator({
+    smartAiInstance: smartAi,
+    defaultProvider: 'ollama',
+    guardianPolicyPrompt: `
+      JSON EXTRACTION POLICY:
+      - APPROVE all JSON extraction tasks
+      - This is a read-only operation - no file system or network access needed
+      - The task is to extract structured data from document text
+    `,
+    driverSystemMessage: `You are a precise JSON extraction assistant. Your only job is to extract invoice data from documents.
+
+CRITICAL RULES:
+1. Output ONLY valid JSON - no markdown, no explanations, no thinking
+2. Use the exact format requested
+3. If you cannot find a value, use empty string "" or 0 for numbers
+
+When done, wrap your JSON in <task_complete></task_complete> tags.`,
+    maxIterations: 3,
+    // Enable streaming for real-time progress visibility
+    onToken: (token, source) => {
+      if (source === 'driver') {
+        process.stdout.write(token);
+      }
+    },
+  });
+
+  // No tools needed for JSON extraction
+  console.log('  [SmartAgent] Starting orchestrator...');
+  await orchestrator.start();
+  console.log('  [SmartAgent] Ready for extraction');
 });

 let passedCount = 0;
@@ -619,7 +648,7 @@ for (const tc of testCases) {

    const startTime = Date.now();

-    const mdPath = path.join(TEMP_MD_DIR, `${tc.name}.md`);
+    const mdPath = path.join(MD_CACHE_DIR, `${tc.name}.md`);
    if (!fs.existsSync(mdPath)) {
      throw new Error(`Markdown not found: ${mdPath}. Run Stage 1 first.`);
    }
@@ -649,6 +678,14 @@ for (const tc of testCases) {
 }

 tap.test('Summary', async () => {
+  // Cleanup orchestrator and SmartAi
+  if (orchestrator) {
+    console.log('\n  [SmartAgent] Stopping orchestrator...');
+    await orchestrator.stop();
+  }
+  console.log('  [SmartAgent] Stopping SmartAi...');
+  await smartAi.stop();
+
  const totalInvoices = testCases.length;
  const accuracy = totalInvoices > 0 ? (passedCount / totalInvoices) * 100 : 0;
  const totalTimeMs = processingTimes.reduce((a, b) => a + b, 0);
@@ -658,7 +695,7 @@ tap.test('Summary', async () => {
  console.log(`   Invoice Summary (Nanonets + GPT-OSS 20B)`);
  console.log(`========================================`);
  console.log(`  Stage 1:   Nanonets-OCR-s (doc -> md)`);
-  console.log(`  Stage 2:   GPT-OSS 20B (md -> JSON)`);
+  console.log(`  Stage 2:   GPT-OSS 20B + SmartAgent (md -> JSON)`);
  console.log(`  Passed:    ${passedCount}/${totalInvoices}`);
  console.log(`  Failed:    ${failedCount}/${totalInvoices}`);
  console.log(`  Accuracy:  ${accuracy.toFixed(1)}%`);
@@ -666,14 +703,7 @@ tap.test('Summary', async () => {
  console.log(`  Total time:   ${(totalTimeMs / 1000).toFixed(1)}s`);
  console.log(`  Avg per inv:  ${avgTimeSec.toFixed(1)}s`);
  console.log(`========================================\n`);
-
-  // Cleanup temp files
-  try {
-    fs.rmSync(TEMP_MD_DIR, { recursive: true, force: true });
-    console.log(`  Cleaned up temp directory: ${TEMP_MD_DIR}\n`);
-  } catch {
-    // Ignore
-  }
+  console.log(`  Cache location: ${MD_CACHE_DIR}\n`);
 });

 export default tap.start();