feat(tests): integrate smartagent DualAgentOrchestrator with streaming support
- Update test.invoices.nanonets.ts to use DualAgentOrchestrator for JSON extraction
- Enable streaming token callback for real-time progress visibility
- Add markdown caching to avoid re-running Nanonets OCR for cached files
- Update test.bankstatements.minicpm.ts and test.invoices.minicpm.ts with streaming
- Update dependencies to @push.rocks/smartai@0.11.1 and @push.rocks/smartagent@1.2.8
This commit is contained in:
@@ -67,9 +67,12 @@ const JSON_PROMPT = `Extract invoice data from this image. Return ONLY a JSON ob
|
||||
Return only the JSON, no explanation.`;
|
||||
|
||||
/**
|
||||
* Query MiniCPM-V for JSON output (fast, no thinking)
|
||||
* Query MiniCPM-V for JSON output (fast, no thinking) with streaming
|
||||
*/
|
||||
async function queryJsonFast(images: string[]): Promise<string> {
|
||||
const startTime = Date.now();
|
||||
process.stdout.write(` [Fast] `);
|
||||
|
||||
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -80,8 +83,9 @@ async function queryJsonFast(images: string[]): Promise<string> {
|
||||
content: JSON_PROMPT,
|
||||
images: images,
|
||||
}],
|
||||
stream: false,
|
||||
stream: true,
|
||||
options: {
|
||||
num_ctx: 32768,
|
||||
num_predict: 1000,
|
||||
temperature: 0.1,
|
||||
},
|
||||
@@ -92,14 +96,44 @@ async function queryJsonFast(images: string[]): Promise<string> {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return (data.message?.content || '').trim();
|
||||
let content = '';
|
||||
const reader = response.body!.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
for (const line of chunk.split('\n').filter(l => l.trim())) {
|
||||
try {
|
||||
const json = JSON.parse(line);
|
||||
const token = json.message?.content || '';
|
||||
if (token) {
|
||||
process.stdout.write(token);
|
||||
content += token;
|
||||
}
|
||||
} catch {
|
||||
// Ignore parse errors for partial chunks
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
process.stdout.write(` (${elapsed}s)\n`);
|
||||
}
|
||||
|
||||
return content.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate)
|
||||
* Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate) with streaming
|
||||
*/
|
||||
async function queryJsonWithThinking(images: string[]): Promise<string> {
|
||||
const startTime = Date.now();
|
||||
process.stdout.write(` [Think] `);
|
||||
|
||||
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -110,8 +144,9 @@ async function queryJsonWithThinking(images: string[]): Promise<string> {
|
||||
content: `Think carefully about this invoice image, then ${JSON_PROMPT}`,
|
||||
images: images,
|
||||
}],
|
||||
stream: false,
|
||||
stream: true,
|
||||
options: {
|
||||
num_ctx: 32768,
|
||||
num_predict: 2000,
|
||||
temperature: 0.1,
|
||||
},
|
||||
@@ -122,8 +157,56 @@ async function queryJsonWithThinking(images: string[]): Promise<string> {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return (data.message?.content || '').trim();
|
||||
let content = '';
|
||||
let thinkingContent = '';
|
||||
let thinkingStarted = false;
|
||||
let outputStarted = false;
|
||||
const reader = response.body!.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
for (const line of chunk.split('\n').filter(l => l.trim())) {
|
||||
try {
|
||||
const json = JSON.parse(line);
|
||||
|
||||
// Stream thinking tokens
|
||||
const thinking = json.message?.thinking || '';
|
||||
if (thinking) {
|
||||
if (!thinkingStarted) {
|
||||
process.stdout.write(`THINKING: `);
|
||||
thinkingStarted = true;
|
||||
}
|
||||
process.stdout.write(thinking);
|
||||
thinkingContent += thinking;
|
||||
}
|
||||
|
||||
// Stream content tokens
|
||||
const token = json.message?.content || '';
|
||||
if (token) {
|
||||
if (!outputStarted) {
|
||||
if (thinkingStarted) process.stdout.write('\n [Think] ');
|
||||
process.stdout.write(`OUTPUT: `);
|
||||
outputStarted = true;
|
||||
}
|
||||
process.stdout.write(token);
|
||||
content += token;
|
||||
}
|
||||
} catch {
|
||||
// Ignore parse errors for partial chunks
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
process.stdout.write(` (${elapsed}s)\n`);
|
||||
}
|
||||
|
||||
return content.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user