feat(tests): integrate smartagent DualAgentOrchestrator with streaming support
- Update test.invoices.nanonets.ts to use DualAgentOrchestrator for JSON extraction
- Enable streaming token callback for real-time progress visibility
- Add markdown caching to avoid re-running Nanonets OCR for cached files
- Update test.bankstatements.minicpm.ts and test.invoices.minicpm.ts with streaming
- Update dependencies to @push.rocks/smartai@0.11.1 and @push.rocks/smartagent@1.2.8
This commit is contained in:
@@ -67,9 +67,12 @@ const JSON_PROMPT = `Extract invoice data from this image. Return ONLY a JSON ob
|
||||
Return only the JSON, no explanation.`;
|
||||
|
||||
/**
|
||||
* Query MiniCPM-V for JSON output (fast, no thinking)
|
||||
* Query MiniCPM-V for JSON output (fast, no thinking) with streaming
|
||||
*/
|
||||
async function queryJsonFast(images: string[]): Promise<string> {
|
||||
const startTime = Date.now();
|
||||
process.stdout.write(` [Fast] `);
|
||||
|
||||
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -80,8 +83,9 @@ async function queryJsonFast(images: string[]): Promise<string> {
|
||||
content: JSON_PROMPT,
|
||||
images: images,
|
||||
}],
|
||||
stream: false,
|
||||
stream: true,
|
||||
options: {
|
||||
num_ctx: 32768,
|
||||
num_predict: 1000,
|
||||
temperature: 0.1,
|
||||
},
|
||||
@@ -92,14 +96,44 @@ async function queryJsonFast(images: string[]): Promise<string> {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return (data.message?.content || '').trim();
|
||||
let content = '';
|
||||
const reader = response.body!.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
for (const line of chunk.split('\n').filter(l => l.trim())) {
|
||||
try {
|
||||
const json = JSON.parse(line);
|
||||
const token = json.message?.content || '';
|
||||
if (token) {
|
||||
process.stdout.write(token);
|
||||
content += token;
|
||||
}
|
||||
} catch {
|
||||
// Ignore parse errors for partial chunks
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
process.stdout.write(` (${elapsed}s)\n`);
|
||||
}
|
||||
|
||||
return content.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate)
|
||||
* Query MiniCPM-V for JSON output with thinking enabled (slower, more accurate) with streaming
|
||||
*/
|
||||
async function queryJsonWithThinking(images: string[]): Promise<string> {
|
||||
const startTime = Date.now();
|
||||
process.stdout.write(` [Think] `);
|
||||
|
||||
const response = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -110,8 +144,9 @@ async function queryJsonWithThinking(images: string[]): Promise<string> {
|
||||
content: `Think carefully about this invoice image, then ${JSON_PROMPT}`,
|
||||
images: images,
|
||||
}],
|
||||
stream: false,
|
||||
stream: true,
|
||||
options: {
|
||||
num_ctx: 32768,
|
||||
num_predict: 2000,
|
||||
temperature: 0.1,
|
||||
},
|
||||
@@ -122,8 +157,56 @@ async function queryJsonWithThinking(images: string[]): Promise<string> {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return (data.message?.content || '').trim();
|
||||
let content = '';
|
||||
let thinkingContent = '';
|
||||
let thinkingStarted = false;
|
||||
let outputStarted = false;
|
||||
const reader = response.body!.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
for (const line of chunk.split('\n').filter(l => l.trim())) {
|
||||
try {
|
||||
const json = JSON.parse(line);
|
||||
|
||||
// Stream thinking tokens
|
||||
const thinking = json.message?.thinking || '';
|
||||
if (thinking) {
|
||||
if (!thinkingStarted) {
|
||||
process.stdout.write(`THINKING: `);
|
||||
thinkingStarted = true;
|
||||
}
|
||||
process.stdout.write(thinking);
|
||||
thinkingContent += thinking;
|
||||
}
|
||||
|
||||
// Stream content tokens
|
||||
const token = json.message?.content || '';
|
||||
if (token) {
|
||||
if (!outputStarted) {
|
||||
if (thinkingStarted) process.stdout.write('\n [Think] ');
|
||||
process.stdout.write(`OUTPUT: `);
|
||||
outputStarted = true;
|
||||
}
|
||||
process.stdout.write(token);
|
||||
content += token;
|
||||
}
|
||||
} catch {
|
||||
// Ignore parse errors for partial chunks
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
process.stdout.write(` (${elapsed}s)\n`);
|
||||
}
|
||||
|
||||
return content.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user