diff --git a/test/test.invoices.nanonets.ts b/test/test.invoices.nanonets.ts index d04d01f..107cc5a 100644 --- a/test/test.invoices.nanonets.ts +++ b/test/test.invoices.nanonets.ts @@ -339,16 +339,20 @@ function extractCurrency(s: string | undefined): string { } /** - * Extract JSON from response + * Try to extract valid JSON from a response string */ -function extractJsonFromResponse(response: string): Record | null { - let cleanResponse = response.replace(/[\s\S]*?<\/think>/g, '').trim(); - const codeBlockMatch = cleanResponse.match(/```(?:json)?\s*([\s\S]*?)```/); - const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : cleanResponse; +function tryExtractJson(response: string): Record | null { + // Remove thinking tags + let clean = response.replace(/[\s\S]*?<\/think>/g, '').trim(); + + // Try code block + const codeBlockMatch = clean.match(/```(?:json)?\s*([\s\S]*?)```/); + const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : clean; try { return JSON.parse(jsonStr); } catch { + // Try to find JSON object const jsonMatch = jsonStr.match(/\{[\s\S]*\}/); if (jsonMatch) { try { @@ -361,29 +365,13 @@ function extractJsonFromResponse(response: string): Record | nu } } -/** - * Parse JSON response into IInvoice - */ -function parseJsonToInvoice(response: string): IInvoice | null { - const parsed = extractJsonFromResponse(response); - if (!parsed) return null; - - return { - invoice_number: extractInvoiceNumber(String(parsed.invoice_number || '')), - invoice_date: extractDate(String(parsed.invoice_date || '')), - vendor_name: String(parsed.vendor_name || '').replace(/\*\*/g, '').replace(/`/g, '').trim(), - currency: extractCurrency(String(parsed.currency || '')), - net_amount: parseAmount(parsed.net_amount as string | number), - vat_amount: parseAmount(parsed.vat_amount as string | number), - total_amount: parseAmount(parsed.total_amount as string | number), - }; -} - /** * Extract invoice from markdown using smartagent DualAgentOrchestrator + * Validates JSON and retries if invalid */ async function extractInvoiceFromMarkdown(markdown: string, queryId: string): Promise { const startTime = Date.now(); + const maxRetries = 2; console.log(` [${queryId}] Invoice: ${markdown.length} chars`); @@ -395,25 +383,70 @@ ${markdown} ${JSON_EXTRACTION_PROMPT}`; try { - const result = await orchestrator.run(taskPrompt); - - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + let result = await orchestrator.run(taskPrompt); + let elapsed = ((Date.now() - startTime) / 1000).toFixed(1); console.log(` [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`); - if (result.success && result.result) { - console.log(` [${queryId}] Result: ${result.result.substring(0, 100)}...`); - return parseJsonToInvoice(result.result); + // Try to parse JSON from result + let jsonData: Record | null = null; + let responseText = result.result || ''; + + if (result.success && responseText) { + jsonData = tryExtractJson(responseText); } // Fallback: try parsing from history - if (result.history?.length > 0) { + if (!jsonData && result.history?.length > 0) { const lastMessage = result.history[result.history.length - 1]; if (lastMessage?.content) { - return parseJsonToInvoice(lastMessage.content); + responseText = lastMessage.content; + jsonData = tryExtractJson(responseText); } } - return null; + // If JSON is invalid, retry with correction request + let retries = 0; + while (!jsonData && retries < maxRetries) { + retries++; + console.log(` [${queryId}] Invalid JSON, requesting correction (retry ${retries}/${maxRetries})...`); + + result = await orchestrator.continueTask( + `Your response was not valid JSON. Please output ONLY the JSON object with no markdown, no explanation, no thinking tags. Just the raw JSON starting with { and ending with }. Format: +{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}` + ); + + elapsed = ((Date.now() - startTime) / 1000).toFixed(1); + console.log(` [${queryId}] Retry ${retries}: ${result.status} (${elapsed}s)`); + + responseText = result.result || ''; + if (responseText) { + jsonData = tryExtractJson(responseText); + } + + if (!jsonData && result.history?.length > 0) { + const lastMessage = result.history[result.history.length - 1]; + if (lastMessage?.content) { + responseText = lastMessage.content; + jsonData = tryExtractJson(responseText); + } + } + } + + if (!jsonData) { + console.log(` [${queryId}] Failed to get valid JSON after ${retries} retries`); + return null; + } + + console.log(` [${queryId}] Valid JSON extracted`); + return { + invoice_number: extractInvoiceNumber(String(jsonData.invoice_number || '')), + invoice_date: extractDate(String(jsonData.invoice_date || '')), + vendor_name: String(jsonData.vendor_name || '').replace(/\*\*/g, '').replace(/`/g, '').trim(), + currency: extractCurrency(String(jsonData.currency || '')), + net_amount: parseAmount(jsonData.net_amount as string | number), + vat_amount: parseAmount(jsonData.vat_amount as string | number), + total_amount: parseAmount(jsonData.total_amount as string | number), + }; } catch (error) { const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); console.log(` [${queryId}] ERROR: ${error} (${elapsed}s)`);