fix(test): add JSON validation and retry logic to invoice extraction

- Add tryExtractJson function to validate that a response contains parseable JSON before accepting it
- Use orchestrator.continueTask() to request correction if JSON is invalid
- Retry up to 2 times for malformed JSON responses
- Remove duplicate parseJsonToInvoice function
This commit is contained in:
2026-01-20 00:45:30 +00:00
parent d384c1d79b
commit d8bdb18841

View File

@@ -339,16 +339,20 @@ function extractCurrency(s: string | undefined): string {
} }
/** /**
* Extract JSON from response * Try to extract valid JSON from a response string
*/ */
function extractJsonFromResponse(response: string): Record<string, unknown> | null { function tryExtractJson(response: string): Record<string, unknown> | null {
let cleanResponse = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim(); // Remove thinking tags
const codeBlockMatch = cleanResponse.match(/```(?:json)?\s*([\s\S]*?)```/); let clean = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : cleanResponse;
// Try code block
const codeBlockMatch = clean.match(/```(?:json)?\s*([\s\S]*?)```/);
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : clean;
try { try {
return JSON.parse(jsonStr); return JSON.parse(jsonStr);
} catch { } catch {
// Try to find JSON object
const jsonMatch = jsonStr.match(/\{[\s\S]*\}/); const jsonMatch = jsonStr.match(/\{[\s\S]*\}/);
if (jsonMatch) { if (jsonMatch) {
try { try {
@@ -361,29 +365,13 @@ function extractJsonFromResponse(response: string): Record<string, unknown> | nu
} }
} }
/**
 * Build an IInvoice from a raw response string.
 *
 * The response is handed to extractJsonFromResponse; if that yields a falsy
 * value, null is returned. Otherwise every text field is stringified (falsy
 * values become '') and passed through its matching extract* helper, and the
 * three amount fields are passed to parseAmount.
 *
 * @param response - Raw response text expected to contain the invoice JSON.
 * @returns The assembled invoice, or null when no JSON could be extracted.
 */
function parseJsonToInvoice(response: string): IInvoice | null {
  const fields = extractJsonFromResponse(response);

  if (fields) {
    // Stringify a raw field, mapping any falsy value to the empty string.
    const asText = (raw: unknown): string => String(raw || '');

    const invoiceNumber = extractInvoiceNumber(asText(fields.invoice_number));
    const invoiceDate = extractDate(asText(fields.invoice_date));

    // Vendor name: drop markdown bold markers and backticks, then trim.
    const vendorName = asText(fields.vendor_name)
      .replace(/\*\*/g, '')
      .replace(/`/g, '')
      .trim();

    const currency = extractCurrency(asText(fields.currency));
    const netAmount = parseAmount(fields.net_amount as string | number);
    const vatAmount = parseAmount(fields.vat_amount as string | number);
    const totalAmount = parseAmount(fields.total_amount as string | number);

    return {
      invoice_number: invoiceNumber,
      invoice_date: invoiceDate,
      vendor_name: vendorName,
      currency: currency,
      net_amount: netAmount,
      vat_amount: vatAmount,
      total_amount: totalAmount,
    };
  }

  return null;
}
/** /**
* Extract invoice from markdown using smartagent DualAgentOrchestrator * Extract invoice from markdown using smartagent DualAgentOrchestrator
* Validates JSON and retries if invalid
*/ */
async function extractInvoiceFromMarkdown(markdown: string, queryId: string): Promise<IInvoice | null> { async function extractInvoiceFromMarkdown(markdown: string, queryId: string): Promise<IInvoice | null> {
const startTime = Date.now(); const startTime = Date.now();
const maxRetries = 2;
console.log(` [${queryId}] Invoice: ${markdown.length} chars`); console.log(` [${queryId}] Invoice: ${markdown.length} chars`);
@@ -395,25 +383,70 @@ ${markdown}
${JSON_EXTRACTION_PROMPT}`; ${JSON_EXTRACTION_PROMPT}`;
try { try {
const result = await orchestrator.run(taskPrompt); let result = await orchestrator.run(taskPrompt);
let elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(` [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`); console.log(` [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`);
if (result.success && result.result) { // Try to parse JSON from result
console.log(` [${queryId}] Result: ${result.result.substring(0, 100)}...`); let jsonData: Record<string, unknown> | null = null;
return parseJsonToInvoice(result.result); let responseText = result.result || '';
if (result.success && responseText) {
jsonData = tryExtractJson(responseText);
} }
// Fallback: try parsing from history // Fallback: try parsing from history
if (result.history?.length > 0) { if (!jsonData && result.history?.length > 0) {
const lastMessage = result.history[result.history.length - 1]; const lastMessage = result.history[result.history.length - 1];
if (lastMessage?.content) { if (lastMessage?.content) {
return parseJsonToInvoice(lastMessage.content); responseText = lastMessage.content;
jsonData = tryExtractJson(responseText);
} }
} }
return null; // If JSON is invalid, retry with correction request
let retries = 0;
while (!jsonData && retries < maxRetries) {
retries++;
console.log(` [${queryId}] Invalid JSON, requesting correction (retry ${retries}/${maxRetries})...`);
result = await orchestrator.continueTask(
`Your response was not valid JSON. Please output ONLY the JSON object with no markdown, no explanation, no thinking tags. Just the raw JSON starting with { and ending with }. Format:
{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}`
);
elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(` [${queryId}] Retry ${retries}: ${result.status} (${elapsed}s)`);
responseText = result.result || '';
if (responseText) {
jsonData = tryExtractJson(responseText);
}
if (!jsonData && result.history?.length > 0) {
const lastMessage = result.history[result.history.length - 1];
if (lastMessage?.content) {
responseText = lastMessage.content;
jsonData = tryExtractJson(responseText);
}
}
}
if (!jsonData) {
console.log(` [${queryId}] Failed to get valid JSON after ${retries} retries`);
return null;
}
console.log(` [${queryId}] Valid JSON extracted`);
return {
invoice_number: extractInvoiceNumber(String(jsonData.invoice_number || '')),
invoice_date: extractDate(String(jsonData.invoice_date || '')),
vendor_name: String(jsonData.vendor_name || '').replace(/\*\*/g, '').replace(/`/g, '').trim(),
currency: extractCurrency(String(jsonData.currency || '')),
net_amount: parseAmount(jsonData.net_amount as string | number),
vat_amount: parseAmount(jsonData.vat_amount as string | number),
total_amount: parseAmount(jsonData.total_amount as string | number),
};
} catch (error) { } catch (error) {
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1); const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(` [${queryId}] ERROR: ${error} (${elapsed}s)`); console.log(` [${queryId}] ERROR: ${error} (${elapsed}s)`);