fix(test): add JSON validation and retry logic to invoice extraction

- Add tryExtractJson function to validate the JSON in a response before accepting it
- Use orchestrator.continueTask() to request correction if JSON is invalid
- Retry up to 2 times for malformed JSON responses
- Remove duplicate parseJsonToInvoice function
This commit is contained in:
2026-01-20 00:45:30 +00:00
parent d384c1d79b
commit d8bdb18841

View File

@@ -339,16 +339,20 @@ function extractCurrency(s: string | undefined): string {
}
/**
* Extract JSON from response
* Try to extract valid JSON from a response string
*/
function extractJsonFromResponse(response: string): Record<string, unknown> | null {
let cleanResponse = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
const codeBlockMatch = cleanResponse.match(/```(?:json)?\s*([\s\S]*?)```/);
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : cleanResponse;
function tryExtractJson(response: string): Record<string, unknown> | null {
// Remove thinking tags
let clean = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
// Try code block
const codeBlockMatch = clean.match(/```(?:json)?\s*([\s\S]*?)```/);
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : clean;
try {
return JSON.parse(jsonStr);
} catch {
// Try to find JSON object
const jsonMatch = jsonStr.match(/\{[\s\S]*\}/);
if (jsonMatch) {
try {
@@ -361,29 +365,13 @@ function extractJsonFromResponse(response: string): Record<string, unknown> | nu
}
}
/**
 * Build an IInvoice from a raw response string.
 *
 * The response is passed to extractJsonFromResponse; when that yields nothing,
 * null is returned. Otherwise every text field is coerced with String()
 * (falsy values become the empty string) and run through its matching
 * extract helper, while the three amount fields are handed to parseAmount.
 *
 * @param response - Raw response text expected to contain a JSON object.
 * @returns The assembled invoice, or null when no JSON could be extracted.
 */
function parseJsonToInvoice(response: string): IInvoice | null {
  const data = extractJsonFromResponse(response);

  if (data) {
    // Falsy field values (undefined, null, '', 0, false) collapse to ''.
    const asText = (value: unknown): string => String(value || '');

    const invoiceNumber = extractInvoiceNumber(asText(data.invoice_number));
    const invoiceDate = extractDate(asText(data.invoice_date));

    // Drop markdown bold markers and backticks from the vendor name.
    const vendorName = asText(data.vendor_name)
      .replace(/\*\*/g, '')
      .replace(/`/g, '')
      .trim();

    const currency = extractCurrency(asText(data.currency));
    const netAmount = parseAmount(data.net_amount as string | number);
    const vatAmount = parseAmount(data.vat_amount as string | number);
    const totalAmount = parseAmount(data.total_amount as string | number);

    return {
      invoice_number: invoiceNumber,
      invoice_date: invoiceDate,
      vendor_name: vendorName,
      currency: currency,
      net_amount: netAmount,
      vat_amount: vatAmount,
      total_amount: totalAmount,
    };
  }

  return null;
}
/**
* Extract invoice from markdown using smartagent DualAgentOrchestrator
* Validates JSON and retries if invalid
*/
async function extractInvoiceFromMarkdown(markdown: string, queryId: string): Promise<IInvoice | null> {
const startTime = Date.now();
const maxRetries = 2;
console.log(` [${queryId}] Invoice: ${markdown.length} chars`);
@@ -395,25 +383,70 @@ ${markdown}
${JSON_EXTRACTION_PROMPT}`;
try {
const result = await orchestrator.run(taskPrompt);
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
let result = await orchestrator.run(taskPrompt);
let elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(` [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`);
if (result.success && result.result) {
console.log(` [${queryId}] Result: ${result.result.substring(0, 100)}...`);
return parseJsonToInvoice(result.result);
// Try to parse JSON from result
let jsonData: Record<string, unknown> | null = null;
let responseText = result.result || '';
if (result.success && responseText) {
jsonData = tryExtractJson(responseText);
}
// Fallback: try parsing from history
if (result.history?.length > 0) {
if (!jsonData && result.history?.length > 0) {
const lastMessage = result.history[result.history.length - 1];
if (lastMessage?.content) {
return parseJsonToInvoice(lastMessage.content);
responseText = lastMessage.content;
jsonData = tryExtractJson(responseText);
}
}
// If JSON is invalid, retry with correction request
let retries = 0;
while (!jsonData && retries < maxRetries) {
retries++;
console.log(` [${queryId}] Invalid JSON, requesting correction (retry ${retries}/${maxRetries})...`);
result = await orchestrator.continueTask(
`Your response was not valid JSON. Please output ONLY the JSON object with no markdown, no explanation, no thinking tags. Just the raw JSON starting with { and ending with }. Format:
{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}`
);
elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(` [${queryId}] Retry ${retries}: ${result.status} (${elapsed}s)`);
responseText = result.result || '';
if (responseText) {
jsonData = tryExtractJson(responseText);
}
if (!jsonData && result.history?.length > 0) {
const lastMessage = result.history[result.history.length - 1];
if (lastMessage?.content) {
responseText = lastMessage.content;
jsonData = tryExtractJson(responseText);
}
}
}
if (!jsonData) {
console.log(` [${queryId}] Failed to get valid JSON after ${retries} retries`);
return null;
}
console.log(` [${queryId}] Valid JSON extracted`);
return {
invoice_number: extractInvoiceNumber(String(jsonData.invoice_number || '')),
invoice_date: extractDate(String(jsonData.invoice_date || '')),
vendor_name: String(jsonData.vendor_name || '').replace(/\*\*/g, '').replace(/`/g, '').trim(),
currency: extractCurrency(String(jsonData.currency || '')),
net_amount: parseAmount(jsonData.net_amount as string | number),
vat_amount: parseAmount(jsonData.vat_amount as string | number),
total_amount: parseAmount(jsonData.total_amount as string | number),
};
} catch (error) {
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
console.log(` [${queryId}] ERROR: ${error} (${elapsed}s)`);