fix(test): add JSON validation and retry logic to invoice extraction
- Add tryExtractJson function to validate JSON before accepting it
- Use orchestrator.continueTask() to request a correction if the JSON is invalid
- Retry up to 2 times for malformed JSON responses
- Remove duplicate parseJsonToInvoice function
This commit is contained in:
@@ -339,16 +339,20 @@ function extractCurrency(s: string | undefined): string {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract JSON from response
|
||||
* Try to extract valid JSON from a response string
|
||||
*/
|
||||
function extractJsonFromResponse(response: string): Record<string, unknown> | null {
|
||||
let cleanResponse = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
const codeBlockMatch = cleanResponse.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : cleanResponse;
|
||||
function tryExtractJson(response: string): Record<string, unknown> | null {
|
||||
// Remove thinking tags
|
||||
let clean = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
|
||||
// Try code block
|
||||
const codeBlockMatch = clean.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
const jsonStr = codeBlockMatch ? codeBlockMatch[1].trim() : clean;
|
||||
|
||||
try {
|
||||
return JSON.parse(jsonStr);
|
||||
} catch {
|
||||
// Try to find JSON object
|
||||
const jsonMatch = jsonStr.match(/\{[\s\S]*\}/);
|
||||
if (jsonMatch) {
|
||||
try {
|
||||
@@ -361,29 +365,13 @@ function extractJsonFromResponse(response: string): Record<string, unknown> | nu
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse JSON response into IInvoice
|
||||
*/
|
||||
function parseJsonToInvoice(response: string): IInvoice | null {
|
||||
const parsed = extractJsonFromResponse(response);
|
||||
if (!parsed) return null;
|
||||
|
||||
return {
|
||||
invoice_number: extractInvoiceNumber(String(parsed.invoice_number || '')),
|
||||
invoice_date: extractDate(String(parsed.invoice_date || '')),
|
||||
vendor_name: String(parsed.vendor_name || '').replace(/\*\*/g, '').replace(/`/g, '').trim(),
|
||||
currency: extractCurrency(String(parsed.currency || '')),
|
||||
net_amount: parseAmount(parsed.net_amount as string | number),
|
||||
vat_amount: parseAmount(parsed.vat_amount as string | number),
|
||||
total_amount: parseAmount(parsed.total_amount as string | number),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract invoice from markdown using smartagent DualAgentOrchestrator
|
||||
* Validates JSON and retries if invalid
|
||||
*/
|
||||
async function extractInvoiceFromMarkdown(markdown: string, queryId: string): Promise<IInvoice | null> {
|
||||
const startTime = Date.now();
|
||||
const maxRetries = 2;
|
||||
|
||||
console.log(` [${queryId}] Invoice: ${markdown.length} chars`);
|
||||
|
||||
@@ -395,25 +383,70 @@ ${markdown}
|
||||
${JSON_EXTRACTION_PROMPT}`;
|
||||
|
||||
try {
|
||||
const result = await orchestrator.run(taskPrompt);
|
||||
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
let result = await orchestrator.run(taskPrompt);
|
||||
let elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(` [${queryId}] Status: ${result.status}, Iterations: ${result.iterations} (${elapsed}s)`);
|
||||
|
||||
if (result.success && result.result) {
|
||||
console.log(` [${queryId}] Result: ${result.result.substring(0, 100)}...`);
|
||||
return parseJsonToInvoice(result.result);
|
||||
// Try to parse JSON from result
|
||||
let jsonData: Record<string, unknown> | null = null;
|
||||
let responseText = result.result || '';
|
||||
|
||||
if (result.success && responseText) {
|
||||
jsonData = tryExtractJson(responseText);
|
||||
}
|
||||
|
||||
// Fallback: try parsing from history
|
||||
if (result.history?.length > 0) {
|
||||
if (!jsonData && result.history?.length > 0) {
|
||||
const lastMessage = result.history[result.history.length - 1];
|
||||
if (lastMessage?.content) {
|
||||
return parseJsonToInvoice(lastMessage.content);
|
||||
responseText = lastMessage.content;
|
||||
jsonData = tryExtractJson(responseText);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
// If JSON is invalid, retry with correction request
|
||||
let retries = 0;
|
||||
while (!jsonData && retries < maxRetries) {
|
||||
retries++;
|
||||
console.log(` [${queryId}] Invalid JSON, requesting correction (retry ${retries}/${maxRetries})...`);
|
||||
|
||||
result = await orchestrator.continueTask(
|
||||
`Your response was not valid JSON. Please output ONLY the JSON object with no markdown, no explanation, no thinking tags. Just the raw JSON starting with { and ending with }. Format:
|
||||
{"invoice_number":"X","invoice_date":"YYYY-MM-DD","vendor_name":"X","currency":"EUR","net_amount":0,"vat_amount":0,"total_amount":0}`
|
||||
);
|
||||
|
||||
elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(` [${queryId}] Retry ${retries}: ${result.status} (${elapsed}s)`);
|
||||
|
||||
responseText = result.result || '';
|
||||
if (responseText) {
|
||||
jsonData = tryExtractJson(responseText);
|
||||
}
|
||||
|
||||
if (!jsonData && result.history?.length > 0) {
|
||||
const lastMessage = result.history[result.history.length - 1];
|
||||
if (lastMessage?.content) {
|
||||
responseText = lastMessage.content;
|
||||
jsonData = tryExtractJson(responseText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!jsonData) {
|
||||
console.log(` [${queryId}] Failed to get valid JSON after ${retries} retries`);
|
||||
return null;
|
||||
}
|
||||
|
||||
console.log(` [${queryId}] Valid JSON extracted`);
|
||||
return {
|
||||
invoice_number: extractInvoiceNumber(String(jsonData.invoice_number || '')),
|
||||
invoice_date: extractDate(String(jsonData.invoice_date || '')),
|
||||
vendor_name: String(jsonData.vendor_name || '').replace(/\*\*/g, '').replace(/`/g, '').trim(),
|
||||
currency: extractCurrency(String(jsonData.currency || '')),
|
||||
net_amount: parseAmount(jsonData.net_amount as string | number),
|
||||
vat_amount: parseAmount(jsonData.vat_amount as string | number),
|
||||
total_amount: parseAmount(jsonData.total_amount as string | number),
|
||||
};
|
||||
} catch (error) {
|
||||
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(` [${queryId}] ERROR: ${error} (${elapsed}s)`);
|
||||
|
||||
Reference in New Issue
Block a user