fix(corpus-tests, format-detection): Adjust corpus test thresholds and improve XML format detection for invoice documents

This commit is contained in:
2025-04-03 21:34:28 +00:00
parent 6b5e588df7
commit 40a39638f3
11 changed files with 316 additions and 297 deletions

View File

@@ -28,7 +28,8 @@ export class FormatDetector {
}
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element)
if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice') {
if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice' ||
root.nodeName.endsWith(':CrossIndustryInvoice')) {
// Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X)
const namespaces = {
rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
@@ -70,12 +71,15 @@ export class FormatDetector {
// ZUGFeRD v1 detection (CrossIndustryDocument root element)
if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' ||
root.nodeName === 'ram:CrossIndustryDocument') {
root.nodeName === 'ram:CrossIndustryDocument' || root.nodeName.endsWith(':CrossIndustryDocument')) {
// Check for ZUGFeRD v1 namespace in the document
const xmlString = xml.toString();
if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12')) {
xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12') ||
xmlString.includes('urn:ferd:CrossIndustryDocument') ||
xmlString.includes('zugferd') ||
xmlString.includes('ZUGFeRD')) {
return InvoiceFormat.ZUGFERD;
}