fix(core): Improve PDF XML extraction, embedding, and format detection; update loadPdf/exportPdf error handling; add new validator implementations and enhance IPdf metadata.

This commit is contained in:
2025-04-04 12:14:41 +00:00
parent 68fd50fd4c
commit 5d43c1ce4e
15 changed files with 1957 additions and 418 deletions

View File

@ -1,6 +1,6 @@
# XInvoice Corpus Testing Summary
Generated on: 2025-04-03T21:33:20.326Z
Generated on: 2025-04-04T12:11:35.722Z
## Overall Summary

View File

@ -1,7 +1,7 @@
{
"cii": {
"success": 27,
"fail": 0,
"success": 23,
"fail": 4,
"details": [
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/EN16931_1_Teilrechnung.cii.xml",
@ -137,27 +137,27 @@
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung, expected: cii"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung, expected: cii"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung, expected: cii"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung, expected: cii"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.cii.xml",
@ -174,133 +174,133 @@
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_1_Teilrechnung.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_2_Teilrechnung.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_AbweichenderZahlungsempf.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Betriebskostenabrechnung.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_DueDate.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_negativePaymentDue.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Elektron.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_ElektronischeAdresse.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Gutschrift.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Haftpflichtversicherung_Versicherungssteuer.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Innergemeinschaftliche_Lieferungen.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Kraftfahrversicherung_Bruttopreise.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Miete.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_OEPNV.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Physiotherapeut.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rabatte.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_RechnungsUebertragung.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rechnungskorrektur.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Reisekostenabrechnung.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_SEPA_Prenotification.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Sachversicherung_berechneter_Steuersatz.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
@ -330,13 +330,13 @@
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.ubl.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/ubl-tc434-creditnote1.xml",
"success": true,
"format": "xrechnung",
"format": "ubl",
"error": null
}
]
@ -346,5 +346,5 @@
"fail": 0,
"details": []
},
"totalSuccessRate": 1
"totalSuccessRate": 0.9272727272727272
}

View File

@ -1,13 +1,13 @@
{
"zugferdV1Correct": {
"success": 18,
"fail": 3,
"success": 21,
"fail": 0,
"details": [
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/4s4u/additional-data-sample-1.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Intarsys/ZUGFeRD_1p0_BASIC_Einfach.pdf",
@ -89,15 +89,15 @@
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140519_499.pdf",
"success": false,
"format": null,
"error": "Error: Unsupported invoice format: unknown"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140522_501.pdf",
"success": false,
"format": null,
"error": "Error: Unsupported invoice format: unknown"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140703_502.pdf",
@ -156,8 +156,8 @@
]
},
"zugferdV2Correct": {
"success": 48,
"fail": 30,
"success": 74,
"fail": 4,
"details": [
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/FNFE-factur-x-examples/Avoir_FR_type381_BASIC.pdf",
@ -221,183 +221,183 @@
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/PHP_@gpFacturX/sample_inofficial_20190125_atgp_factur-x_v_1_0.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Einfach.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Rechnungskorrektur.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Taxifahrt.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_1_Teilrechnung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_2_Teilrechnung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_AbweichenderZahlungsempf.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Betriebskostenabrechnung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Einfach.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Elektron.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_ElektronischeAdresse.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Gutschrift.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Haftpflichtversicherung_Versicherungssteuer.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Innergemeinschaftliche_Lieferungen.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"format": "xrechnung",
"error": "Wrong format detected: xrechnung"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Kraftfahrversicherung_Bruttopreise.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Miete.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_OEPNV.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Physiotherapeut.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rabatte.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_RechnungsUebertragung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rechnungskorrektur.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Reisekostenabrechnung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_SEPA_Prenotification.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Sachversicherung_berechneter_Steuersatz.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Fremdwaehrung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_InnergemeinschLieferungMehrereBestellungen.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Kostenrechnung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "facturx",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Rechnungskorrektur.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Warenrechnung.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/MINIMUM/zugferd_2p0_MINIMUM.pdf",
"success": false,
"format": null,
"error": "Error: No XML found in PDF"
"success": true,
"format": "zugferd",
"error": null
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/BASIC/zugferd_2p1_BASIC_Einfach.pdf",
@ -455,9 +455,9 @@
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Betriebskostenabrechnung_XRechnung_embedded.pdf",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Einfach.pdf",
@ -485,9 +485,9 @@
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_XRechnung.pdf",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_embedded.pdf",
@ -569,9 +569,9 @@
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Reisekostenabrechnung_XRechnung_embedded.pdf",
"success": true,
"format": "cii",
"error": null
"success": false,
"format": "xrechnung",
"error": "Wrong format detected: xrechnung"
},
{
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_SEPA_Prenotification.pdf",
@ -749,5 +749,5 @@
}
]
},
"totalCorrectSuccessRate": 0.6666666666666666
"totalCorrectSuccessRate": 0.9595959595959596
}