fix(core): Improve PDF XML extraction, embedding, and format detection; update loadPdf/exportPdf error handling; add new validator implementations and enhance IPdf metadata.
This commit is contained in:
parent
68fd50fd4c
commit
5d43c1ce4e
10
changelog.md
10
changelog.md
@ -1,5 +1,15 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2025-04-04 - 4.1.6 - fix(core)
|
||||||
|
Improve PDF XML extraction, embedding, and format detection; update loadPdf/exportPdf error handling; add new validator implementations and enhance IPdf metadata.
|
||||||
|
|
||||||
|
- Update loadPdf to capture extraction result details including detected format and improve error messaging
|
||||||
|
- Enhance TextXMLExtractor with a chunked approach using both UTF-8 and Latin-1 decoding for reliable text extraction
|
||||||
|
- Refactor PDFEmbedder to return a structured PDFEmbedResult with proper filename normalization and robust error handling
|
||||||
|
- Extend format detection logic by adding quickFormatCheck, isUBLFormat, isXRechnungFormat, isCIIFormat, isZUGFERDV1Format, and FatturaPA checks
|
||||||
|
- Introduce new validator classes (UBLValidator, XRechnungValidator, FatturaPAValidator) and a generic fallback validator in ValidatorFactory
|
||||||
|
- Update IPdf interface to include embedded XML metadata (format, filename, description) for better traceability
|
||||||
|
|
||||||
## 2025-04-03 - 4.1.5 - fix(core)
|
## 2025-04-03 - 4.1.5 - fix(core)
|
||||||
No uncommitted changes detected in the repository. The project files and functionality remain unchanged.
|
No uncommitted changes detected in the repository. The project files and functionality remain unchanged.
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# XInvoice Corpus Testing Summary
|
# XInvoice Corpus Testing Summary
|
||||||
|
|
||||||
Generated on: 2025-04-03T21:33:20.326Z
|
Generated on: 2025-04-04T12:11:35.722Z
|
||||||
|
|
||||||
## Overall Summary
|
## Overall Summary
|
||||||
|
|
||||||
|
Binary file not shown.
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"cii": {
|
"cii": {
|
||||||
"success": 27,
|
"success": 23,
|
||||||
"fail": 0,
|
"fail": 4,
|
||||||
"details": [
|
"details": [
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/EN16931_1_Teilrechnung.cii.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/EN16931_1_Teilrechnung.cii.xml",
|
||||||
@ -137,27 +137,27 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.cii.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.cii.xml",
|
||||||
@ -174,133 +174,133 @@
|
|||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_1_Teilrechnung.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_1_Teilrechnung.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_2_Teilrechnung.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_2_Teilrechnung.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_AbweichenderZahlungsempf.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_AbweichenderZahlungsempf.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Betriebskostenabrechnung.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Betriebskostenabrechnung.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_DueDate.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_DueDate.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_negativePaymentDue.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_negativePaymentDue.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Elektron.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Elektron.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_ElektronischeAdresse.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_ElektronischeAdresse.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Gutschrift.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Gutschrift.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Haftpflichtversicherung_Versicherungssteuer.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Haftpflichtversicherung_Versicherungssteuer.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Innergemeinschaftliche_Lieferungen.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Innergemeinschaftliche_Lieferungen.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Kraftfahrversicherung_Bruttopreise.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Kraftfahrversicherung_Bruttopreise.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Miete.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Miete.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_OEPNV.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_OEPNV.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Physiotherapeut.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Physiotherapeut.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rabatte.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rabatte.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_RechnungsUebertragung.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_RechnungsUebertragung.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rechnungskorrektur.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rechnungskorrektur.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Reisekostenabrechnung.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Reisekostenabrechnung.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_SEPA_Prenotification.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_SEPA_Prenotification.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Sachversicherung_berechneter_Steuersatz.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Sachversicherung_berechneter_Steuersatz.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -330,13 +330,13 @@
|
|||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.ubl.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.ubl.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/ubl-tc434-creditnote1.xml",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/ubl-tc434-creditnote1.xml",
|
||||||
"success": true,
|
"success": true,
|
||||||
"format": "xrechnung",
|
"format": "ubl",
|
||||||
"error": null
|
"error": null
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -346,5 +346,5 @@
|
|||||||
"fail": 0,
|
"fail": 0,
|
||||||
"details": []
|
"details": []
|
||||||
},
|
},
|
||||||
"totalSuccessRate": 1
|
"totalSuccessRate": 0.9272727272727272
|
||||||
}
|
}
|
@ -1,13 +1,13 @@
|
|||||||
{
|
{
|
||||||
"zugferdV1Correct": {
|
"zugferdV1Correct": {
|
||||||
"success": 18,
|
"success": 21,
|
||||||
"fail": 3,
|
"fail": 0,
|
||||||
"details": [
|
"details": [
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/4s4u/additional-data-sample-1.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/4s4u/additional-data-sample-1.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Intarsys/ZUGFeRD_1p0_BASIC_Einfach.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Intarsys/ZUGFeRD_1p0_BASIC_Einfach.pdf",
|
||||||
@ -89,15 +89,15 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140519_499.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140519_499.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: Unsupported invoice format: unknown"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140522_501.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140522_501.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: Unsupported invoice format: unknown"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140703_502.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140703_502.pdf",
|
||||||
@ -156,8 +156,8 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"zugferdV2Correct": {
|
"zugferdV2Correct": {
|
||||||
"success": 48,
|
"success": 74,
|
||||||
"fail": 30,
|
"fail": 4,
|
||||||
"details": [
|
"details": [
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/FNFE-factur-x-examples/Avoir_FR_type381_BASIC.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/FNFE-factur-x-examples/Avoir_FR_type381_BASIC.pdf",
|
||||||
@ -221,183 +221,183 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/PHP_@gpFacturX/sample_inofficial_20190125_atgp_factur-x_v_1_0.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/PHP_@gpFacturX/sample_inofficial_20190125_atgp_factur-x_v_1_0.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Einfach.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Einfach.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Rechnungskorrektur.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Rechnungskorrektur.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Taxifahrt.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Taxifahrt.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_1_Teilrechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_1_Teilrechnung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_2_Teilrechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_2_Teilrechnung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_AbweichenderZahlungsempf.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_AbweichenderZahlungsempf.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Betriebskostenabrechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Betriebskostenabrechnung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Einfach.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Einfach.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Elektron.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Elektron.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_ElektronischeAdresse.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_ElektronischeAdresse.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Gutschrift.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Gutschrift.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Haftpflichtversicherung_Versicherungssteuer.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Haftpflichtversicherung_Versicherungssteuer.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Innergemeinschaftliche_Lieferungen.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Innergemeinschaftliche_Lieferungen.pdf",
|
||||||
"success": false,
|
"success": false,
|
||||||
"format": null,
|
"format": "xrechnung",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": "Wrong format detected: xrechnung"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Kraftfahrversicherung_Bruttopreise.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Kraftfahrversicherung_Bruttopreise.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Miete.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Miete.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_OEPNV.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_OEPNV.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Physiotherapeut.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Physiotherapeut.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rabatte.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rabatte.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_RechnungsUebertragung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_RechnungsUebertragung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rechnungskorrektur.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rechnungskorrektur.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Reisekostenabrechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Reisekostenabrechnung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_SEPA_Prenotification.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_SEPA_Prenotification.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Sachversicherung_berechneter_Steuersatz.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Sachversicherung_berechneter_Steuersatz.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Fremdwaehrung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Fremdwaehrung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_InnergemeinschLieferungMehrereBestellungen.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_InnergemeinschLieferungMehrereBestellungen.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Kostenrechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Kostenrechnung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "facturx",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Rechnungskorrektur.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Rechnungskorrektur.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Warenrechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Warenrechnung.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/MINIMUM/zugferd_2p0_MINIMUM.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/MINIMUM/zugferd_2p0_MINIMUM.pdf",
|
||||||
"success": false,
|
"success": true,
|
||||||
"format": null,
|
"format": "zugferd",
|
||||||
"error": "Error: No XML found in PDF"
|
"error": null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/BASIC/zugferd_2p1_BASIC_Einfach.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/BASIC/zugferd_2p1_BASIC_Einfach.pdf",
|
||||||
@ -455,9 +455,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Betriebskostenabrechnung_XRechnung_embedded.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Betriebskostenabrechnung_XRechnung_embedded.pdf",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Einfach.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Einfach.pdf",
|
||||||
@ -485,9 +485,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_XRechnung.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_XRechnung.pdf",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_embedded.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_embedded.pdf",
|
||||||
@ -569,9 +569,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Reisekostenabrechnung_XRechnung_embedded.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Reisekostenabrechnung_XRechnung_embedded.pdf",
|
||||||
"success": true,
|
"success": false,
|
||||||
"format": "cii",
|
"format": "xrechnung",
|
||||||
"error": null
|
"error": "Wrong format detected: xrechnung"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_SEPA_Prenotification.pdf",
|
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_SEPA_Prenotification.pdf",
|
||||||
@ -749,5 +749,5 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"totalCorrectSuccessRate": 0.6666666666666666
|
"totalCorrectSuccessRate": 0.9595959595959596
|
||||||
}
|
}
|
@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@fin.cx/xinvoice',
|
name: '@fin.cx/xinvoice',
|
||||||
version: '4.1.5',
|
version: '4.1.6',
|
||||||
description: 'A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for xinvoice packages.'
|
description: 'A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for xinvoice packages.'
|
||||||
}
|
}
|
||||||
|
@ -189,27 +189,31 @@ export class XInvoice {
|
|||||||
public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise<XInvoice> {
|
public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise<XInvoice> {
|
||||||
try {
|
try {
|
||||||
// Extract XML from PDF using the consolidated extractor
|
// Extract XML from PDF using the consolidated extractor
|
||||||
// which tries multiple extraction methods in sequence
|
const extractResult = await this.pdfExtractor.extractXml(pdfBuffer);
|
||||||
const xmlContent = await this.pdfExtractor.extractXml(pdfBuffer);
|
|
||||||
|
|
||||||
// Store the PDF buffer
|
// Store the PDF buffer
|
||||||
this.pdf = {
|
this.pdf = {
|
||||||
name: 'invoice.pdf',
|
name: 'invoice.pdf',
|
||||||
id: `invoice-${Date.now()}`,
|
id: `invoice-${Date.now()}`,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: ''
|
textExtraction: '',
|
||||||
|
format: extractResult.success ? extractResult.format?.toString() : undefined
|
||||||
},
|
},
|
||||||
buffer: pdfBuffer instanceof Buffer ? new Uint8Array(pdfBuffer) : pdfBuffer
|
buffer: pdfBuffer instanceof Buffer ? new Uint8Array(pdfBuffer) : pdfBuffer
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!xmlContent) {
|
// Handle extraction result
|
||||||
// No XML found in PDF
|
if (!extractResult.success || !extractResult.xml) {
|
||||||
console.warn('No XML found in PDF');
|
const errorMessage = extractResult.error ? extractResult.error.message : 'Unknown error extracting XML from PDF';
|
||||||
throw new Error('No XML found in PDF');
|
console.warn('XML extraction failed:', errorMessage);
|
||||||
|
throw new Error(`No XML found in PDF: ${errorMessage}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the extracted XML
|
// Load the extracted XML
|
||||||
await this.loadXml(xmlContent, validate);
|
await this.loadXml(extractResult.xml, validate);
|
||||||
|
|
||||||
|
// Store the detected format
|
||||||
|
this.detectedFormat = extractResult.format || InvoiceFormat.UNKNOWN;
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@ -281,7 +285,7 @@ export class XInvoice {
|
|||||||
valid: false,
|
valid: false,
|
||||||
errors: [{
|
errors: [{
|
||||||
code: 'VAL-ERROR',
|
code: 'VAL-ERROR',
|
||||||
message: `Validation error: ${error.message}`
|
message: `Validation error: ${error instanceof Error ? error.message : String(error)}`
|
||||||
}],
|
}],
|
||||||
level
|
level
|
||||||
};
|
};
|
||||||
@ -356,7 +360,7 @@ export class XInvoice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Embed XML into PDF
|
// Embed XML into PDF
|
||||||
const modifiedPdf = await this.pdfEmbedder.createPdfWithXml(
|
const result = await this.pdfEmbedder.createPdfWithXml(
|
||||||
this.pdf.buffer,
|
this.pdf.buffer,
|
||||||
xmlContent,
|
xmlContent,
|
||||||
filename,
|
filename,
|
||||||
@ -365,7 +369,14 @@ export class XInvoice {
|
|||||||
this.pdf.id
|
this.pdf.id
|
||||||
);
|
);
|
||||||
|
|
||||||
return modifiedPdf;
|
// Handle potential errors
|
||||||
|
if (!result.success || !result.pdf) {
|
||||||
|
const errorMessage = result.error ? result.error.message : 'Unknown error embedding XML into PDF';
|
||||||
|
console.error('Error exporting PDF:', errorMessage);
|
||||||
|
throw new Error(`Failed to export PDF: ${errorMessage}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.pdf;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2,6 +2,7 @@ import { CIIBaseEncoder } from '../cii.encoder.js';
|
|||||||
import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
|
import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
|
||||||
import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
|
import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
|
||||||
import { CIIProfile } from '../cii.types.js';
|
import { CIIProfile } from '../cii.types.js';
|
||||||
|
import { DOMParser, XMLSerializer } from '../../../plugins.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encoder for ZUGFeRD invoice format
|
* Encoder for ZUGFeRD invoice format
|
||||||
@ -19,12 +20,17 @@ export class ZUGFeRDEncoder extends CIIBaseEncoder {
|
|||||||
* @returns ZUGFeRD XML string
|
* @returns ZUGFeRD XML string
|
||||||
*/
|
*/
|
||||||
protected async encodeCreditNote(creditNote: TCreditNote): Promise<string> {
  // Start from the profile-tagged skeleton, mark it as a credit note
  // (type code 381), then fill in the content shared with invoices.
  const creditNoteDoc = this.createBaseXml();
  this.setDocumentTypeCode(creditNoteDoc, '381');
  this.addCommonInvoiceData(creditNoteDoc, creditNote);

  // Turn the DOM back into an XML string
  const serializer = new XMLSerializer();
  return serializer.serializeToString(creditNoteDoc);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -33,11 +39,616 @@ export class ZUGFeRDEncoder extends CIIBaseEncoder {
|
|||||||
* @returns ZUGFeRD XML string
|
* @returns ZUGFeRD XML string
|
||||||
*/
|
*/
|
||||||
protected async encodeDebitNote(debitNote: TDebitNote): Promise<string> {
  // Start from the profile-tagged skeleton and tag it with type code 380.
  // NOTE(review): 380 is the code this encoder documents as "invoice"; confirm
  // whether debit notes should carry a distinct document type code instead.
  const debitNoteDoc = this.createBaseXml();
  this.setDocumentTypeCode(debitNoteDoc, '380');
  this.addCommonInvoiceData(debitNoteDoc, debitNote);

  // Turn the DOM back into an XML string
  const serializer = new XMLSerializer();
  return serializer.serializeToString(debitNoteDoc);
}
|
||||||
|
|
||||||
|
/**
 * Builds the skeleton document that every encode method starts from.
 *
 * The markup returned by `createXmlRoot()` is parsed into a DOM and the
 * guideline (profile) parameter is attached through `addProfile()`.
 * @returns Parsed XML document carrying the profile information
 */
private createBaseXml(): Document {
  const rootMarkup = this.createXmlRoot();
  const skeleton = new DOMParser().parseFromString(rootMarkup, 'application/xml');

  this.addProfile(skeleton);
  return skeleton;
}
|
||||||
|
|
||||||
|
/**
 * Attaches the guideline (profile) identifier to the document context.
 *
 * A `ram:GuidelineSpecifiedDocumentContextParameter` holding a `ram:ID` is
 * appended to `rsm:ExchangedDocumentContext`; the context element is created
 * under the root when the document does not contain one yet.
 * @param doc XML document
 */
private addProfile(doc: Document): void {
  const rootElement = doc.documentElement;

  // Locate the context element, creating it on demand
  let context = rootElement.getElementsByTagName('rsm:ExchangedDocumentContext')[0];
  if (!context) {
    context = doc.createElement('rsm:ExchangedDocumentContext');
    rootElement.appendChild(context);
  }

  // BASIC is the fallback for every profile other than COMFORT and EXTENDED
  let guidelineId;
  switch (this.profile) {
    case CIIProfile.COMFORT:
      guidelineId = ZUGFERD_PROFILE_IDS.COMFORT;
      break;
    case CIIProfile.EXTENDED:
      guidelineId = ZUGFERD_PROFILE_IDS.EXTENDED;
      break;
    default:
      guidelineId = ZUGFERD_PROFILE_IDS.BASIC;
  }

  // Assemble <guideline><ID>…</ID></guideline> and hang it on the context
  const guideline = doc.createElement('ram:GuidelineSpecifiedDocumentContextParameter');
  const guidelineIdElement = doc.createElement('ram:ID');
  guidelineIdElement.textContent = guidelineId;
  guideline.appendChild(guidelineIdElement);
  context.appendChild(guideline);
}
|
||||||
|
|
||||||
|
/**
 * Writes the document type code into `rsm:ExchangedDocument`.
 *
 * The `rsm:ExchangedDocument` element is created under the root when it is
 * missing; a new `ram:TypeCode` child is then appended to it.
 * @param doc XML document
 * @param typeCode Document type code (380 for invoice, 381 for credit note)
 */
private setDocumentTypeCode(doc: Document, typeCode: string): void {
  const rootElement = doc.documentElement;

  // Locate the header element, creating it on demand
  let header = rootElement.getElementsByTagName('rsm:ExchangedDocument')[0];
  if (!header) {
    header = doc.createElement('rsm:ExchangedDocument');
    rootElement.appendChild(header);
  }

  const typeCodeNode = doc.createElement('ram:TypeCode');
  typeCodeNode.textContent = typeCode;
  header.appendChild(typeCodeNode);
}
|
||||||
|
|
||||||
|
/**
 * Populates the document header and the trade transaction from invoice data.
 *
 * Writes the invoice ID, issue date and notes into `rsm:ExchangedDocument`,
 * then fills `rsm:SupplyChainTradeTransaction` with the line items and the
 * agreement, delivery and settlement sections. Either container element is
 * created under the root when it does not exist yet.
 *
 * Children are emitted in the sequence the CII schema prescribes: `ram:ID` is
 * placed ahead of a `ram:TypeCode` that was already written, and the line
 * items are added before the header sections.
 * @param doc XML document
 * @param invoice Invoice data
 */
private addCommonInvoiceData(doc: Document, invoice: TInvoice): void {
  const root = doc.documentElement;

  // Get the header element or create it
  let documentElement = root.getElementsByTagName('rsm:ExchangedDocument')[0];
  if (!documentElement) {
    documentElement = doc.createElement('rsm:ExchangedDocument');
    root.appendChild(documentElement);
  }

  // The encode methods call setDocumentTypeCode() before this method, so a
  // ram:TypeCode may already be present; the schema requires ram:ID before it.
  const idElement = doc.createElement('ram:ID');
  idElement.textContent = invoice.id;
  const existingTypeCode = documentElement.getElementsByTagName('ram:TypeCode')[0];
  if (existingTypeCode && existingTypeCode.parentNode === documentElement) {
    documentElement.insertBefore(idElement, existingTypeCode);
  } else {
    documentElement.appendChild(idElement);
  }

  // Issue date
  const issueDateElement = doc.createElement('ram:IssueDateTime');
  const dateStringElement = doc.createElement('udt:DateTimeString');
  dateStringElement.setAttribute('format', '102'); // YYYYMMDD format
  dateStringElement.textContent = this.formatDateYYYYMMDD(invoice.date);
  issueDateElement.appendChild(dateStringElement);
  documentElement.appendChild(issueDateElement);

  // Notes, one ram:IncludedNote per entry
  if (invoice.notes && invoice.notes.length > 0) {
    for (const note of invoice.notes) {
      const noteElement = doc.createElement('ram:IncludedNote');
      const contentElement = doc.createElement('ram:Content');
      contentElement.textContent = note;
      noteElement.appendChild(contentElement);
      documentElement.appendChild(noteElement);
    }
  }

  // Get the transaction element or create it
  let transactionElement = root.getElementsByTagName('rsm:SupplyChainTradeTransaction')[0];
  if (!transactionElement) {
    transactionElement = doc.createElement('rsm:SupplyChainTradeTransaction');
    root.appendChild(transactionElement);
  }

  // Line items come first: the schema lists them ahead of the header
  // agreement, delivery and settlement sections.
  this.addLineItems(doc, transactionElement, invoice);

  // Agreement section with seller and buyer
  this.addAgreementSection(doc, transactionElement, invoice);

  // Delivery section
  this.addDeliverySection(doc, transactionElement, invoice);

  // Settlement section with payment terms and totals
  this.addSettlementSection(doc, transactionElement, invoice);
}
|
||||||
|
|
||||||
|
/**
 * Adds the agreement section with seller and buyer information.
 *
 * Appends `ram:ApplicableHeaderTradeAgreement` to the transaction and fills it
 * with the optional buyer reference, the seller party and the buyer party.
 * When the invoice has an electronic address and the seller is a company, the
 * address is written as `ram:URIUniversalCommunication/ram:URIID` inside the
 * seller party, ahead of any tax registration entries.
 * @param doc XML document
 * @param transactionElement Transaction element
 * @param invoice Invoice data
 */
private addAgreementSection(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  const agreementElement = doc.createElement('ram:ApplicableHeaderTradeAgreement');
  transactionElement.appendChild(agreementElement);

  // Buyer reference, if available
  if (invoice.buyerReference) {
    const buyerRefElement = doc.createElement('ram:BuyerReference');
    buyerRefElement.textContent = invoice.buyerReference;
    agreementElement.appendChild(buyerRefElement);
  }

  // Seller
  const sellerElement = doc.createElement('ram:SellerTradeParty');
  this.addPartyInfo(doc, sellerElement, invoice.from);

  // Seller electronic address, if available
  if (invoice.electronicAddress && invoice.from.type === 'company') {
    const communicationElement = doc.createElement('ram:URIUniversalCommunication');
    const uriElement = doc.createElement('ram:URIID');
    uriElement.setAttribute('schemeID', invoice.electronicAddress.scheme);
    uriElement.textContent = invoice.electronicAddress.value;
    communicationElement.appendChild(uriElement);

    // The schema places the communication element between the postal address
    // and the tax registrations that addPartyInfo() has already appended.
    const firstTaxRegistration = sellerElement.getElementsByTagName('ram:SpecifiedTaxRegistration')[0];
    if (firstTaxRegistration && firstTaxRegistration.parentNode === sellerElement) {
      sellerElement.insertBefore(communicationElement, firstTaxRegistration);
    } else {
      sellerElement.appendChild(communicationElement);
    }
  }

  agreementElement.appendChild(sellerElement);

  // Buyer
  const buyerElement = doc.createElement('ram:BuyerTradeParty');
  this.addPartyInfo(doc, buyerElement, invoice.to);
  agreementElement.appendChild(buyerElement);
}
|
||||||
|
|
||||||
|
/**
 * Adds name, postal address and registration identifiers to a party element.
 *
 * Address parts are only written when present, in the order the CII schema
 * prescribes for `ram:PostalTradeAddress` (postcode, line one, line two, city,
 * country). A party without an `address` object yields an empty address
 * element instead of throwing.
 * @param doc XML document
 * @param partyElement Party element
 * @param party Party data
 */
private addPartyInfo(doc: Document, partyElement: Element, party: any): void {
  // Creates <tagName>text</tagName> under parent and returns the new element
  const appendText = (parent: Element, tagName: string, text: string): Element => {
    const child = doc.createElement(tagName);
    child.textContent = text;
    parent.appendChild(child);
    return child;
  };

  // Name
  appendText(partyElement, 'ram:Name', party.name);

  // Postal address
  const address = party.address ?? {};
  const addressElement = doc.createElement('ram:PostalTradeAddress');

  // Postal code goes first in the schema sequence
  if (address.postalCode) {
    appendText(addressElement, 'ram:PostcodeCode', address.postalCode);
  }

  // Address line 1 (street)
  if (address.streetName) {
    appendText(addressElement, 'ram:LineOne', address.streetName);
  }

  // Address line 2 (house number); the value '0' is treated as "not set"
  if (address.houseNumber && address.houseNumber !== '0') {
    appendText(addressElement, 'ram:LineTwo', address.houseNumber);
  }

  // City
  if (address.city) {
    appendText(addressElement, 'ram:CityName', address.city);
  }

  // Country: the code is preferred, the country value is the fallback
  if (address.country || address.countryCode) {
    appendText(addressElement, 'ram:CountryID', address.countryCode || address.country);
  }

  partyElement.appendChild(addressElement);

  const registration = party.registrationDetails;

  // VAT ID, if available
  if (registration && registration.vatId) {
    const taxRegistrationElement = doc.createElement('ram:SpecifiedTaxRegistration');
    const taxIdElement = appendText(taxRegistrationElement, 'ram:ID', registration.vatId);
    taxIdElement.setAttribute('schemeID', 'VA');
    partyElement.appendChild(taxRegistrationElement);
  }

  // Registration ID, if available
  // NOTE(review): written as a tax registration with scheme 'FC' — confirm this
  // is the intended element for a company registration number.
  if (registration && registration.registrationId) {
    const regRegistrationElement = doc.createElement('ram:SpecifiedTaxRegistration');
    const regIdElement = appendText(regRegistrationElement, 'ram:ID', registration.registrationId);
    regIdElement.setAttribute('schemeID', 'FC');
    partyElement.appendChild(regRegistrationElement);
  }
}
|
||||||
|
|
||||||
|
/**
 * Adds the delivery section to the transaction.
 *
 * Always appends `ram:ApplicableHeaderTradeDelivery`; the actual delivery date
 * and the period of performance are written into it only when the invoice
 * provides them.
 * @param doc XML document
 * @param transactionElement Transaction element
 * @param invoice Invoice data
 */
private addDeliverySection(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  const delivery = doc.createElement('ram:ApplicableHeaderTradeDelivery');
  transactionElement.appendChild(delivery);

  // Actual delivery date
  if (invoice.deliveryDate) {
    const occurrenceText = doc.createElement('udt:DateTimeString');
    occurrenceText.setAttribute('format', '102'); // YYYYMMDD format
    occurrenceText.textContent = this.formatDateYYYYMMDD(invoice.deliveryDate);

    const occurrence = doc.createElement('ram:OccurrenceDateTime');
    occurrence.appendChild(occurrenceText);

    const deliveryEvent = doc.createElement('ram:ActualDeliverySupplyChainEvent');
    deliveryEvent.appendChild(occurrence);
    delivery.appendChild(deliveryEvent);
  }

  // Period of performance
  // NOTE(review): the CII schema lists ram:BillingSpecifiedPeriod under the
  // settlement section rather than the delivery section — confirm placement.
  if (invoice.periodOfPerformance) {
    const period = doc.createElement('ram:BillingSpecifiedPeriod');

    // Period start
    if (invoice.periodOfPerformance.from) {
      const startText = doc.createElement('udt:DateTimeString');
      startText.setAttribute('format', '102'); // YYYYMMDD format
      startText.textContent = this.formatDateYYYYMMDD(invoice.periodOfPerformance.from);

      const start = doc.createElement('ram:StartDateTime');
      start.appendChild(startText);
      period.appendChild(start);
    }

    // Period end
    if (invoice.periodOfPerformance.to) {
      const endText = doc.createElement('udt:DateTimeString');
      endText.setAttribute('format', '102'); // YYYYMMDD format
      endText.textContent = this.formatDateYYYYMMDD(invoice.periodOfPerformance.to);

      const end = doc.createElement('ram:EndDateTime');
      end.appendChild(endText);
      period.appendChild(end);
    }

    delivery.appendChild(period);
  }
}
|
||||||
|
|
||||||
|
/**
 * Adds the settlement section with currency, payment means, tax breakdown,
 * payment terms and totals.
 *
 * Children of `ram:ApplicableHeaderTradeSettlement` are appended in the order
 * the CII schema prescribes: currency code, payment means, trade tax, payment
 * terms, monetary summation. Payment means are written only when the invoice
 * has payment options; the payee account is written only when those options
 * contain a SEPA connection with an IBAN.
 * @param doc XML document
 * @param transactionElement Transaction element
 * @param invoice Invoice data
 */
private addSettlementSection(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  const settlementElement = doc.createElement('ram:ApplicableHeaderTradeSettlement');
  transactionElement.appendChild(settlementElement);

  // Currency
  const currencyElement = doc.createElement('ram:InvoiceCurrencyCode');
  currencyElement.textContent = invoice.currency;
  settlementElement.appendChild(currencyElement);

  // Payment means, if available (using a generic approach)
  if (invoice.paymentOptions) {
    const paymentMeansElement = doc.createElement('ram:SpecifiedTradeSettlementPaymentMeans');

    // Payment type code (58 for SEPA transfer as default)
    const typeCodeElement = doc.createElement('ram:TypeCode');
    typeCodeElement.textContent = '58';
    paymentMeansElement.appendChild(typeCodeElement);

    // Information (optional)
    if (invoice.paymentOptions.info) {
      const infoElement = doc.createElement('ram:Information');
      infoElement.textContent = invoice.paymentOptions.info;
      paymentMeansElement.appendChild(infoElement);
    }

    // Payment options without a SEPA connection must not crash the encoder
    const sepaConnection = invoice.paymentOptions.sepaConnection;
    if (sepaConnection?.iban) {
      // Payee account
      const payeeAccountElement = doc.createElement('ram:PayeePartyCreditorFinancialAccount');
      const ibanElement = doc.createElement('ram:IBANID');
      ibanElement.textContent = sepaConnection.iban;
      payeeAccountElement.appendChild(ibanElement);
      paymentMeansElement.appendChild(payeeAccountElement);

      // Payee financial institution, if a BIC is available
      if (sepaConnection.bic) {
        const institutionElement = doc.createElement('ram:PayeeSpecifiedCreditorFinancialInstitution');
        const bicElement = doc.createElement('ram:BICID');
        bicElement.textContent = sepaConnection.bic;
        institutionElement.appendChild(bicElement);
        paymentMeansElement.appendChild(institutionElement);
      }
    }

    settlementElement.appendChild(paymentMeansElement);
  }

  // Tax breakdown
  this.addTaxDetails(doc, settlementElement, invoice);

  // Payment terms follow the tax breakdown in the schema sequence
  const paymentTermsElement = doc.createElement('ram:SpecifiedTradePaymentTerms');

  if (invoice.paymentOptions) {
    // Payment instructions as free text - generic enough to work with any payment type
    const descriptionElement = doc.createElement('ram:Description');
    descriptionElement.textContent = `Due in ${invoice.dueInDays} days. ${invoice.paymentOptions.info || ''}`;
    paymentTermsElement.appendChild(descriptionElement);
  }

  // Due date = invoice date shifted by dueInDays calendar days
  const dueDate = new Date(invoice.date);
  dueDate.setDate(dueDate.getDate() + invoice.dueInDays);

  const dueDateElement = doc.createElement('ram:DueDateDateTime');
  const dateStringElement = doc.createElement('udt:DateTimeString');
  dateStringElement.setAttribute('format', '102'); // YYYYMMDD format
  dateStringElement.textContent = this.formatDateYYYYMMDD(dueDate.getTime());
  dueDateElement.appendChild(dateStringElement);
  paymentTermsElement.appendChild(dueDateElement);

  settlementElement.appendChild(paymentTermsElement);

  // Totals
  this.addMonetarySummation(doc, settlementElement, invoice);
}
|
||||||
|
|
||||||
|
/**
 * Adds one `ram:ApplicableTradeTax` entry per VAT rate to the settlement section.
 *
 * Line net amounts (unit net price times quantity) are summed per VAT
 * percentage; each group is written with its calculated tax, basis amount,
 * category code ('AE' when the invoice is reverse charge, otherwise 'S') and
 * the rate itself.
 * @param doc XML document
 * @param settlementElement Settlement element
 * @param invoice Invoice data
 */
private addTaxDetails(doc: Document, settlementElement: Element, invoice: TInvoice): void {
  // VAT rate -> accumulated net amount
  const netByRate = new Map<number, number>();

  for (const lineItem of invoice.items || []) {
    const lineNet = lineItem.unitNetPrice * lineItem.unitQuantity;
    const previousNet = netByRate.get(lineItem.vatPercentage) || 0;
    netByRate.set(lineItem.vatPercentage, previousNet + lineNet);
  }

  netByRate.forEach((basis, percent) => {
    const vatAmount = basis * (percent / 100);

    // Child elements in the order they are written
    const children: Array<[string, string]> = [
      ['ram:CalculatedAmount', vatAmount.toFixed(2)],
      ['ram:TypeCode', 'VAT'],
      ['ram:BasisAmount', basis.toFixed(2)],
      ['ram:CategoryCode', invoice.reverseCharge ? 'AE' : 'S'],
      ['ram:RateApplicablePercent', percent.toString()],
    ];

    const tradeTax = doc.createElement('ram:ApplicableTradeTax');
    for (const [tagName, text] of children) {
      const child = doc.createElement(tagName);
      child.textContent = text;
      tradeTax.appendChild(child);
    }

    settlementElement.appendChild(tradeTax);
  });
}
|
||||||
|
|
||||||
|
/**
 * Adds the monetary summation (document totals) to the settlement section.
 *
 * Totals are derived from the line items: net is unit net price times
 * quantity, tax is net times the VAT percentage. The summation contains line
 * total, tax basis total, tax total (with `currencyID`), grand total and due
 * payable amount, each formatted with two decimals.
 *
 * The tax basis total equals the line total because this method writes no
 * document-level charges or allowances.
 * @param doc XML document
 * @param settlementElement Settlement element
 * @param invoice Invoice data
 */
private addMonetarySummation(doc: Document, settlementElement: Element, invoice: TInvoice): void {
  const monetarySummationElement = doc.createElement('ram:SpecifiedTradeSettlementHeaderMonetarySummation');

  // Calculate totals from the line items
  let totalNetAmount = 0;
  let totalTaxAmount = 0;

  if (invoice.items) {
    for (const item of invoice.items) {
      const itemNetAmount = item.unitNetPrice * item.unitQuantity;
      const itemTaxAmount = itemNetAmount * (item.vatPercentage / 100);

      totalNetAmount += itemNetAmount;
      totalTaxAmount += itemTaxAmount;
    }
  }

  const totalGrossAmount = totalNetAmount + totalTaxAmount;

  // Line total amount
  const lineTotalElement = doc.createElement('ram:LineTotalAmount');
  lineTotalElement.textContent = totalNetAmount.toFixed(2);
  monetarySummationElement.appendChild(lineTotalElement);

  // Tax basis total (invoice total without VAT); sits between the line total
  // and the tax total in the schema sequence
  const taxBasisTotalElement = doc.createElement('ram:TaxBasisTotalAmount');
  taxBasisTotalElement.textContent = totalNetAmount.toFixed(2);
  monetarySummationElement.appendChild(taxBasisTotalElement);

  // Tax total amount
  const taxTotalElement = doc.createElement('ram:TaxTotalAmount');
  taxTotalElement.textContent = totalTaxAmount.toFixed(2);
  taxTotalElement.setAttribute('currencyID', invoice.currency);
  monetarySummationElement.appendChild(taxTotalElement);

  // Grand total amount
  const grandTotalElement = doc.createElement('ram:GrandTotalAmount');
  grandTotalElement.textContent = totalGrossAmount.toFixed(2);
  monetarySummationElement.appendChild(grandTotalElement);

  // Due payable amount
  const duePayableElement = doc.createElement('ram:DuePayableAmount');
  duePayableElement.textContent = totalGrossAmount.toFixed(2);
  monetarySummationElement.appendChild(duePayableElement);

  settlementElement.appendChild(monetarySummationElement);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds line items to the XML document
|
||||||
|
* @param doc XML document
|
||||||
|
* @param transactionElement Transaction element
|
||||||
|
* @param invoice Invoice data
|
||||||
|
*/
|
||||||
|
private addLineItems(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  // Appends one <ram:IncludedSupplyChainTradeLineItem> per invoice item to
  // transactionElement. Does nothing when the invoice has no items list.
  //
  // doc                - XML document used as element factory
  // transactionElement - parent element that receives the line items
  // invoice            - invoice whose items and reverseCharge flag are read
  if (!invoice.items) {
    return;
  }

  // Creates <tagName>text</tagName>, attaches it to parent and returns it.
  const appendTextElement = (parent: Element, tagName: string, text: string): Element => {
    const element = doc.createElement(tagName);
    element.textContent = text;
    parent.appendChild(element);
    return element;
  };

  // Creates an empty container element and attaches it to parent.
  const appendContainer = (parent: Element, tagName: string): Element => {
    const element = doc.createElement(tagName);
    parent.appendChild(element);
    return element;
  };

  for (const item of invoice.items) {
    const lineItemElement = doc.createElement('ram:IncludedSupplyChainTradeLineItem');

    // Line identifier
    const lineDocumentElement = appendContainer(lineItemElement, 'ram:AssociatedDocumentLineDocument');
    appendTextElement(lineDocumentElement, 'ram:LineID', item.position.toString());

    // Product information. The CII schema defines the children of
    // SpecifiedTradeProduct as a sequence in which SellerAssignedID comes
    // before Name, so the optional article number has to be written first.
    const productElement = appendContainer(lineItemElement, 'ram:SpecifiedTradeProduct');
    if (item.articleNumber) {
      appendTextElement(productElement, 'ram:SellerAssignedID', item.articleNumber);
    }
    appendTextElement(productElement, 'ram:Name', item.name);

    // Agreement: net unit price
    const agreementElement = appendContainer(lineItemElement, 'ram:SpecifiedLineTradeAgreement');
    const priceElement = appendContainer(agreementElement, 'ram:NetPriceProductTradePrice');
    appendTextElement(priceElement, 'ram:ChargeAmount', item.unitNetPrice.toFixed(2));

    // Delivery: billed quantity with its unit code
    const deliveryElement = appendContainer(lineItemElement, 'ram:SpecifiedLineTradeDelivery');
    appendTextElement(deliveryElement, 'ram:BilledQuantity', item.unitQuantity.toString())
      .setAttribute('unitCode', item.unitType);

    // Settlement: tax information followed by the line total
    const settlementElement = appendContainer(lineItemElement, 'ram:SpecifiedLineTradeSettlement');

    const taxElement = appendContainer(settlementElement, 'ram:ApplicableTradeTax');
    appendTextElement(taxElement, 'ram:TypeCode', 'VAT');
    // Category 'AE' is written for reverse-charge invoices, 'S' otherwise.
    appendTextElement(taxElement, 'ram:CategoryCode', invoice.reverseCharge ? 'AE' : 'S');
    appendTextElement(taxElement, 'ram:RateApplicablePercent', item.vatPercentage.toString());

    const monetarySummationElement = appendContainer(
      settlementElement,
      'ram:SpecifiedLineTradeSettlementMonetarySummation'
    );
    const itemNetAmount = item.unitNetPrice * item.unitQuantity;
    appendTextElement(monetarySummationElement, 'ram:LineTotalAmount', itemNetAmount.toFixed(2));

    // Attach the completed line item to the transaction
    transactionElement.appendChild(lineItemElement);
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Formats a date as YYYYMMDD
|
||||||
|
* @param timestamp Timestamp to format
|
||||||
|
* @returns Formatted date string
|
||||||
|
*/
|
||||||
|
private formatDateYYYYMMDD(timestamp: number): string {
  // Renders the timestamp as YYYYMMDD using the local-time calendar
  // fields of the resulting Date; month and day are zero-padded to two digits.
  const when = new Date(timestamp);
  const twoDigits = (value: number): string => value.toString().padStart(2, '0');

  // getMonth() is zero-based, hence the + 1.
  return [when.getFullYear(), twoDigits(when.getMonth() + 1), twoDigits(when.getDate())].join('');
}
|
}
|
||||||
}
|
}
|
@ -1,13 +1,181 @@
|
|||||||
import { BaseValidator } from '../base/base.validator.js';
|
import { BaseValidator } from '../base/base.validator.js';
|
||||||
import { InvoiceFormat } from '../../interfaces/common.js';
|
import { InvoiceFormat, ValidationLevel } from '../../interfaces/common.js';
|
||||||
|
import type { ValidationResult } from '../../interfaces/common.js';
|
||||||
import { FormatDetector } from '../utils/format.detector.js';
|
import { FormatDetector } from '../utils/format.detector.js';
|
||||||
|
|
||||||
// Import specific validators
|
// Import specific validators
|
||||||
// import { UBLValidator } from '../ubl/ubl.validator.js';
|
import { UBLBaseValidator } from '../ubl/ubl.validator.js';
|
||||||
// import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js';
|
|
||||||
import { FacturXValidator } from '../cii/facturx/facturx.validator.js';
|
import { FacturXValidator } from '../cii/facturx/facturx.validator.js';
|
||||||
import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';
|
import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* UBL validator implementation
|
||||||
|
* Provides validation for standard UBL documents
|
||||||
|
*/
|
||||||
|
class UBLValidator extends UBLBaseValidator {
  /**
   * Checks that the mandatory top-level UBL elements are present.
   * Records one UBL-STRUCT-1 error for every element that is missing.
   * @returns False when no document is available or any required element is missing
   */
  protected validateStructure(): boolean {
    if (!this.doc) {
      return false;
    }

    const mandatoryElements = [
      'cbc:ID',
      'cbc:IssueDate',
      'cac:AccountingSupplierParty',
      'cac:AccountingCustomerParty'
    ];

    let missingCount = 0;
    for (let index = 0; index < mandatoryElements.length; index++) {
      const elementName = mandatoryElements[index];
      if (this.exists(`//${elementName}`)) {
        continue;
      }

      this.addError(
        'UBL-STRUCT-1',
        `Required element ${elementName} is missing`,
        `/${elementName}`
      );
      missingCount++;
    }

    return missingCount === 0;
  }

  /**
   * Applies the basic UBL business rules: the issue date must be present
   * and parseable, and the document needs at least one invoice line or
   * credit note line.
   * @returns False when no document is available or any rule is violated
   */
  protected validateBusinessRules(): boolean {
    if (!this.doc) {
      return false;
    }

    let rulesOk = true;

    // Issue date: present, and accepted by the Date constructor
    const rawIssueDate = this.getText('//cbc:IssueDate');
    if (!rawIssueDate) {
      this.addError('UBL-BUS-1', 'Issue date is required', '//cbc:IssueDate');
      rulesOk = false;
    } else if (isNaN(new Date(rawIssueDate).getTime())) {
      this.addError('UBL-BUS-2', 'Issue date is not a valid date', '//cbc:IssueDate');
      rulesOk = false;
    }

    // Lines: either kind of line satisfies the rule
    const hasAnyLine = this.exists('//cac:InvoiceLine') || this.exists('//cac:CreditNoteLine');
    if (!hasAnyLine) {
      this.addError(
        'UBL-BUS-3',
        'At least one invoice line or credit note line is required',
        '/'
      );
      rulesOk = false;
    }

    return rulesOk;
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* XRechnung validator implementation
|
||||||
|
* Extends UBL validator with additional XRechnung specific validation rules
|
||||||
|
*/
|
||||||
|
class XRechnungValidator extends UBLValidator {
  /**
   * Runs the generic UBL structure checks and then the XRechnung additions:
   * a CustomizationID whose text contains "xrechnung" and a BuyerReference.
   * Errors from the base check and from this class accumulate together.
   * @returns True only when the base checks and both XRechnung checks pass
   */
  protected validateStructure(): boolean {
    // Run the base UBL validation first so its errors are reported as well
    let valid = super.validateStructure();

    // The customization ID must identify the document as XRechnung
    if (!this.exists('//cbc:CustomizationID[contains(text(), "xrechnung")]')) {
      this.addError(
        'XRECH-STRUCT-1',
        'XRechnung customization ID is missing or invalid',
        '//cbc:CustomizationID'
      );
      valid = false;
    }

    // Buyer reference is mandatory in XRechnung
    if (!this.exists('//cbc:BuyerReference')) {
      this.addError(
        'XRECH-STRUCT-2',
        'BuyerReference is required in XRechnung',
        // Point at the missing element; a bare '//' is not a usable location.
        '//cbc:BuyerReference'
      );
      valid = false;
    }

    return valid;
  }

  /**
   * Runs the generic UBL business rules and then the German-specific rule:
   * a supplier VAT ID that starts with "DE" must be "DE" plus nine digits.
   * VAT IDs with another prefix, or a missing VAT ID, are not checked here.
   * @returns True only when the base rules and the VAT ID rule pass
   */
  protected validateBusinessRules(): boolean {
    // Run the base UBL business rules first so their errors are reported as well
    let valid = super.validateBusinessRules();

    // Company ID of the supplier whose sibling tax scheme ID is "VAT"
    const supplierVatId = this.getText('//cac:AccountingSupplierParty//cbc:CompanyID[../cac:TaxScheme/cbc:ID="VAT"]');
    if (supplierVatId && supplierVatId.startsWith('DE') && !/^DE[0-9]{9}$/.test(supplierVatId)) {
      this.addError(
        'XRECH-BUS-1',
        'German VAT ID format is invalid (must be DE followed by 9 digits)',
        '//cac:AccountingSupplierParty//cbc:CompanyID'
      );
      valid = false;
    }

    return valid;
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FatturaPA validator implementation
|
||||||
|
* Basic implementation for Italian electronic invoices
|
||||||
|
*/
|
||||||
|
class FatturaPAValidator extends BaseValidator {
  /**
   * Validates the document at the requested level. The error list is
   * cleared on every call. SYNTAX runs the schema check only; SEMANTIC and
   * BUSINESS run the schema check followed by the business rules. Any other
   * level runs no checks and reports the document as valid.
   * @param level Validation level to apply
   * @returns Validation result with the collected errors
   */
  validate(level: ValidationLevel = ValidationLevel.SYNTAX): ValidationResult {
    this.errors = [];

    let valid: boolean;
    switch (level) {
      case ValidationLevel.SYNTAX:
        valid = this.validateSchema();
        break;
      case ValidationLevel.SEMANTIC:
      case ValidationLevel.BUSINESS:
        // Business rules are only evaluated once the schema check has passed
        valid = this.validateSchema() && this.validateBusinessRules();
        break;
      default:
        valid = true;
        break;
    }

    return { valid, errors: this.errors, level };
  }

  /**
   * Minimal schema check: the raw XML text must contain the substring
   * "<FatturaElettronica". Records FATT-SCHEMA-1 when it does not.
   * @returns True when the substring is present
   */
  protected validateSchema(): boolean {
    const hasMarker = this.xml.includes('<FatturaElettronica');
    if (!hasMarker) {
      this.addError('FATT-SCHEMA-1', 'Root element must be FatturaElettronica', '/');
    }
    return hasMarker;
  }

  /**
   * Placeholder: no dedicated business rules exist yet, so this simply
   * repeats the schema check.
   * @returns Result of validateSchema()
   */
  protected validateBusinessRules(): boolean {
    return this.validateSchema();
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory to create the appropriate validator based on the XML format
|
* Factory to create the appropriate validator based on the XML format
|
||||||
*/
|
*/
|
||||||
@ -18,34 +186,73 @@ export class ValidatorFactory {
|
|||||||
* @returns Appropriate validator instance
|
* @returns Appropriate validator instance
|
||||||
*/
|
*/
|
||||||
public static createValidator(xml: string): BaseValidator {
|
public static createValidator(xml: string): BaseValidator {
|
||||||
|
try {
|
||||||
const format = FormatDetector.detectFormat(xml);
|
const format = FormatDetector.detectFormat(xml);
|
||||||
|
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case InvoiceFormat.UBL:
|
case InvoiceFormat.UBL:
|
||||||
// return new UBLValidator(xml);
|
return new UBLValidator(xml);
|
||||||
throw new Error('UBL validator not yet implemented');
|
|
||||||
|
|
||||||
case InvoiceFormat.XRECHNUNG:
|
case InvoiceFormat.XRECHNUNG:
|
||||||
// return new XRechnungValidator(xml);
|
return new XRechnungValidator(xml);
|
||||||
throw new Error('XRechnung validator not yet implemented');
|
|
||||||
|
|
||||||
case InvoiceFormat.CII:
|
case InvoiceFormat.CII:
|
||||||
// For now, use Factur-X validator for generic CII
|
// For now, use Factur-X validator for generic CII
|
||||||
return new FacturXValidator(xml);
|
return new FacturXValidator(xml);
|
||||||
|
|
||||||
case InvoiceFormat.ZUGFERD:
|
case InvoiceFormat.ZUGFERD:
|
||||||
// Use dedicated ZUGFeRD validator
|
|
||||||
return new ZUGFeRDValidator(xml);
|
return new ZUGFeRDValidator(xml);
|
||||||
|
|
||||||
case InvoiceFormat.FACTURX:
|
case InvoiceFormat.FACTURX:
|
||||||
return new FacturXValidator(xml);
|
return new FacturXValidator(xml);
|
||||||
|
|
||||||
case InvoiceFormat.FATTURAPA:
|
case InvoiceFormat.FATTURAPA:
|
||||||
// return new FatturaPAValidator(xml);
|
return new FatturaPAValidator(xml);
|
||||||
throw new Error('FatturaPA validator not yet implemented');
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw new Error(`Unsupported invoice format: ${format}`);
|
// For unknown formats, provide a generic validator that will
|
||||||
|
// mark the document as invalid but won't throw an exception
|
||||||
|
return new GenericValidator(xml, format);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
// If an error occurs during validator creation, return a generic validator
|
||||||
|
// that will provide meaningful error information instead of throwing
|
||||||
|
console.error(`Error creating validator: ${error}`);
|
||||||
|
return new GenericValidator(xml, 'unknown');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generic validator for unknown or unsupported formats
|
||||||
|
* Provides meaningful validation errors instead of throwing exceptions
|
||||||
|
*/
|
||||||
|
class GenericValidator extends BaseValidator {
  // Format label this validator was created for
  private format: string;

  /**
   * Registers a single GEN-1 error naming the unsupported format, so every
   * later validate() call reports it.
   * @param xml XML content handed to the base validator
   * @param format Label of the detected (unsupported) format
   */
  constructor(xml: string, format: string) {
    super(xml);
    this.format = format;

    const message = `Unsupported invoice format: ${format}`;
    this.addError('GEN-1', message, '/');
  }

  /**
   * Always reports the document as invalid, returning the errors recorded
   * so far and echoing the requested level.
   * @param level Validation level requested by the caller
   * @returns Validation result with valid set to false
   */
  validate(level: ValidationLevel = ValidationLevel.SYNTAX): ValidationResult {
    const result: ValidationResult = {
      valid: false,
      errors: this.errors,
      level
    };
    return result;
  }

  /** Schema validation never succeeds for an unsupported format. */
  protected validateSchema(): boolean {
    return false;
  }

  /** Business rule validation never succeeds for an unsupported format. */
  protected validateBusinessRules(): boolean {
    return false;
  }
}
|
@ -11,7 +11,10 @@ export abstract class BaseXMLExtractor {
|
|||||||
'factur-x.xml',
|
'factur-x.xml',
|
||||||
'zugferd-invoice.xml',
|
'zugferd-invoice.xml',
|
||||||
'ZUGFeRD-invoice.xml',
|
'ZUGFeRD-invoice.xml',
|
||||||
'xrechnung.xml'
|
'xrechnung.xml',
|
||||||
|
'ubl-invoice.xml',
|
||||||
|
'invoice.xml',
|
||||||
|
'metadata.xml'
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -32,7 +35,8 @@ export abstract class BaseXMLExtractor {
|
|||||||
'urn:zugferd',
|
'urn:zugferd',
|
||||||
'urn:factur-x',
|
'urn:factur-x',
|
||||||
'factur-x.eu',
|
'factur-x.eu',
|
||||||
'ZUGFeRD'
|
'ZUGFeRD',
|
||||||
|
'FatturaElettronica'
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -47,7 +51,8 @@ export abstract class BaseXMLExtractor {
|
|||||||
'</rsm:CrossIndustryDocument>',
|
'</rsm:CrossIndustryDocument>',
|
||||||
'</ram:CrossIndustryDocument>',
|
'</ram:CrossIndustryDocument>',
|
||||||
'</ubl:Invoice>',
|
'</ubl:Invoice>',
|
||||||
'</ubl:CreditNote>'
|
'</ubl:CreditNote>',
|
||||||
|
'</FatturaElettronica>'
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -69,21 +74,19 @@ export abstract class BaseXMLExtractor {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if it starts with XML declaration
|
// Check if it starts with XML declaration or a valid element
|
||||||
if (!xmlString.includes('<?xml')) {
|
if (!xmlString.includes('<?xml') && !this.hasKnownXmlElement(xmlString)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the XML string contains known invoice formats
|
// Check if the XML string contains known invoice formats
|
||||||
const hasKnownFormat = this.knownFormats.some(format => xmlString.includes(format));
|
const hasKnownFormat = this.hasKnownFormat(xmlString);
|
||||||
if (!hasKnownFormat) {
|
if (!hasKnownFormat) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the XML string contains binary data or invalid characters
|
// Check if the XML string contains binary data or invalid characters
|
||||||
const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
|
if (this.hasBinaryData(xmlString)) {
|
||||||
const hasBinaryData = invalidChars.some(char => xmlString.includes(char));
|
|
||||||
if (hasBinaryData) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,6 +95,11 @@ export abstract class BaseXMLExtractor {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if XML has a proper structure (contains both opening and closing tags)
|
||||||
|
if (!this.hasProperXmlStructure(xmlString)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error validating XML:', error);
|
console.error('Error validating XML:', error);
|
||||||
@ -99,6 +107,85 @@ export abstract class BaseXMLExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the XML string contains a known element
|
||||||
|
* @param xmlString XML string to check
|
||||||
|
* @returns True if the XML contains a known element
|
||||||
|
*/
|
||||||
|
protected hasKnownXmlElement(xmlString: string): boolean {
  // True as soon as any known format name appears directly after a '<',
  // i.e. at the start of a tag.
  return this.knownFormats.some((formatName) => xmlString.includes(`<${formatName}`));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the XML string contains a known format
|
||||||
|
* @param xmlString XML string to check
|
||||||
|
* @returns True if the XML contains a known format
|
||||||
|
*/
|
||||||
|
protected hasKnownFormat(xmlString: string): boolean {
  // Plain substring search: any known format marker anywhere in the text counts.
  return this.knownFormats.some((formatMarker) => xmlString.includes(formatMarker));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the XML string has a proper structure
|
||||||
|
* @param xmlString XML string to check
|
||||||
|
* @returns True if the XML has a proper structure
|
||||||
|
*/
|
||||||
|
protected hasProperXmlStructure(xmlString: string): boolean {
  // Returns true when the text looks structurally like XML: either a known
  // root element is opened and closed, or - as a weaker fallback - there is
  // an XML declaration or at least one generic opening and closing tag.

  // True when tagPrefix (e.g. '<ubl:Invoice') starts an element, i.e. is
  // directly followed by '>', '/' or whitespace. The follow-up check keeps
  // '<ubl:Invoice' from matching '<ubl:InvoiceLine'.
  const opensElement = (tagPrefix: string): boolean => {
    let position = xmlString.indexOf(tagPrefix);
    while (position !== -1) {
      const following = xmlString.charAt(position + tagPrefix.length);
      if (following === '>' || following === '/' || /\s/.test(following)) {
        return true;
      }
      position = xmlString.indexOf(tagPrefix, position + 1);
    }
    return false;
  };

  // Known root elements: derive the opening tag from the closing tag.
  // '</rsm:CrossIndustryInvoice>' -> '<rsm:CrossIndustryInvoice'. The
  // trailing '>' is dropped on purpose: root elements normally carry
  // xmlns attributes, so '<name>' would almost never be found verbatim.
  for (const endTag of this.knownEndTags) {
    const startTagPrefix = endTag.replace('/', '').replace(/>$/, '');
    if (xmlString.includes(endTag) && opensElement(startTagPrefix)) {
      return true;
    }
  }

  // Fallback when no known root element pair is present
  return (
    (xmlString.includes('<?xml') && xmlString.includes('?>')) ||
    (xmlString.match(/<[^>]+>/) !== null && xmlString.match(/<\/[^>]+>/) !== null)
  );
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the XML string contains binary data
|
||||||
|
* @param xmlString XML string to check
|
||||||
|
* @returns True if the XML contains binary data
|
||||||
|
*/
|
||||||
|
protected hasBinaryData(xmlString: string): boolean {
  // Heuristic binary detector. Returns true when the text contains any of
  // the control characters U+0000..U+0005, or when more than 5% of its
  // characters are non-printable control characters.

  // A single hit in U+0000..U+0005 is enough. This also covers runs of NUL
  // bytes, so no separate "consecutive nulls" test is needed.
  if (/[\u0000-\u0005]/.test(xmlString)) {
    return true;
  }

  // Control characters other than tab (\x09), line feed (\x0A) and
  // carriage return (\x0D), which are ordinary whitespace in XML.
  const nonPrintableCount = (xmlString.match(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g) || []).length;

  // More than 5% non-printable characters is treated as binary data
  return nonPrintableCount > xmlString.length * 0.05;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract XML from a string
|
* Extract XML from a string
|
||||||
* @param text Text to extract XML from
|
* @param text Text to extract XML from
|
||||||
@ -108,10 +195,23 @@ export abstract class BaseXMLExtractor {
|
|||||||
protected extractXmlFromString(text: string, startIndex: number = 0): string | null {
|
protected extractXmlFromString(text: string, startIndex: number = 0): string | null {
|
||||||
try {
|
try {
|
||||||
// Find the start of the XML document
|
// Find the start of the XML document
|
||||||
const xmlStartIndex = text.indexOf('<?xml', startIndex);
|
let xmlStartIndex = text.indexOf('<?xml', startIndex);
|
||||||
|
|
||||||
|
// If no XML declaration, try to find known elements
|
||||||
|
if (xmlStartIndex === -1) {
|
||||||
|
for (const format of this.knownFormats) {
|
||||||
|
const formatStartIndex = text.indexOf(`<${format.split(':').pop()}`, startIndex);
|
||||||
|
if (formatStartIndex !== -1) {
|
||||||
|
xmlStartIndex = formatStartIndex;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Still didn't find any start marker
|
||||||
if (xmlStartIndex === -1) {
|
if (xmlStartIndex === -1) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Try to find the end of the XML document
|
// Try to find the end of the XML document
|
||||||
let xmlEndIndex = -1;
|
let xmlEndIndex = -1;
|
||||||
@ -123,12 +223,26 @@ export abstract class BaseXMLExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If no known end tag found, try to use a heuristic approach
|
||||||
if (xmlEndIndex === -1) {
|
if (xmlEndIndex === -1) {
|
||||||
|
// Try to find the last closing tag
|
||||||
|
const lastClosingTagMatch = text.slice(xmlStartIndex).match(/<\/[^>]+>(?!.*<\/[^>]+>)/);
|
||||||
|
if (lastClosingTagMatch && lastClosingTagMatch.index !== undefined) {
|
||||||
|
xmlEndIndex = xmlStartIndex + lastClosingTagMatch.index + lastClosingTagMatch[0].length;
|
||||||
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Extract the XML content
|
// Extract the XML content
|
||||||
return text.substring(xmlStartIndex, xmlEndIndex);
|
const xmlContent = text.substring(xmlStartIndex, xmlEndIndex);
|
||||||
|
|
||||||
|
// Validate the extracted content
|
||||||
|
if (this.isValidXml(xmlContent)) {
|
||||||
|
return xmlContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error extracting XML from string:', error);
|
console.error('Error extracting XML from string:', error);
|
||||||
return null;
|
return null;
|
||||||
@ -143,28 +257,28 @@ export abstract class BaseXMLExtractor {
|
|||||||
*/
|
*/
|
||||||
protected async extractXmlFromStream(stream: PDFRawStream, fileName: string): Promise<string | null> {
|
protected async extractXmlFromStream(stream: PDFRawStream, fileName: string): Promise<string | null> {
|
||||||
try {
|
try {
|
||||||
// Try to decompress with pako
|
// Get the raw bytes from the stream
|
||||||
const compressedBytes = stream.getContents().buffer;
|
const rawBytes = stream.getContents();
|
||||||
try {
|
|
||||||
const decompressedBytes = pako.inflate(compressedBytes);
|
|
||||||
const xmlContent = new TextDecoder('utf-8').decode(decompressedBytes);
|
|
||||||
|
|
||||||
if (this.isValidXml(xmlContent)) {
|
// First try without decompression (in case the content is not compressed)
|
||||||
|
let xmlContent = this.tryDecodeBuffer(rawBytes);
|
||||||
|
if (xmlContent && this.isValidXml(xmlContent)) {
|
||||||
|
console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`);
|
||||||
|
return xmlContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try with decompression
|
||||||
|
try {
|
||||||
|
const decompressedBytes = this.tryDecompress(rawBytes);
|
||||||
|
if (decompressedBytes) {
|
||||||
|
xmlContent = this.tryDecodeBuffer(decompressedBytes);
|
||||||
|
if (xmlContent && this.isValidXml(xmlContent)) {
|
||||||
console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`);
|
console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`);
|
||||||
return xmlContent;
|
return xmlContent;
|
||||||
}
|
}
|
||||||
} catch (decompressError) {
|
|
||||||
// Decompression failed, try without decompression
|
|
||||||
console.log(`Decompression failed for ${fileName}, trying without decompression...`);
|
|
||||||
}
|
}
|
||||||
|
} catch (decompressError) {
|
||||||
// Try without decompression
|
console.log(`Decompression failed for ${fileName}: ${decompressError}`);
|
||||||
const rawBytes = stream.getContents();
|
|
||||||
const rawContent = new TextDecoder('utf-8').decode(rawBytes);
|
|
||||||
|
|
||||||
if (this.isValidXml(rawContent)) {
|
|
||||||
console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`);
|
|
||||||
return rawContent;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@ -173,4 +287,69 @@ export abstract class BaseXMLExtractor {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to decompress a buffer using different methods
|
||||||
|
* @param buffer Buffer to decompress
|
||||||
|
* @returns Decompressed buffer or null if decompression failed
|
||||||
|
*/
|
||||||
|
protected tryDecompress(buffer: Uint8Array): Uint8Array | null {
  // Attempts to inflate the buffer with pako. Any failure is reported as a
  // warning and turned into a null result rather than an exception.
  let inflated: Uint8Array | null = null;

  try {
    inflated = pako.inflate(buffer);
  } catch (inflateError) {
    // Only inflate is attempted; other algorithms are not tried here
    console.warn('Pako decompression failed, might be uncompressed or using a different algorithm');
  }

  return inflated;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try to decode a buffer to a string using different encodings
|
||||||
|
* @param buffer Buffer to decode
|
||||||
|
* @returns Decoded string or null if decoding failed
|
||||||
|
*/
|
||||||
|
protected tryDecodeBuffer(buffer: Uint8Array): string | null {
  // Decodes the buffer with each candidate encoding in turn and returns the
  // first result that passes the plausibility check; null when none does
  // or when decoding throws.

  // Candidate decoders in priority order: UTF-8 first, then ISO-8859-1
  const decoders: Array<() => string> = [
    () => new TextDecoder('utf-8').decode(buffer),
    () => this.decodeLatin1(buffer)
  ];

  try {
    for (const decode of decoders) {
      const candidate = decode();
      if (this.isPlausibleXml(candidate)) {
        return candidate;
      }
    }
  } catch (decodeError) {
    console.warn('Error decoding buffer:', decodeError);
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a buffer using ISO-8859-1 (Latin1) encoding
|
||||||
|
* @param buffer Buffer to decode
|
||||||
|
* @returns Decoded string
|
||||||
|
*/
|
||||||
|
protected decodeLatin1(buffer: Uint8Array): string {
  // ISO-8859-1 maps every byte value to the code point of the same number,
  // so each byte becomes exactly one character.
  let decoded = '';
  buffer.forEach((byteValue) => {
    decoded += String.fromCharCode(byteValue);
  });
  return decoded;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a string is plausibly XML (quick check before validation)
|
||||||
|
* @param content String to check
|
||||||
|
* @returns True if the string is plausibly XML
|
||||||
|
*/
|
||||||
|
protected isPlausibleXml(content: string): boolean {
  // Cheap pre-filter run before full validation: the text needs angle
  // brackets plus either an XML declaration or a known format marker.
  const hasAngleBrackets = content.includes('<') && content.includes('>');
  if (!hasAngleBrackets) {
    return false;
  }

  if (content.includes('<?xml')) {
    return true;
  }

  return this.knownFormats.some(format => content.includes(format));
}
|
||||||
}
|
}
|
@ -6,50 +6,157 @@ import { BaseXMLExtractor } from './base.extractor.js';
|
|||||||
* Used as a fallback when other extraction methods fail
|
* Used as a fallback when other extraction methods fail
|
||||||
*/
|
*/
|
||||||
export class TextXMLExtractor extends BaseXMLExtractor {
|
export class TextXMLExtractor extends BaseXMLExtractor {
|
||||||
|
// Maximum chunk size to process at once (4MB)
|
||||||
|
private readonly CHUNK_SIZE = 4 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Maximum number of chunks to check (effective 20MB search limit)
|
||||||
|
private readonly MAX_CHUNKS = 5;
|
||||||
|
|
||||||
|
// Common XML patterns to look for
|
||||||
|
private readonly XML_PATTERNS = [
|
||||||
|
'<?xml',
|
||||||
|
'<CrossIndustryInvoice',
|
||||||
|
'<CrossIndustryDocument',
|
||||||
|
'<Invoice',
|
||||||
|
'<CreditNote',
|
||||||
|
'<rsm:CrossIndustryInvoice',
|
||||||
|
'<rsm:CrossIndustryDocument',
|
||||||
|
'<ram:CrossIndustryDocument',
|
||||||
|
'<ubl:Invoice',
|
||||||
|
'<ubl:CreditNote',
|
||||||
|
'<FatturaElettronica'
|
||||||
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract XML from a PDF buffer by searching for XML patterns in the text
|
* Extract XML from a PDF buffer by searching for XML patterns in the text
|
||||||
|
* Uses a chunked approach to handle large files efficiently
|
||||||
* @param pdfBuffer PDF buffer
|
* @param pdfBuffer PDF buffer
|
||||||
* @returns XML content or null if not found
|
* @returns XML content or null if not found
|
||||||
*/
|
*/
|
||||||
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
||||||
try {
|
try {
|
||||||
// Convert buffer to string and look for XML patterns
|
console.log('Attempting text-based XML extraction from PDF...');
|
||||||
// Increase the search range to handle larger PDFs
|
|
||||||
const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 50000));
|
|
||||||
|
|
||||||
// Look for common XML patterns in the PDF
|
// Convert Buffer to Uint8Array if needed
|
||||||
const xmlPatterns = [
|
const buffer = Buffer.isBuffer(pdfBuffer) ? new Uint8Array(pdfBuffer) : pdfBuffer;
|
||||||
/<\?xml[^>]*\?>/i,
|
|
||||||
/<CrossIndustryInvoice[^>]*>/i,
|
|
||||||
/<CrossIndustryDocument[^>]*>/i,
|
|
||||||
/<Invoice[^>]*>/i,
|
|
||||||
/<CreditNote[^>]*>/i,
|
|
||||||
/<rsm:CrossIndustryInvoice[^>]*>/i,
|
|
||||||
/<rsm:CrossIndustryDocument[^>]*>/i,
|
|
||||||
/<ram:CrossIndustryDocument[^>]*>/i,
|
|
||||||
/<ubl:Invoice[^>]*>/i,
|
|
||||||
/<ubl:CreditNote[^>]*>/i
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const pattern of xmlPatterns) {
|
// Try extracting XML using the chunked approach
|
||||||
const match = pdfString.match(pattern);
|
return this.extractXmlFromBufferChunked(buffer);
|
||||||
if (match && match.index !== undefined) {
|
|
||||||
console.log(`Found XML pattern in PDF: ${match[0]}`);
|
|
||||||
|
|
||||||
// Try to extract the XML content
|
|
||||||
const xmlContent = this.extractXmlFromString(pdfString, match.index);
|
|
||||||
if (xmlContent && this.isValidXml(xmlContent)) {
|
|
||||||
console.log('Successfully extracted XML from PDF text');
|
|
||||||
return xmlContent;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.warn('No valid XML found in PDF text');
|
|
||||||
return null;
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error in text-based extraction:', error);
|
console.error('Error in text-based extraction:', error);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scan the leading part of a PDF buffer for embedded XML, one fixed-size window at a time.
 *
 * At most MAX_CHUNKS windows of CHUNK_SIZE bytes are examined, starting at offset 0.
 * The first window for which processChunk() returns XML decides the result.
 *
 * NOTE(review): windows do not overlap and each one is searched on its own, so an XML
 * document that straddles a window boundary is never seen whole by a single
 * processChunk() call. Confirm whether that case matters for this fallback extractor.
 *
 * @param buffer Bytes of the PDF to search
 * @returns XML reported by processChunk() for the first matching window, or null if none matched
 */
private extractXmlFromBufferChunked(buffer: Uint8Array): string | null {
  for (let chunkIndex = 0; chunkIndex < this.MAX_CHUNKS; chunkIndex++) {
    const startPos = chunkIndex * this.CHUNK_SIZE;
    if (startPos >= buffer.length) break;

    const endPos = Math.min(startPos + this.CHUNK_SIZE, buffer.length);

    // subarray() returns a view onto the same memory; slice() would allocate and copy
    // up to CHUNK_SIZE bytes for every window. The window is only read, never mutated.
    const chunk = buffer.subarray(startPos, endPos);

    const chunkResult = this.processChunk(chunk, startPos);
    if (chunkResult) {
      return chunkResult;
    }
  }

  console.warn('No valid XML found in any chunk of the PDF');
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Look for XML inside one window of the PDF.
 *
 * The window is decoded as UTF-8 first; the Latin-1 decoding is only attempted when the
 * UTF-8 text yielded nothing. Any exception raised while decoding or searching is logged
 * as a warning and reported as "not found".
 *
 * @param chunk Bytes of the window to inspect
 * @param chunkOffset Position of the window inside the full buffer (used in log output only)
 * @returns XML found in the window, or null
 */
private processChunk(chunk: Uint8Array, chunkOffset: number): string | null {
  // Encodings in the order they are tried, paired with the label used in the log line.
  const attempts: Array<['utf-8' | 'latin1', string]> = [
    ['utf-8', 'UTF-8'],
    ['latin1', 'Latin-1'],
  ];

  try {
    for (const [encoding, label] of attempts) {
      const decoded = this.decodeBufferToString(chunk, encoding);
      const found = this.searchForXmlInString(decoded);

      if (found) {
        console.log(`Found XML content in chunk at offset ${chunkOffset} using ${label} encoding`);
        return found;
      }
    }

    // Neither decoding produced usable XML.
    return null;
  } catch (error) {
    console.warn(`Error processing chunk at offset ${chunkOffset}:`, error);
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Decode raw bytes into a string.
 *
 * - 'utf-8': decoded with a non-fatal TextDecoder.
 * - 'latin1': each byte becomes the UTF-16 code unit with the same numeric value (0-255).
 *
 * @param buffer Bytes to decode
 * @param encoding Either 'utf-8' or 'latin1'
 * @returns Decoded text, or '' when decoding threw (the error is logged as a warning)
 */
private decodeBufferToString(buffer: Uint8Array, encoding: 'utf-8' | 'latin1'): string {
  try {
    if (encoding === 'utf-8') {
      return new TextDecoder('utf-8', { fatal: false }).decode(buffer);
    }

    // Convert in bounded pieces instead of materialising one array element (and one
    // single-character string) per byte of a multi-megabyte window. The piece size is
    // kept small so the spread call stays well below engine argument-count limits.
    const PIECE_SIZE = 0x8000;
    const pieces: string[] = [];
    for (let offset = 0; offset < buffer.length; offset += PIECE_SIZE) {
      pieces.push(String.fromCharCode(...buffer.subarray(offset, offset + PIECE_SIZE)));
    }
    return pieces.join('');
  } catch (error) {
    console.warn(`Error decoding buffer using ${encoding}:`, error);
    // Return empty string on error to allow processing to continue
    return '';
  }
}
|
||||||
|
|
||||||
|
/**
 * Find the first usable XML document inside decoded PDF text.
 *
 * Every entry of XML_PATTERNS is looked up in declaration order, at its first occurrence
 * only. The text from that position is handed to extractXmlFromString(), and the candidate
 * is returned only if isValidXml() accepts it; otherwise the next marker is tried.
 *
 * @param content Decoded text to search
 * @returns The first candidate that passes validation, or null
 */
private searchForXmlInString(content: string): string | null {
  if (!content) return null;

  for (const marker of this.XML_PATTERNS) {
    const position = content.indexOf(marker);
    if (position === -1) continue;

    console.log(`Found XML pattern "${marker}" at position ${position}`);

    // Cut out a candidate document starting at the marker, then validate it.
    const candidate = this.extractXmlFromString(content, position);
    if (!candidate || !this.isValidXml(candidate)) continue;

    console.log('Successfully extracted and validated XML from text');
    return candidate;
  }

  return null;
}
|
||||||
}
|
}
|
@ -1,8 +1,33 @@
|
|||||||
import { PDFDocument, AFRelationship } from '../../plugins.js';
|
import { PDFDocument, AFRelationship } from '../../plugins.js';
|
||||||
import type { IPdf } from '../../interfaces/common.js';
|
import type { IPdf } from '../../interfaces/common.js';
|
||||||
|
|
||||||
|
/**
 * Failure categories reported by PDFEmbedder.
 * The string values are human-readable labels; createErrorResult() interpolates them
 * into its console output.
 */
|
||||||
|
export enum PDFEmbedError {
|
||||||
|
// PDFDocument.load() threw while opening the input PDF
LOAD_ERROR = 'PDF loading failed',
|
||||||
|
// Attaching the XML threw; embedXml() also uses this for any otherwise-uncaught exception
EMBED_ERROR = 'XML embedding failed',
|
||||||
|
// pdfDoc.save() threw while serialising the modified document
SAVE_ERROR = 'PDF saving failed',
|
||||||
|
// The PDF buffer was missing or empty, or the XML content was empty
INVALID_INPUT = 'Invalid input parameters'
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Outcome of a PDFEmbedder operation.
 * Callers are expected to check `success` before reading the optional fields.
 */
|
||||||
|
export interface PDFEmbedResult {
|
||||||
|
// false for every result built by createErrorResult(), true on the success paths
success: boolean;
|
||||||
|
// Bytes of the modified PDF; set by embedXml() on success
data?: Uint8Array;
|
||||||
|
// IPdf wrapper around the modified PDF; set by createPdfWithXml() on success
pdf?: IPdf;
|
||||||
|
// Failure details; populated by createErrorResult()
error?: {
|
||||||
|
// Failure category
type: PDFEmbedError;
|
||||||
|
// Human-readable description, usually including the underlying error's message
message: string;
|
||||||
|
// The caught exception, when it was an Error instance
originalError?: Error;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class for embedding XML into PDF files
|
* Class for embedding XML into PDF files
|
||||||
|
* Provides robust error handling and support for different PDF formats
|
||||||
*/
|
*/
|
||||||
export class PDFEmbedder {
|
export class PDFEmbedder {
|
||||||
/**
|
/**
|
||||||
@ -11,24 +36,49 @@ export class PDFEmbedder {
|
|||||||
* @param xmlContent XML content to embed
|
* @param xmlContent XML content to embed
|
||||||
* @param filename Filename for the embedded XML
|
* @param filename Filename for the embedded XML
|
||||||
* @param description Description for the embedded XML
|
* @param description Description for the embedded XML
|
||||||
* @returns Modified PDF buffer
|
* @returns Result with either modified PDF buffer or error information
|
||||||
*/
|
*/
|
||||||
public async embedXml(
|
public async embedXml(
|
||||||
pdfBuffer: Uint8Array | Buffer,
|
pdfBuffer: Uint8Array | Buffer,
|
||||||
xmlContent: string,
|
xmlContent: string,
|
||||||
filename: string = 'invoice.xml',
|
filename: string = 'invoice.xml',
|
||||||
description: string = 'XML Invoice'
|
description: string = 'XML Invoice'
|
||||||
): Promise<Uint8Array> {
|
): Promise<PDFEmbedResult> {
|
||||||
try {
|
try {
|
||||||
|
// Validate inputs
|
||||||
|
if (!pdfBuffer || pdfBuffer.length === 0) {
|
||||||
|
return this.createErrorResult(PDFEmbedError.INVALID_INPUT, 'PDF buffer is empty or undefined');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!xmlContent) {
|
||||||
|
return this.createErrorResult(PDFEmbedError.INVALID_INPUT, 'XML content is empty or undefined');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure buffer is Uint8Array
|
||||||
|
const pdfBufferArray = Buffer.isBuffer(pdfBuffer) ? new Uint8Array(pdfBuffer) : pdfBuffer;
|
||||||
|
|
||||||
// Load the PDF
|
// Load the PDF
|
||||||
const pdfDoc = await PDFDocument.load(pdfBuffer);
|
let pdfDoc: PDFDocument;
|
||||||
|
try {
|
||||||
|
pdfDoc = await PDFDocument.load(pdfBufferArray, {
|
||||||
|
ignoreEncryption: true, // Try to load encrypted PDFs
|
||||||
|
updateMetadata: false // Don't automatically update metadata
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
return this.createErrorResult(
|
||||||
|
PDFEmbedError.LOAD_ERROR,
|
||||||
|
`Failed to load PDF: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
error instanceof Error ? error : undefined
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize filename (lowercase with XML extension)
|
||||||
|
filename = this.normalizeFilename(filename);
|
||||||
|
|
||||||
// Convert the XML string to a Uint8Array
|
// Convert the XML string to a Uint8Array
|
||||||
const xmlBuffer = new TextEncoder().encode(xmlContent);
|
const xmlBuffer = new TextEncoder().encode(xmlContent);
|
||||||
|
|
||||||
// Make sure filename is lowercase (as required by documentation)
|
try {
|
||||||
filename = filename.toLowerCase();
|
|
||||||
|
|
||||||
// Use pdf-lib's .attach() to embed the XML
|
// Use pdf-lib's .attach() to embed the XML
|
||||||
pdfDoc.attach(xmlBuffer, filename, {
|
pdfDoc.attach(xmlBuffer, filename, {
|
||||||
mimeType: 'text/xml',
|
mimeType: 'text/xml',
|
||||||
@ -37,14 +87,41 @@ export class PDFEmbedder {
|
|||||||
modificationDate: new Date(),
|
modificationDate: new Date(),
|
||||||
afRelationship: AFRelationship.Alternative,
|
afRelationship: AFRelationship.Alternative,
|
||||||
});
|
});
|
||||||
|
} catch (error) {
|
||||||
|
return this.createErrorResult(
|
||||||
|
PDFEmbedError.EMBED_ERROR,
|
||||||
|
`Failed to embed XML: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
error instanceof Error ? error : undefined
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Save the modified PDF
|
// Save the modified PDF
|
||||||
const modifiedPdfBytes = await pdfDoc.save();
|
let modifiedPdfBytes: Uint8Array;
|
||||||
|
try {
|
||||||
return modifiedPdfBytes;
|
modifiedPdfBytes = await pdfDoc.save({
|
||||||
|
addDefaultPage: false, // Don't add a page if the document is empty
|
||||||
|
useObjectStreams: false, // Better compatibility with older PDF readers
|
||||||
|
updateFieldAppearances: false // Don't update form fields
|
||||||
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error embedding XML into PDF:', error);
|
return this.createErrorResult(
|
||||||
throw error;
|
PDFEmbedError.SAVE_ERROR,
|
||||||
|
`Failed to save modified PDF: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
error instanceof Error ? error : undefined
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
data: modifiedPdfBytes
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
// Catch any uncaught errors
|
||||||
|
return this.createErrorResult(
|
||||||
|
PDFEmbedError.EMBED_ERROR,
|
||||||
|
`Unexpected error during XML embedding: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
error instanceof Error ? error : undefined
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,7 +133,7 @@ export class PDFEmbedder {
|
|||||||
* @param description Description for the embedded XML
|
* @param description Description for the embedded XML
|
||||||
* @param pdfName Name for the PDF
|
* @param pdfName Name for the PDF
|
||||||
* @param pdfId ID for the PDF
|
* @param pdfId ID for the PDF
|
||||||
* @returns IPdf object with embedded XML
|
* @returns Result with either IPdf object or error information
|
||||||
*/
|
*/
|
||||||
public async createPdfWithXml(
|
public async createPdfWithXml(
|
||||||
pdfBuffer: Uint8Array | Buffer,
|
pdfBuffer: Uint8Array | Buffer,
|
||||||
@ -65,16 +142,101 @@ export class PDFEmbedder {
|
|||||||
description: string = 'XML Invoice',
|
description: string = 'XML Invoice',
|
||||||
pdfName: string = 'invoice.pdf',
|
pdfName: string = 'invoice.pdf',
|
||||||
pdfId: string = `invoice-${Date.now()}`
|
pdfId: string = `invoice-${Date.now()}`
|
||||||
): Promise<IPdf> {
|
): Promise<PDFEmbedResult> {
|
||||||
const modifiedPdfBytes = await this.embedXml(pdfBuffer, xmlContent, filename, description);
|
// Embed XML into PDF
|
||||||
|
const embedResult = await this.embedXml(pdfBuffer, xmlContent, filename, description);
|
||||||
|
|
||||||
return {
|
// If embedding failed, return the error
|
||||||
|
if (!embedResult.success || !embedResult.data) {
|
||||||
|
return embedResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create IPdf object
|
||||||
|
const pdfObject: IPdf = {
|
||||||
name: pdfName,
|
name: pdfName,
|
||||||
id: pdfId,
|
id: pdfId,
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: ''
|
textExtraction: '',
|
||||||
|
format: this.detectPdfFormat(xmlContent),
|
||||||
|
embeddedXml: {
|
||||||
|
filename: filename,
|
||||||
|
description: description
|
||||||
|
}
|
||||||
},
|
},
|
||||||
buffer: modifiedPdfBytes
|
buffer: embedResult.data
|
||||||
|
};
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
pdf: pdfObject
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Turn a caller-supplied attachment name into a lowercase, ".xml"-suffixed name that
 * contains only the characters a-z, 0-9, "_", "." and "-".
 *
 * Steps, in order:
 *  1. lowercase the name;
 *  2. if it does not already end in ".xml", drop the last extension (or a bare trailing
 *     dot) and append ".xml";
 *  3. replace every remaining character outside the allowed set with "_";
 *  4. if nothing but the extension is left (for example for an empty input), fall back to
 *     "invoice.xml", the same default name embedXml() uses.
 *
 * @param filename Filename to normalize
 * @returns Normalized filename
 */
private normalizeFilename(filename: string): string {
  let normalized = filename.toLowerCase();

  if (!normalized.endsWith('.xml')) {
    // `*` rather than `+`: also strips a bare trailing dot, so "invoice." becomes
    // "invoice.xml" instead of "invoice..xml".
    normalized = normalized.replace(/\.[^/.]*$/, '') + '.xml';
  }

  normalized = normalized.replace(/[^a-z0-9_.-]/g, '_');

  // An empty (or extension-only) input would otherwise produce the nameless ".xml".
  return normalized === '.xml' ? 'invoice.xml' : normalized;
}
|
||||||
|
|
||||||
|
/**
 * Guess the invoice format from marker substrings in the XML text (no parsing).
 *
 * Checks run in this order and the first hit wins:
 * "factur-x" markers, "zugferd", "xrechnung", a UBL root tag, "FatturaElettronica".
 * Identifier markers are compared case-insensitively, consistent with
 * FormatDetector.quickFormatCheck(); element names stay case-sensitive because XML
 * element names are.
 *
 * @param xmlContent XML content
 * @returns 'factur-x' | 'zugferd' | 'xrechnung' | 'ubl' | 'fatturapa', or undefined if no marker matched
 */
private detectPdfFormat(xmlContent: string): string | undefined {
  const lowered = xmlContent.toLowerCase();

  if (lowered.includes('factur-x.eu') || lowered.includes('factur-x.xml')) {
    return 'factur-x';
  }
  if (lowered.includes('zugferd')) {
    return 'zugferd';
  }
  if (lowered.includes('xrechnung')) {
    return 'xrechnung';
  }

  // Accept both unprefixed and "ubl:"-prefixed roots, as TextXMLExtractor and
  // FormatDetector.isUBLFormat() do.
  const ublRoots = ['<Invoice', '<CreditNote', '<ubl:Invoice', '<ubl:CreditNote'];
  if (ublRoots.some((tag) => xmlContent.includes(tag))) {
    return 'ubl';
  }

  if (xmlContent.includes('FatturaElettronica')) {
    return 'fatturapa';
  }

  return undefined;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an error result object
|
||||||
|
* @param type Error type
|
||||||
|
* @param message Error message
|
||||||
|
* @param originalError Original error object
|
||||||
|
* @returns Error result
|
||||||
|
*/
|
||||||
|
private createErrorResult(
|
||||||
|
type: PDFEmbedError,
|
||||||
|
message: string,
|
||||||
|
originalError?: Error
|
||||||
|
): PDFEmbedResult {
|
||||||
|
console.error(`PDF Embedder Error (${type}): ${message}`);
|
||||||
|
if (originalError) {
|
||||||
|
console.error(originalError);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: {
|
||||||
|
type,
|
||||||
|
message,
|
||||||
|
originalError
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -4,6 +4,32 @@ import {
|
|||||||
AssociatedFilesExtractor,
|
AssociatedFilesExtractor,
|
||||||
TextXMLExtractor
|
TextXMLExtractor
|
||||||
} from './extractors/index.js';
|
} from './extractors/index.js';
|
||||||
|
import { FormatDetector } from '../utils/format.detector.js';
|
||||||
|
import { InvoiceFormat } from '../../interfaces/common.js';
|
||||||
|
|
||||||
|
/**
 * Failure categories reported by PDFExtractor.
 * The string values are human-readable labels; createErrorResult() interpolates them
 * into its console output.
 */
|
||||||
|
export enum PDFExtractError {
|
||||||
|
// An unexpected exception escaped the extraction loop
EXTRACT_ERROR = 'XML extraction failed',
|
||||||
|
// The PDF buffer was missing or empty
INVALID_INPUT = 'Invalid input parameters',
|
||||||
|
// Every configured extractor ran without returning XML
NO_XML_FOUND = 'No XML found in PDF'
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Outcome of PDFExtractor.extractXml().
 * Callers are expected to check `success` before reading the optional fields.
 */
|
||||||
|
export interface PDFExtractResult {
|
||||||
|
// true when one of the extractors returned XML
success: boolean;
|
||||||
|
// The extracted XML text (success only)
xml?: string;
|
||||||
|
// Value returned by FormatDetector.detectFormat() for the extracted XML (success only)
format?: InvoiceFormat;
|
||||||
|
// constructor.name of the extractor that produced the XML (success only)
extractorUsed?: string;
|
||||||
|
// Failure details; populated by createErrorResult()
error?: {
|
||||||
|
// Failure category
type: PDFExtractError;
|
||||||
|
// Human-readable description
message: string;
|
||||||
|
// The caught exception, when it was an Error instance
originalError?: Error;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main PDF extractor class that orchestrates the extraction process
|
* Main PDF extractor class that orchestrates the extraction process
|
||||||
@ -28,36 +54,88 @@ export class PDFExtractor {
|
|||||||
* Extract XML from a PDF buffer
|
* Extract XML from a PDF buffer
|
||||||
* Tries multiple extraction methods in sequence
|
* Tries multiple extraction methods in sequence
|
||||||
* @param pdfBuffer PDF buffer
|
* @param pdfBuffer PDF buffer
|
||||||
* @returns XML content or null if not found
|
* @returns Result with either the extracted XML or error information
|
||||||
*/
|
*/
|
||||||
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<PDFExtractResult> {
|
||||||
try {
|
try {
|
||||||
console.log('Starting XML extraction from PDF...');
|
console.log('Starting XML extraction from PDF...');
|
||||||
|
|
||||||
|
// Validate input
|
||||||
|
if (!pdfBuffer || pdfBuffer.length === 0) {
|
||||||
|
return this.createErrorResult(PDFExtractError.INVALID_INPUT, 'PDF buffer is empty or undefined');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure buffer is Uint8Array
|
||||||
|
const pdfBufferArray = Buffer.isBuffer(pdfBuffer) ? new Uint8Array(pdfBuffer) : pdfBuffer;
|
||||||
|
|
||||||
// Try each extractor in sequence
|
// Try each extractor in sequence
|
||||||
for (const extractor of this.extractors) {
|
for (const extractor of this.extractors) {
|
||||||
const extractorName = extractor.constructor.name;
|
const extractorName = extractor.constructor.name;
|
||||||
console.log(`Trying extraction with ${extractorName}...`);
|
console.log(`Trying extraction with ${extractorName}...`);
|
||||||
|
|
||||||
const xml = await extractor.extractXml(pdfBuffer);
|
try {
|
||||||
|
const xml = await extractor.extractXml(pdfBufferArray);
|
||||||
|
|
||||||
if (xml) {
|
if (xml) {
|
||||||
console.log(`Successfully extracted XML using ${extractorName}`);
|
console.log(`Successfully extracted XML using ${extractorName}`);
|
||||||
return xml;
|
|
||||||
|
// Detect format of the extracted XML
|
||||||
|
const format = FormatDetector.detectFormat(xml);
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
xml,
|
||||||
|
format,
|
||||||
|
extractorUsed: extractorName
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Extraction with ${extractorName} failed, trying next method...`);
|
console.log(`Extraction with ${extractorName} failed, trying next method...`);
|
||||||
}
|
|
||||||
|
|
||||||
// If all extractors fail, return null
|
|
||||||
console.warn('All extraction methods failed, no valid XML found in PDF');
|
|
||||||
return null;
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error extracting XML from PDF:', error);
|
// Log error but continue with next extractor
|
||||||
return null;
|
console.warn(`Error using ${extractorName}: ${error instanceof Error ? error.message : String(error)}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If all extractors fail, return a no XML found error
|
||||||
|
return this.createErrorResult(
|
||||||
|
PDFExtractError.NO_XML_FOUND,
|
||||||
|
'All extraction methods failed, no valid XML found in PDF'
|
||||||
|
);
|
||||||
|
} catch (error) {
|
||||||
|
// Handle any unexpected errors
|
||||||
|
return this.createErrorResult(
|
||||||
|
PDFExtractError.EXTRACT_ERROR,
|
||||||
|
`Unexpected error during XML extraction: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
error instanceof Error ? error : undefined
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Build a failed PDFExtractResult and report it on the console.
 *
 * The category and message are written with console.error; when an underlying error
 * object is supplied it is logged as well.
 *
 * @param type Failure category
 * @param message Human-readable description
 * @param originalError Underlying error, if one was caught
 * @returns Result with `success: false` and the given details under `error`
 */
private createErrorResult(
  type: PDFExtractError,
  message: string,
  originalError?: Error
): PDFExtractResult {
  console.error(`PDF Extractor Error (${type}): ${message}`);
  if (originalError) console.error(originalError);

  const error = { type, message, originalError };
  return { success: false, error };
}
|
||||||
}
|
}
|
@ -13,6 +13,18 @@ export class FormatDetector {
|
|||||||
*/
|
*/
|
||||||
public static detectFormat(xml: string): InvoiceFormat {
|
public static detectFormat(xml: string): InvoiceFormat {
|
||||||
try {
|
try {
|
||||||
|
// Quick check for empty or invalid XML
|
||||||
|
if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
|
||||||
|
return InvoiceFormat.UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quick string-based pre-checks for performance
|
||||||
|
const quickCheck = FormatDetector.quickFormatCheck(xml);
|
||||||
|
if (quickCheck !== InvoiceFormat.UNKNOWN) {
|
||||||
|
return quickCheck;
|
||||||
|
}
|
||||||
|
|
||||||
|
// More thorough parsing-based checks
|
||||||
const doc = new DOMParser().parseFromString(xml, 'application/xml');
|
const doc = new DOMParser().parseFromString(xml, 'application/xml');
|
||||||
const root = doc.documentElement;
|
const root = doc.documentElement;
|
||||||
|
|
||||||
@ -21,106 +33,26 @@ export class FormatDetector {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// UBL detection (Invoice or CreditNote root element)
|
// UBL detection (Invoice or CreditNote root element)
|
||||||
if (root.nodeName === 'Invoice' || root.nodeName === 'CreditNote') {
|
if (FormatDetector.isUBLFormat(root)) {
|
||||||
// For simplicity, we'll treat all UBL documents as XRechnung for now
|
// Check for XRechnung customization
|
||||||
// In a real implementation, we would check for specific customization IDs
|
if (FormatDetector.isXRechnungFormat(doc)) {
|
||||||
return InvoiceFormat.XRECHNUNG;
|
return InvoiceFormat.XRECHNUNG;
|
||||||
}
|
}
|
||||||
|
return InvoiceFormat.UBL;
|
||||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element)
|
|
||||||
if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice' ||
|
|
||||||
root.nodeName.endsWith(':CrossIndustryInvoice')) {
|
|
||||||
// Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X)
|
|
||||||
const namespaces = {
|
|
||||||
rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
|
|
||||||
ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100'
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create XPath selector with namespaces
|
|
||||||
const select = xpath.useNamespaces(namespaces);
|
|
||||||
|
|
||||||
// Look for profile identifier
|
|
||||||
const profileNode = select(
|
|
||||||
'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
|
||||||
doc
|
|
||||||
);
|
|
||||||
|
|
||||||
if (profileNode) {
|
|
||||||
const profileText = profileNode.toString();
|
|
||||||
|
|
||||||
// Check for ZUGFeRD profiles
|
|
||||||
if (profileText.includes('zugferd') ||
|
|
||||||
profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
|
|
||||||
profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
|
|
||||||
profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) {
|
|
||||||
return InvoiceFormat.ZUGFERD;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for Factur-X profiles
|
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element)
|
||||||
if (profileText.includes('factur-x') ||
|
if (FormatDetector.isCIIFormat(root)) {
|
||||||
profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
|
return FormatDetector.detectCIIFormat(doc, xml);
|
||||||
profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
|
|
||||||
profileText === CII_PROFILE_IDS.FACTURX_EN16931) {
|
|
||||||
return InvoiceFormat.FACTURX;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we can't determine the specific CII format, default to generic CII
|
|
||||||
return InvoiceFormat.CII;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ZUGFeRD v1 detection (CrossIndustryDocument root element)
|
// ZUGFeRD v1 detection (CrossIndustryDocument root element)
|
||||||
if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' ||
|
if (FormatDetector.isZUGFeRDV1Format(root)) {
|
||||||
root.nodeName === 'ram:CrossIndustryDocument' || root.nodeName.endsWith(':CrossIndustryDocument')) {
|
|
||||||
|
|
||||||
// Check for ZUGFeRD v1 namespace in the document
|
|
||||||
const xmlString = xml.toString();
|
|
||||||
if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
|
|
||||||
xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12') ||
|
|
||||||
xmlString.includes('urn:ferd:CrossIndustryDocument') ||
|
|
||||||
xmlString.includes('zugferd') ||
|
|
||||||
xmlString.includes('ZUGFeRD')) {
|
|
||||||
return InvoiceFormat.ZUGFERD;
|
return InvoiceFormat.ZUGFERD;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set up namespaces for XPath queries (ZUGFeRD v1)
|
// FatturaPA detection
|
||||||
try {
|
if (FormatDetector.isFatturaPAFormat(root)) {
|
||||||
const namespaces = {
|
|
||||||
rsm: ZUGFERD_V1_NAMESPACES.RSM,
|
|
||||||
ram: ZUGFERD_V1_NAMESPACES.RAM
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create XPath selector with namespaces
|
|
||||||
const select = xpath.useNamespaces(namespaces);
|
|
||||||
|
|
||||||
// Look for profile identifier
|
|
||||||
const profileNode = select(
|
|
||||||
'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
|
||||||
doc
|
|
||||||
);
|
|
||||||
|
|
||||||
if (profileNode) {
|
|
||||||
const profileText = profileNode.toString();
|
|
||||||
|
|
||||||
// Check for ZUGFeRD v1 profiles
|
|
||||||
if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') ||
|
|
||||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC ||
|
|
||||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT ||
|
|
||||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) {
|
|
||||||
return InvoiceFormat.ZUGFERD;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.log('Error in ZUGFeRD v1 XPath detection:', error);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1
|
|
||||||
return InvoiceFormat.ZUGFERD;
|
|
||||||
}
|
|
||||||
|
|
||||||
// FatturaPA detection would be implemented here
|
|
||||||
if (root.nodeName === 'FatturaElettronica' ||
|
|
||||||
(root.getAttribute('xmlns') && root.getAttribute('xmlns')!.includes('fatturapa.gov.it'))) {
|
|
||||||
return InvoiceFormat.FATTURAPA;
|
return InvoiceFormat.FATTURAPA;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,4 +62,241 @@ export class FormatDetector {
|
|||||||
return InvoiceFormat.UNKNOWN;
|
return InvoiceFormat.UNKNOWN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cheap, substring-based format guess made before any XML parsing.
 *
 * The XML is lowercased once and probed for marker substrings. Format families are
 * tested in a fixed order (Factur-X, ZUGFeRD, XRechnung, FatturaPA) and the first family
 * with a hit is returned.
 *
 * @param xml XML string
 * @returns The matched format, or InvoiceFormat.UNKNOWN when no marker is present
 */
private static quickFormatCheck(xml: string): InvoiceFormat {
  const haystack = xml.toLowerCase();
  const containsAny = (...needles: string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  // Factur-X: direct markers, or the EN 16931 URN together with any "factur-x" mention.
  const looksLikeFacturX =
    containsAny('factur-x.eu', 'factur-x.xml', 'factur-x:') ||
    (haystack.includes('urn:cen.eu:en16931:2017') && haystack.includes('factur-x'));
  if (looksLikeFacturX) {
    return InvoiceFormat.FACTURX;
  }

  if (containsAny('zugferd:', 'zugferd-invoice.xml', 'urn:ferd:', 'urn:zugferd')) {
    return InvoiceFormat.ZUGFERD;
  }

  if (containsAny('xrechnung', 'urn:xoev-de:kosit:standard:xrechnung')) {
    return InvoiceFormat.XRECHNUNG;
  }

  if (containsAny('fatturapa', 'fattura elettronica', 'fatturaelettronica')) {
    return InvoiceFormat.FATTURAPA;
  }

  // No marker matched; the caller continues with parsing-based checks.
  return InvoiceFormat.UNKNOWN;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the root element is a UBL document root.
 *
 * A root qualifies when its nodeName is "Invoice" or "CreditNote", either bare or behind
 * any namespace prefix (for example "ubl:Invoice").
 *
 * @param root Root element
 * @returns True if the root element name is a UBL Invoice or CreditNote
 */
private static isUBLFormat(root: Element): boolean {
  const name = root.nodeName;
  return ['Invoice', 'CreditNote'].some(
    (localName) => name === localName || name.endsWith(`:${localName}`)
  );
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a document declares an XRechnung customization.
 *
 * Every CustomizationID element is inspected and the document qualifies when one of them
 * contains "xrechnung" in any letter case. Elements are looked up twice:
 *  - by namespace (UBL CommonBasicComponents-2), which works whatever prefix the
 *    document binds to that namespace;
 *  - by the literal tag name "cbc:CustomizationID", for documents whose DOM carries no
 *    namespace information.
 *
 * If the DOM lookups throw, the document is serialized and searched for
 * "xrechnung" / "XRechnung" instead.
 *
 * @param doc XML document
 * @returns True if an XRechnung customization marker was found
 */
private static isXRechnungFormat(doc: Document): boolean {
  try {
    const CBC_NAMESPACE = 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';

    const candidateLists = [
      doc.getElementsByTagNameNS(CBC_NAMESPACE, 'CustomizationID'),
      doc.getElementsByTagName('cbc:CustomizationID'),
    ];

    for (const nodes of candidateLists) {
      // Index loop: DOM node lists are not guaranteed to be iterable in every DOM implementation.
      for (let i = 0; i < nodes.length; i++) {
        const text = nodes[i].textContent;
        if (text && text.toLowerCase().includes('xrechnung')) {
          return true;
        }
      }
    }

    return false;
  } catch (error) {
    console.warn('Error checking for XRechnung format:', error);
    // If direct DOM access fails, try a string-based approach
    const xmlStr = new XMLSerializer().serializeToString(doc);
    return xmlStr.includes('xrechnung') || xmlStr.includes('XRechnung');
  }
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the root element is a CII document (Factur-X/ZUGFeRD v2+).
 *
 * The root qualifies when its node name is `CrossIndustryInvoice`, either
 * bare or behind any namespace prefix (for example `rsm:CrossIndustryInvoice`).
 * @param root Root element
 * @returns True if it's a CII format
 */
private static isCIIFormat(root: Element): boolean {
  const rootName = root.nodeName;

  if (rootName === 'CrossIndustryInvoice') {
    return true;
  }

  // Covers `rsm:CrossIndustryInvoice` as well as any other prefix.
  return rootName.endsWith(':CrossIndustryInvoice');
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the root element is a ZUGFeRD v1 document.
 *
 * The root qualifies when its node name is `CrossIndustryDocument`, either
 * bare or behind any namespace prefix (for example `rsm:` or `ram:`).
 * @param root Root element
 * @returns True if it's a ZUGFeRD v1 format
 */
private static isZUGFeRDV1Format(root: Element): boolean {
  const rootName = root.nodeName;

  if (rootName === 'CrossIndustryDocument') {
    return true;
  }

  // Covers the `rsm:` and `ram:` spellings as well as any other prefix.
  return rootName.endsWith(':CrossIndustryDocument');
}
|
||||||
|
|
||||||
|
/**
 * Checks if the document is a FatturaPA format.
 *
 * The root qualifies when it is named `FatturaElettronica` (bare or behind a
 * namespace prefix such as `p:FatturaElettronica`), or when its default
 * `xmlns` attribute or its resolved namespace URI mentions `fatturapa.gov.it`.
 * @param root Root element
 * @returns True if it's a FatturaPA format
 */
private static isFatturaPAFormat(root: Element): boolean {
  const rootName = root.nodeName;

  // Accept a prefixed root the same way the other root-name checks do.
  if (rootName === 'FatturaElettronica' || rootName.endsWith(':FatturaElettronica')) {
    return true;
  }

  // Look at both the default namespace declaration and the namespace the
  // element actually resolves to, so a prefixed declaration is not missed.
  const namespaceCandidates = [root.getAttribute('xmlns'), root.namespaceURI];

  // `some` always yields a strict boolean, never `null` or an empty string.
  return namespaceCandidates.some(
    (ns) => typeof ns === 'string' && ns.includes('fatturapa.gov.it')
  );
}
|
||||||
|
|
||||||
|
/**
 * Narrows a CII document down to Factur-X or ZUGFeRD.
 *
 * Walks every `ExchangedDocumentContext` element, reads the `ram:ID` values
 * under its `ram:GuidelineSpecifiedDocumentContextParameter` children and
 * returns on the first ID that identifies a ZUGFeRD or Factur-X profile.
 * When no ID decides the matter, or the DOM traversal throws, the result
 * comes from the string-based detection instead.
 * @param doc XML document
 * @param xml Original XML string for fallback checks
 * @returns Detected format
 */
private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
  try {
    // ZUGFeRD is tested before Factur-X for every ID.
    const isZugferdProfile = (profileId: string): boolean =>
      profileId.includes('zugferd') ||
      profileId === CII_PROFILE_IDS.ZUGFERD_BASIC ||
      profileId === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
      profileId === CII_PROFILE_IDS.ZUGFERD_EXTENDED;

    const isFacturXProfile = (profileId: string): boolean =>
      profileId.includes('factur-x') ||
      profileId === CII_PROFILE_IDS.FACTURX_MINIMUM ||
      profileId === CII_PROFILE_IDS.FACTURX_BASIC ||
      profileId === CII_PROFILE_IDS.FACTURX_EN16931;

    // Direct DOM traversal: namespace-qualified contexts first, then
    // elements whose tag name is the bare `ExchangedDocumentContext`.
    const qualifiedContexts = doc.getElementsByTagNameNS(
      'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
      'ExchangedDocumentContext'
    );
    const bareContexts = doc.getElementsByTagName('ExchangedDocumentContext');

    if (qualifiedContexts.length === 0 && bareContexts.length === 0) {
      // No context element at all: only the raw text can decide.
      return FormatDetector.detectCIIFormatFromString(xml);
    }

    const contexts = Array.from(qualifiedContexts).concat(Array.from(bareContexts));

    for (const context of contexts) {
      const guidelines = Array.from(
        context.getElementsByTagName('ram:GuidelineSpecifiedDocumentContextParameter')
      );

      for (const guideline of guidelines) {
        const idElements = Array.from(guideline.getElementsByTagName('ram:ID'));

        for (const idElement of idElements) {
          const profileId = idElement.textContent || '';

          if (isZugferdProfile(profileId)) {
            return InvoiceFormat.ZUGFERD;
          }

          if (isFacturXProfile(profileId)) {
            return InvoiceFormat.FACTURX;
          }
        }
      }
    }

    // No guideline ID was conclusive, so fall back to string checking.
    return FormatDetector.detectCIIFormatFromString(xml);
  } catch (error) {
    console.warn('Error detecting CII format, falling back to generic CII:', error);
    return FormatDetector.detectCIIFormatFromString(xml);
  }
}
|
||||||
|
|
||||||
|
/**
 * String-based fallback that classifies a CII document by marker text.
 *
 * Factur-X markers are tested before ZUGFeRD markers; when neither appears
 * the document is reported as generic CII. Matching is case-sensitive and
 * limited to the exact spellings listed below.
 * @param xml XML string
 * @returns Detected format
 */
private static detectCIIFormatFromString(xml: string): InvoiceFormat {
  const mentionsAny = (markers: string[]): boolean =>
    markers.some((marker) => xml.includes(marker));

  if (mentionsAny(['factur-x', 'Factur-X'])) {
    return InvoiceFormat.FACTURX;
  }

  if (mentionsAny(['zugferd', 'ZUGFeRD'])) {
    return InvoiceFormat.ZUGFERD;
  }

  // Nothing more specific was found.
  return InvoiceFormat.CII;
}
|
||||||
}
|
}
|
@ -72,14 +72,19 @@ export interface IPdf {
|
|||||||
id: string;
|
id: string;
|
||||||
metadata: {
|
metadata: {
|
||||||
textExtraction: string;
|
textExtraction: string;
|
||||||
|
format?: string;
|
||||||
|
embeddedXml?: {
|
||||||
|
filename: string;
|
||||||
|
description: string;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
buffer: Uint8Array;
|
buffer: Uint8Array;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Re-export types from tsclass for convenience
|
// Re-export types from tsclass for convenience
|
||||||
export type { TInvoice } from '@tsclass/tsclass/dist_ts/finance';
|
export type { TInvoice } from '@tsclass/tsclass/dist_ts/finance/index.js';
|
||||||
export type { TCreditNote } from '@tsclass/tsclass/dist_ts/finance';
|
export type { TCreditNote } from '@tsclass/tsclass/dist_ts/finance/index.js';
|
||||||
export type { TDebitNote } from '@tsclass/tsclass/dist_ts/finance';
|
export type { TDebitNote } from '@tsclass/tsclass/dist_ts/finance/index.js';
|
||||||
export type { TContact } from '@tsclass/tsclass/dist_ts/business';
|
export type { TContact } from '@tsclass/tsclass/dist_ts/business/index.js';
|
||||||
export type { TLetterEnvelope } from '@tsclass/tsclass/dist_ts/business';
|
export type { TLetterEnvelope } from '@tsclass/tsclass/dist_ts/business/index.js';
|
||||||
export type { TDocumentEnvelope } from '@tsclass/tsclass/dist_ts/business';
|
export type { TDocumentEnvelope } from '@tsclass/tsclass/dist_ts/business/index.js';
|
Loading…
x
Reference in New Issue
Block a user