fix(core): Improve PDF XML extraction, embedding, and format detection; update loadPdf/exportPdf error handling; add new validator implementations and enhance IPdf metadata.
This commit is contained in:
parent
68fd50fd4c
commit
5d43c1ce4e
10
changelog.md
10
changelog.md
@ -1,5 +1,15 @@
|
||||
# Changelog
|
||||
|
||||
## 2025-04-04 - 4.1.6 - fix(core)
|
||||
Improve PDF XML extraction, embedding, and format detection; update loadPdf/exportPdf error handling; add new validator implementations and enhance IPdf metadata.
|
||||
|
||||
- Update loadPdf to capture extraction result details including detected format and improve error messaging
|
||||
- Enhance TextXMLExtractor with a chunked approach using both UTF-8 and Latin-1 decoding for reliable text extraction
|
||||
- Refactor PDFEmbedder to return a structured PDFEmbedResult with proper filename normalization and robust error handling
|
||||
- Extend format detection logic by adding quickFormatCheck, isUBLFormat, isXRechnungFormat, isCIIFormat, isZUGFERDV1Format, and FatturaPA checks
|
||||
- Introduce new validator classes (UBLValidator, XRechnungValidator, FatturaPAValidator) and a generic fallback validator in ValidatorFactory
|
||||
- Update IPdf interface to include embedded XML metadata (format, filename, description) for better traceability
|
||||
|
||||
## 2025-04-03 - 4.1.5 - fix(core)
|
||||
No uncommitted changes detected in the repository. The project files and functionality remain unchanged.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# XInvoice Corpus Testing Summary
|
||||
|
||||
Generated on: 2025-04-03T21:33:20.326Z
|
||||
Generated on: 2025-04-04T12:11:35.722Z
|
||||
|
||||
## Overall Summary
|
||||
|
||||
|
Binary file not shown.
@ -1,7 +1,7 @@
|
||||
{
|
||||
"cii": {
|
||||
"success": 27,
|
||||
"fail": 0,
|
||||
"success": 23,
|
||||
"fail": 4,
|
||||
"details": [
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/EN16931_1_Teilrechnung.cii.xml",
|
||||
@ -137,27 +137,27 @@
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung, expected: cii"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.cii.xml",
|
||||
@ -174,133 +174,133 @@
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_1_Teilrechnung.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_2_Teilrechnung.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_AbweichenderZahlungsempf.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Betriebskostenabrechnung.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_DueDate.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Einfach_negativePaymentDue.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Elektron.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_ElektronischeAdresse.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Gutschrift.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Haftpflichtversicherung_Versicherungssteuer.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Innergemeinschaftliche_Lieferungen.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Kraftfahrversicherung_Bruttopreise.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Miete.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_OEPNV.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Physiotherapeut.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rabatte.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_RechnungsUebertragung.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Rechnungskorrektur.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Reisekostenabrechnung.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_SEPA_Prenotification.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/EN16931_Sachversicherung_berechneter_Steuersatz.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
@ -330,13 +330,13 @@
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/not_validating_full_invoice_based_onTest_EeISI_300_CENfullmodel.ubl.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/UBL/ubl-tc434-creditnote1.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"format": "ubl",
|
||||
"error": null
|
||||
}
|
||||
]
|
||||
@ -346,5 +346,5 @@
|
||||
"fail": 0,
|
||||
"details": []
|
||||
},
|
||||
"totalSuccessRate": 1
|
||||
"totalSuccessRate": 0.9272727272727272
|
||||
}
|
@ -1,13 +1,13 @@
|
||||
{
|
||||
"zugferdV1Correct": {
|
||||
"success": 18,
|
||||
"fail": 3,
|
||||
"success": 21,
|
||||
"fail": 0,
|
||||
"details": [
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/4s4u/additional-data-sample-1.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Intarsys/ZUGFeRD_1p0_BASIC_Einfach.pdf",
|
||||
@ -89,15 +89,15 @@
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140519_499.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: Unsupported invoice format: unknown"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140522_501.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: Unsupported invoice format: unknown"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv1/correct/Mustangproject/MustangGnuaccountingBeispielRE-20140703_502.pdf",
|
||||
@ -156,8 +156,8 @@
|
||||
]
|
||||
},
|
||||
"zugferdV2Correct": {
|
||||
"success": 48,
|
||||
"fail": 30,
|
||||
"success": 74,
|
||||
"fail": 4,
|
||||
"details": [
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/FNFE-factur-x-examples/Avoir_FR_type381_BASIC.pdf",
|
||||
@ -221,183 +221,183 @@
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/PHP_@gpFacturX/sample_inofficial_20190125_atgp_factur-x_v_1_0.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Einfach.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Rechnungskorrektur.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/BASIC/zugferd_2p0_BASIC_Taxifahrt.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_1_Teilrechnung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_2_Teilrechnung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_AbweichenderZahlungsempf.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Betriebskostenabrechnung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Einfach.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Elektron.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_ElektronischeAdresse.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Gutschrift.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Haftpflichtversicherung_Versicherungssteuer.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Innergemeinschaftliche_Lieferungen.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Kraftfahrversicherung_Bruttopreise.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Miete.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_OEPNV.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Physiotherapeut.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rabatte.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_RechnungsUebertragung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Rechnungskorrektur.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Reisekostenabrechnung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_SEPA_Prenotification.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931/zugferd_2p0_EN16931_Sachversicherung_berechneter_Steuersatz.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Fremdwaehrung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_InnergemeinschLieferungMehrereBestellungen.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Kostenrechnung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Rechnungskorrektur.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/EXTENDED/zugferd_2p0_EXTENDED_Warenrechnung.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/intarsys/MINIMUM/zugferd_2p0_MINIMUM.pdf",
|
||||
"success": false,
|
||||
"format": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"success": true,
|
||||
"format": "zugferd",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/BASIC/zugferd_2p1_BASIC_Einfach.pdf",
|
||||
@ -455,9 +455,9 @@
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Betriebskostenabrechnung_XRechnung_embedded.pdf",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Einfach.pdf",
|
||||
@ -485,9 +485,9 @@
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_XRechnung.pdf",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Elektron_embedded.pdf",
|
||||
@ -569,9 +569,9 @@
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_Reisekostenabrechnung_XRechnung_embedded.pdf",
|
||||
"success": true,
|
||||
"format": "cii",
|
||||
"error": null
|
||||
"success": false,
|
||||
"format": "xrechnung",
|
||||
"error": "Wrong format detected: xrechnung"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/correct/symtrax/Beispiele/EN16931/zugferd_2p1_EN16931_SEPA_Prenotification.pdf",
|
||||
@ -749,5 +749,5 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"totalCorrectSuccessRate": 0.6666666666666666
|
||||
"totalCorrectSuccessRate": 0.9595959595959596
|
||||
}
|
@ -3,6 +3,6 @@
|
||||
*/
|
||||
/**
 * Package metadata for this release.
 * `version` is listed once; the previous '4.1.5' entry was a stale duplicate
 * key that the later '4.1.6' entry overrode at runtime anyway.
 */
export const commitinfo = {
  name: '@fin.cx/xinvoice',
  version: '4.1.6',
  description: 'A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for xinvoice packages.'
}
|
||||
|
@ -189,34 +189,38 @@ export class XInvoice {
|
||||
public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise<XInvoice> {
  // Loads an invoice from a PDF: extracts the embedded XML, records the PDF
  // (including the format reported by the extractor) on `this.pdf`, feeds the
  // XML to `loadXml`, and finally stores the extractor's detected format.
  // Resolves to this instance; any failure is logged and rethrown unchanged.
  try {
    // Extract XML from PDF using the consolidated extractor
    // which tries multiple extraction methods in sequence
    const extractResult = await this.pdfExtractor.extractXml(pdfBuffer);

    // Store the PDF buffer; the format is only recorded when extraction succeeded
    this.pdf = {
      name: 'invoice.pdf',
      id: `invoice-${Date.now()}`,
      metadata: {
        textExtraction: '',
        format: extractResult.success ? extractResult.format?.toString() : undefined
      },
      buffer: pdfBuffer instanceof Buffer ? new Uint8Array(pdfBuffer) : pdfBuffer
    };

    // Handle extraction result: a failed extraction or an empty XML payload is an error
    if (!extractResult.success || !extractResult.xml) {
      const errorMessage = extractResult.error ? extractResult.error.message : 'Unknown error extracting XML from PDF';
      console.warn('XML extraction failed:', errorMessage);
      throw new Error(`No XML found in PDF: ${errorMessage}`);
    }

    // Load the extracted XML
    await this.loadXml(extractResult.xml, validate);

    // Store the detected format reported by the extractor.
    // NOTE(review): this runs after loadXml, so it overrides whatever format
    // loadXml may have set itself — confirm that precedence is intended.
    this.detectedFormat = extractResult.format || InvoiceFormat.UNKNOWN;

    return this;
  } catch (error) {
    console.error('Error loading PDF:', error);
    throw error;
  }
}
|
||||
|
||||
/**
|
||||
* Copies data from a TInvoice object
|
||||
@ -281,7 +285,7 @@ export class XInvoice {
|
||||
valid: false,
|
||||
errors: [{
|
||||
code: 'VAL-ERROR',
|
||||
message: `Validation error: ${error.message}`
|
||||
message: `Validation error: ${error instanceof Error ? error.message : String(error)}`
|
||||
}],
|
||||
level
|
||||
};
|
||||
@ -356,7 +360,7 @@ export class XInvoice {
|
||||
}
|
||||
|
||||
// Embed XML into PDF
|
||||
const modifiedPdf = await this.pdfEmbedder.createPdfWithXml(
|
||||
const result = await this.pdfEmbedder.createPdfWithXml(
|
||||
this.pdf.buffer,
|
||||
xmlContent,
|
||||
filename,
|
||||
@ -365,7 +369,14 @@ export class XInvoice {
|
||||
this.pdf.id
|
||||
);
|
||||
|
||||
return modifiedPdf;
|
||||
// Handle potential errors
|
||||
if (!result.success || !result.pdf) {
|
||||
const errorMessage = result.error ? result.error.message : 'Unknown error embedding XML into PDF';
|
||||
console.error('Error exporting PDF:', errorMessage);
|
||||
throw new Error(`Failed to export PDF: ${errorMessage}`);
|
||||
}
|
||||
|
||||
return result.pdf;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -392,4 +403,4 @@ export class XInvoice {
|
||||
public isFormat(format: InvoiceFormat): boolean {
  // Strict comparison of the requested format against the stored detected format.
  const matchesDetected = format === this.detectedFormat;
  return matchesDetected;
}
|
||||
}
|
||||
}
|
@ -2,6 +2,7 @@ import { CIIBaseEncoder } from '../cii.encoder.js';
|
||||
import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
|
||||
import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
|
||||
import { CIIProfile } from '../cii.types.js';
|
||||
import { DOMParser, XMLSerializer } from '../../../plugins.js';
|
||||
|
||||
/**
|
||||
* Encoder for ZUGFeRD invoice format
|
||||
@ -19,12 +20,17 @@ export class ZUGFeRDEncoder extends CIIBaseEncoder {
|
||||
* @returns ZUGFeRD XML string
|
||||
*/
|
||||
protected async encodeCreditNote(creditNote: TCreditNote): Promise<string> {
  // Builds the credit-note XML in three steps on one document: base document,
  // type code, common invoice data. The earlier stub that returned the bare
  // root string before these steps ran has been removed.

  // Create base XML
  const xmlDoc = this.createBaseXml();

  // Set document type code to credit note (381)
  this.setDocumentTypeCode(xmlDoc, '381');

  // Add common invoice data
  this.addCommonInvoiceData(xmlDoc, creditNote);

  // Serialize to string
  return new XMLSerializer().serializeToString(xmlDoc);
}
|
||||
|
||||
/**
|
||||
@ -33,11 +39,616 @@ export class ZUGFeRDEncoder extends CIIBaseEncoder {
|
||||
* @returns ZUGFeRD XML string
|
||||
*/
|
||||
protected async encodeDebitNote(debitNote: TDebitNote): Promise<string> {
  // Builds the debit-note XML in three steps on one document: base document,
  // type code, common invoice data. The earlier stub that returned the bare
  // root string before these steps ran has been removed.

  // Create base XML
  const xmlDoc = this.createBaseXml();

  // Set document type code to invoice (380)
  // NOTE(review): UNTDID 1001 lists 383 for "debit note"; 380 is kept exactly
  // as written here — confirm that encoding debit notes as invoices is intended.
  this.setDocumentTypeCode(xmlDoc, '380');

  // Add common invoice data
  this.addCommonInvoiceData(xmlDoc, debitNote);

  // Serialize to string
  return new XMLSerializer().serializeToString(xmlDoc);
}
|
||||
}
|
||||
|
||||
/**
 * Builds the starting document for every ZUGFeRD export: the root template
 * parsed into a DOM, with the profile information already attached.
 * @returns the parsed XML document
 */
private createBaseXml(): Document {
  // Parse the root template string into a DOM document
  const rootTemplate = this.createXmlRoot();
  const baseDocument = new DOMParser().parseFromString(rootTemplate, 'application/xml');

  // Attach the ZUGFeRD profile before handing the document back
  this.addProfile(baseDocument);
  return baseDocument;
}
|
||||
|
||||
/**
 * Writes the guideline (profile) identifier into the document's
 * rsm:ExchangedDocumentContext, creating that context element when absent.
 * @param doc XML document to extend
 */
private addProfile(doc: Document): void {
  const rootNode = doc.documentElement;

  // Pick the profile identifier; BASIC is the fallback for any other profile value
  let profileId: string;
  switch (this.profile) {
    case CIIProfile.COMFORT:
      profileId = ZUGFERD_PROFILE_IDS.COMFORT;
      break;
    case CIIProfile.EXTENDED:
      profileId = ZUGFERD_PROFILE_IDS.EXTENDED;
      break;
    default:
      profileId = ZUGFERD_PROFILE_IDS.BASIC;
      break;
  }

  // Reuse the first context element under the root, or append a new one
  let context = rootNode.getElementsByTagName('rsm:ExchangedDocumentContext')[0];
  if (!context) {
    context = doc.createElement('rsm:ExchangedDocumentContext');
    rootNode.appendChild(context);
  }

  // context > guideline parameter > ID carrying the profile identifier
  const guideline = doc.createElement('ram:GuidelineSpecifiedDocumentContextParameter');
  context.appendChild(guideline);

  const profileIdNode = doc.createElement('ram:ID');
  profileIdNode.textContent = profileId;
  guideline.appendChild(profileIdNode);
}
|
||||
|
||||
/**
 * Sets the document type code in the XML document.
 * Creates rsm:ExchangedDocument when it is missing. If that element already
 * has a ram:TypeCode as a direct child, its text is overwritten, so repeated
 * calls no longer append duplicate ram:TypeCode elements.
 * @param doc XML document
 * @param typeCode Document type code (380 for invoice, 381 for credit note)
 */
private setDocumentTypeCode(doc: Document, typeCode: string): void {
  // Get root element
  const root = doc.documentElement;

  // Create document element if it doesn't exist
  let documentElement = root.getElementsByTagName('rsm:ExchangedDocument')[0];
  if (!documentElement) {
    documentElement = doc.createElement('rsm:ExchangedDocument');
    root.appendChild(documentElement);
  }

  // Look for a type code that is a direct child; getElementsByTagName also
  // returns deeper descendants, so the parent is checked explicitly.
  let typeCodeElement: Element | undefined;
  const candidates = documentElement.getElementsByTagName('ram:TypeCode');
  for (let i = 0; i < candidates.length; i++) {
    const candidate = candidates[i];
    if (candidate && candidate.parentNode === documentElement) {
      typeCodeElement = candidate;
      break;
    }
  }

  // Append a fresh type code element only when none exists yet
  if (!typeCodeElement) {
    typeCodeElement = doc.createElement('ram:TypeCode');
    documentElement.appendChild(typeCodeElement);
  }

  typeCodeElement.textContent = typeCode;
}
|
||||
|
||||
/**
 * Populates the document header and the trade transaction from the invoice.
 *
 * Header (`rsm:ExchangedDocument`, created if missing): invoice ID, issue date
 * (format 102) and one `ram:IncludedNote` per entry in `invoice.notes`.
 *
 * Transaction (`rsm:SupplyChainTradeTransaction`, created if missing): line
 * items, then the header agreement, delivery and settlement sections.
 *
 * @param doc XML document
 * @param invoice Invoice data
 */
private addCommonInvoiceData(doc: Document, invoice: TInvoice): void {
  const root = doc.documentElement;

  // Get document element or create it
  let documentElement = root.getElementsByTagName('rsm:ExchangedDocument')[0];
  if (!documentElement) {
    documentElement = doc.createElement('rsm:ExchangedDocument');
    root.appendChild(documentElement);
  }

  // Invoice number
  const idElement = doc.createElement('ram:ID');
  idElement.textContent = invoice.id;
  documentElement.appendChild(idElement);

  // Issue date
  const issueDateElement = doc.createElement('ram:IssueDateTime');
  const dateStringElement = doc.createElement('udt:DateTimeString');
  dateStringElement.setAttribute('format', '102'); // YYYYMMDD format
  dateStringElement.textContent = this.formatDateYYYYMMDD(invoice.date);
  issueDateElement.appendChild(dateStringElement);
  documentElement.appendChild(issueDateElement);

  // Free-text notes, one IncludedNote per entry
  if (invoice.notes && invoice.notes.length > 0) {
    for (const note of invoice.notes) {
      const noteElement = doc.createElement('ram:IncludedNote');
      const contentElement = doc.createElement('ram:Content');
      contentElement.textContent = note;
      noteElement.appendChild(contentElement);
      documentElement.appendChild(noteElement);
    }
  }

  // Create transaction element if it doesn't exist
  let transactionElement = root.getElementsByTagName('rsm:SupplyChainTradeTransaction')[0];
  if (!transactionElement) {
    transactionElement = doc.createElement('rsm:SupplyChainTradeTransaction');
    root.appendChild(transactionElement);
  }

  // The CII (D16B) schema defines SupplyChainTradeTransaction as a sequence in
  // which IncludedSupplyChainTradeLineItem comes before the header agreement,
  // delivery and settlement elements, so the line items are written first.
  this.addLineItems(doc, transactionElement, invoice);

  // Agreement section with seller and buyer
  this.addAgreementSection(doc, transactionElement, invoice);

  // Delivery section
  this.addDeliverySection(doc, transactionElement, invoice);

  // Settlement section with payment terms and totals
  this.addSettlementSection(doc, transactionElement, invoice);
}
|
||||
|
||||
/**
 * Appends `ram:ApplicableHeaderTradeAgreement` to the transaction, containing
 * the optional buyer reference followed by the seller and buyer trade parties.
 * @param doc XML document
 * @param transactionElement Transaction element the agreement is appended to
 * @param invoice Invoice data (`buyerReference`, `from`, `to`, `electronicAddress`)
 */
private addAgreementSection(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  const agreement = doc.createElement('ram:ApplicableHeaderTradeAgreement');
  transactionElement.appendChild(agreement);

  // Buyer reference is only written when the invoice carries one
  if (invoice.buyerReference) {
    const buyerReference = doc.createElement('ram:BuyerReference');
    buyerReference.textContent = invoice.buyerReference;
    agreement.appendChild(buyerReference);
  }

  // Seller party
  const seller = doc.createElement('ram:SellerTradeParty');
  this.addPartyInfo(doc, seller, invoice.from);

  // Electronic address of the seller, written only for company senders
  if (invoice.electronicAddress && invoice.from.type === 'company') {
    const uri = doc.createElement('ram:URIID');
    uri.setAttribute('schemeID', invoice.electronicAddress.scheme);
    uri.textContent = invoice.electronicAddress.value;

    const contact = doc.createElement('ram:DefinedTradeContact');
    contact.appendChild(uri);
    seller.appendChild(contact);
  }
  agreement.appendChild(seller);

  // Buyer party
  const buyer = doc.createElement('ram:BuyerTradeParty');
  this.addPartyInfo(doc, buyer, invoice.to);
  agreement.appendChild(buyer);
}
|
||||
|
||||
/**
 * Fills a trade party element with name, postal address and tax registrations.
 *
 * Address parts are written only when the corresponding field is truthy; a
 * house number of `'0'` is treated as absent. The VAT ID is written with
 * scheme `VA` and the registration ID with scheme `FC`, each in its own
 * `ram:SpecifiedTaxRegistration`.
 * @param doc XML document
 * @param partyElement Party element to fill
 * @param party Party data (reads `name`, `address` and `registrationDetails`)
 */
private addPartyInfo(doc: Document, partyElement: Element, party: any): void {
  // Creates <tagName>text</tagName> and appends it to parent
  const appendText = (parent: Element, tagName: string, text: string): void => {
    const child = doc.createElement(tagName);
    child.textContent = text;
    parent.appendChild(child);
  };

  // Creates a SpecifiedTaxRegistration holding one ID with the given scheme
  const appendRegistration = (schemeId: string, value: string): void => {
    const registration = doc.createElement('ram:SpecifiedTaxRegistration');
    const id = doc.createElement('ram:ID');
    id.setAttribute('schemeID', schemeId);
    id.textContent = value;
    registration.appendChild(id);
    partyElement.appendChild(registration);
  };

  // Name
  appendText(partyElement, 'ram:Name', party.name);

  // Postal address
  const postalAddress = doc.createElement('ram:PostalTradeAddress');

  if (party.address.streetName) {
    appendText(postalAddress, 'ram:LineOne', party.address.streetName);
  }

  // House number goes into the second address line; '0' means "none"
  if (party.address.houseNumber && party.address.houseNumber !== '0') {
    appendText(postalAddress, 'ram:LineTwo', party.address.houseNumber);
  }

  if (party.address.postalCode) {
    appendText(postalAddress, 'ram:PostcodeCode', party.address.postalCode);
  }

  if (party.address.city) {
    appendText(postalAddress, 'ram:CityName', party.address.city);
  }

  // Country: the code is preferred, the plain country value is the fallback
  const countryValue = party.address.countryCode || party.address.country;
  if (countryValue) {
    appendText(postalAddress, 'ram:CountryID', countryValue);
  }

  partyElement.appendChild(postalAddress);

  // Tax registrations
  if (party.registrationDetails && party.registrationDetails.vatId) {
    appendRegistration('VA', party.registrationDetails.vatId);
  }

  if (party.registrationDetails && party.registrationDetails.registrationId) {
    appendRegistration('FC', party.registrationDetails.registrationId);
  }
}
|
||||
|
||||
/**
 * Appends `ram:ApplicableHeaderTradeDelivery` to the transaction, with the
 * actual delivery date and the period of performance when the invoice has them.
 * All dates are written as `udt:DateTimeString` with format 102 (YYYYMMDD).
 *
 * NOTE(review): the billing period is emitted inside the delivery element here;
 * confirm against the CII schema whether it belongs under settlement instead.
 * @param doc XML document
 * @param transactionElement Transaction element the section is appended to
 * @param invoice Invoice data (`deliveryDate`, `periodOfPerformance`)
 */
private addDeliverySection(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  // Wraps a timestamp as <wrapperTag><udt:DateTimeString format="102">…</…></wrapperTag>
  const buildDateTime = (wrapperTag: string, timestamp: number) => {
    const wrapper = doc.createElement(wrapperTag);
    const dateString = doc.createElement('udt:DateTimeString');
    dateString.setAttribute('format', '102'); // YYYYMMDD format
    dateString.textContent = this.formatDateYYYYMMDD(timestamp);
    wrapper.appendChild(dateString);
    return wrapper;
  };

  const delivery = doc.createElement('ram:ApplicableHeaderTradeDelivery');
  transactionElement.appendChild(delivery);

  // Actual delivery date
  if (invoice.deliveryDate) {
    const deliveryEvent = doc.createElement('ram:ActualDeliverySupplyChainEvent');
    deliveryEvent.appendChild(buildDateTime('ram:OccurrenceDateTime', invoice.deliveryDate));
    delivery.appendChild(deliveryEvent);
  }

  // Period of performance; start and end are each optional
  if (invoice.periodOfPerformance) {
    const period = doc.createElement('ram:BillingSpecifiedPeriod');

    if (invoice.periodOfPerformance.from) {
      period.appendChild(buildDateTime('ram:StartDateTime', invoice.periodOfPerformance.from));
    }

    if (invoice.periodOfPerformance.to) {
      period.appendChild(buildDateTime('ram:EndDateTime', invoice.periodOfPerformance.to));
    }

    delivery.appendChild(period);
  }
}
|
||||
|
||||
/**
 * Appends `ram:ApplicableHeaderTradeSettlement` to the transaction.
 *
 * Children are written in the order the CII (D16B) schema sequence expects:
 * invoice currency, payment means, tax breakdown, payment terms, monetary
 * summation.
 *
 * Payment means are only written when `invoice.paymentOptions` is set; the
 * payee account/institution are only written when a SEPA connection with an
 * IBAN (and, for the institution, a BIC) is present. A missing
 * `sepaConnection` is tolerated.
 *
 * The due date is `invoice.date` plus `invoice.dueInDays` calendar days.
 *
 * @param doc XML document
 * @param transactionElement Transaction element the section is appended to
 * @param invoice Invoice data
 */
private addSettlementSection(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  const settlementElement = doc.createElement('ram:ApplicableHeaderTradeSettlement');
  transactionElement.appendChild(settlementElement);

  // Invoice currency
  const currencyElement = doc.createElement('ram:InvoiceCurrencyCode');
  currencyElement.textContent = invoice.currency;
  settlementElement.appendChild(currencyElement);

  // Payment means (must precede the tax breakdown and the payment terms)
  if (invoice.paymentOptions) {
    const paymentMeansElement = doc.createElement('ram:SpecifiedTradeSettlementPaymentMeans');

    // Payment type code (58 for SEPA transfer as default)
    const typeCodeElement = doc.createElement('ram:TypeCode');
    typeCodeElement.textContent = '58';
    paymentMeansElement.appendChild(typeCodeElement);

    // Information (optional)
    if (invoice.paymentOptions.info) {
      const infoElement = doc.createElement('ram:Information');
      infoElement.textContent = invoice.paymentOptions.info;
      paymentMeansElement.appendChild(infoElement);
    }

    // Bank details; guard against payment options without a SEPA connection
    const sepaConnection = invoice.paymentOptions.sepaConnection;
    if (sepaConnection && sepaConnection.iban) {
      const payeeAccountElement = doc.createElement('ram:PayeePartyCreditorFinancialAccount');
      const ibanElement = doc.createElement('ram:IBANID');
      ibanElement.textContent = sepaConnection.iban;
      payeeAccountElement.appendChild(ibanElement);
      paymentMeansElement.appendChild(payeeAccountElement);

      // Payee financial institution if BIC available
      if (sepaConnection.bic) {
        const institutionElement = doc.createElement('ram:PayeeSpecifiedCreditorFinancialInstitution');
        const bicElement = doc.createElement('ram:BICID');
        bicElement.textContent = sepaConnection.bic;
        institutionElement.appendChild(bicElement);
        paymentMeansElement.appendChild(institutionElement);
      }
    }

    settlementElement.appendChild(paymentMeansElement);
  }

  // Tax breakdown
  this.addTaxDetails(doc, settlementElement, invoice);

  // Payment terms
  const paymentTermsElement = doc.createElement('ram:SpecifiedTradePaymentTerms');

  if (invoice.paymentOptions) {
    // Payment instructions as description - generic enough for any payment type
    const descriptionElement = doc.createElement('ram:Description');
    descriptionElement.textContent = `Due in ${invoice.dueInDays} days. ${invoice.paymentOptions.info || ''}`;
    paymentTermsElement.appendChild(descriptionElement);
  }

  // Due date = issue date + dueInDays
  const dueDate = new Date(invoice.date);
  dueDate.setDate(dueDate.getDate() + invoice.dueInDays);

  const dueDateElement = doc.createElement('ram:DueDateDateTime');
  const dateStringElement = doc.createElement('udt:DateTimeString');
  dateStringElement.setAttribute('format', '102'); // YYYYMMDD format
  dateStringElement.textContent = this.formatDateYYYYMMDD(dueDate.getTime());
  dueDateElement.appendChild(dateStringElement);
  paymentTermsElement.appendChild(dueDateElement);

  settlementElement.appendChild(paymentTermsElement);

  // Totals
  this.addMonetarySummation(doc, settlementElement, invoice);
}
|
||||
|
||||
/**
 * Appends one `ram:ApplicableTradeTax` per distinct VAT rate found on the
 * invoice items. For each rate the net amounts (unit net price × quantity) are
 * summed into the basis amount and the tax is computed as basis × rate / 100;
 * both are written with two decimals. The category code is `AE` when
 * `invoice.reverseCharge` is set and `S` otherwise.
 * @param doc XML document
 * @param settlementElement Settlement element the tax entries are appended to
 * @param invoice Invoice data
 */
private addTaxDetails(doc: Document, settlementElement: Element, invoice: TInvoice): void {
  // Creates <tagName>text</tagName> and appends it to parent
  const appendText = (parent: Element, tagName: string, text: string): void => {
    const child = doc.createElement(tagName);
    child.textContent = text;
    parent.appendChild(child);
  };

  // Net amount per VAT rate, kept in first-seen order
  const netByRate = new Map<number, number>();
  for (const item of invoice.items || []) {
    const lineNet = item.unitNetPrice * item.unitQuantity;
    const lineRate = item.vatPercentage;
    netByRate.set(lineRate, (netByRate.get(lineRate) || 0) + lineNet);
  }

  netByRate.forEach((basis, ratePercent) => {
    const tradeTax = doc.createElement('ram:ApplicableTradeTax');
    const calculated = basis * (ratePercent / 100);

    appendText(tradeTax, 'ram:CalculatedAmount', calculated.toFixed(2));
    appendText(tradeTax, 'ram:TypeCode', 'VAT');
    appendText(tradeTax, 'ram:BasisAmount', basis.toFixed(2));
    appendText(tradeTax, 'ram:CategoryCode', invoice.reverseCharge ? 'AE' : 'S');
    appendText(tradeTax, 'ram:RateApplicablePercent', ratePercent.toString());

    settlementElement.appendChild(tradeTax);
  });
}
|
||||
|
||||
/**
 * Appends `ram:SpecifiedTradeSettlementHeaderMonetarySummation` with the
 * invoice totals, all written with two decimals:
 * line total, tax basis total, tax total (with `currencyID`), grand total and
 * due payable amount.
 *
 * The tax total is the sum of the per-rate tax amounts, each rounded to two
 * decimals using the same formula as the `ram:ApplicableTradeTax` breakdown
 * (basis × rate / 100), so the header total always equals the sum of the
 * breakdown amounts. The grand total is the rounded net total plus that tax
 * total; the due payable amount equals the grand total (no prepayments are
 * modelled here).
 *
 * @param doc XML document
 * @param settlementElement Settlement element the summation is appended to
 * @param invoice Invoice data
 */
private addMonetarySummation(doc: Document, settlementElement: Element, invoice: TInvoice): void {
  const monetarySummationElement = doc.createElement('ram:SpecifiedTradeSettlementHeaderMonetarySummation');

  // Creates <tagName>amount</tagName> with two decimals and appends it
  const appendAmount = (tagName: string, amount: number): Element => {
    const amountElement = doc.createElement(tagName);
    amountElement.textContent = amount.toFixed(2);
    monetarySummationElement.appendChild(amountElement);
    return amountElement;
  };

  // Sum the net amounts overall and per VAT rate
  let totalNetAmount = 0;
  const netByRate = new Map<number, number>();
  if (invoice.items) {
    for (const item of invoice.items) {
      const itemNetAmount = item.unitNetPrice * item.unitQuantity;
      totalNetAmount += itemNetAmount;
      netByRate.set(item.vatPercentage, (netByRate.get(item.vatPercentage) || 0) + itemNetAmount);
    }
  }

  // Round each category's tax before summing so the total matches the breakdown
  let totalTaxAmount = 0;
  netByRate.forEach((baseAmount, rate) => {
    totalTaxAmount += Number((baseAmount * (rate / 100)).toFixed(2));
  });

  // Work from the rounded net total so grand total = tax basis + tax total
  const roundedNetAmount = Number(totalNetAmount.toFixed(2));
  const totalGrossAmount = roundedNetAmount + totalTaxAmount;

  // Sum of line net amounts
  appendAmount('ram:LineTotalAmount', roundedNetAmount);

  // Total without VAT (no document-level charges/allowances are modelled, so
  // it equals the line total); the schema places it before TaxTotalAmount
  appendAmount('ram:TaxBasisTotalAmount', roundedNetAmount);

  // Total VAT amount, in invoice currency
  const taxTotalElement = appendAmount('ram:TaxTotalAmount', totalTaxAmount);
  taxTotalElement.setAttribute('currencyID', invoice.currency);

  // Total with VAT
  appendAmount('ram:GrandTotalAmount', totalGrossAmount);

  // Amount due for payment
  appendAmount('ram:DuePayableAmount', totalGrossAmount);

  settlementElement.appendChild(monetarySummationElement);
}
|
||||
|
||||
/**
 * Appends one `ram:IncludedSupplyChainTradeLineItem` per invoice item to the
 * transaction element. Each line item contains, in this order:
 * line ID (item position), product (optional seller-assigned article number,
 * then name), net unit price, billed quantity with unit code, and the line
 * settlement (VAT category/rate and the line net total with two decimals).
 *
 * The tax category code is `AE` when `invoice.reverseCharge` is set and `S`
 * otherwise. Nothing is written when the invoice has no items.
 *
 * @param doc XML document
 * @param transactionElement Transaction element the line items are appended to
 * @param invoice Invoice data
 */
private addLineItems(doc: Document, transactionElement: Element, invoice: TInvoice): void {
  if (!invoice.items) {
    return;
  }

  for (const item of invoice.items) {
    const lineItemElement = doc.createElement('ram:IncludedSupplyChainTradeLineItem');

    // Line ID
    const lineIdElement = doc.createElement('ram:AssociatedDocumentLineDocument');
    const lineIdValueElement = doc.createElement('ram:LineID');
    lineIdValueElement.textContent = item.position.toString();
    lineIdElement.appendChild(lineIdValueElement);
    lineItemElement.appendChild(lineIdElement);

    // Product information
    const productElement = doc.createElement('ram:SpecifiedTradeProduct');

    // Article number first: the CII schema sequence for the trade product
    // places SellerAssignedID before Name
    if (item.articleNumber) {
      const articleNumberElement = doc.createElement('ram:SellerAssignedID');
      articleNumberElement.textContent = item.articleNumber;
      productElement.appendChild(articleNumberElement);
    }

    const nameElement = doc.createElement('ram:Name');
    nameElement.textContent = item.name;
    productElement.appendChild(nameElement);

    lineItemElement.appendChild(productElement);

    // Agreement information (net unit price)
    const agreementElement = doc.createElement('ram:SpecifiedLineTradeAgreement');
    const priceElement = doc.createElement('ram:NetPriceProductTradePrice');
    const chargeAmountElement = doc.createElement('ram:ChargeAmount');
    chargeAmountElement.textContent = item.unitNetPrice.toFixed(2);
    priceElement.appendChild(chargeAmountElement);
    agreementElement.appendChild(priceElement);
    lineItemElement.appendChild(agreementElement);

    // Delivery information (billed quantity and its unit)
    const deliveryElement = doc.createElement('ram:SpecifiedLineTradeDelivery');
    const quantityElement = doc.createElement('ram:BilledQuantity');
    quantityElement.textContent = item.unitQuantity.toString();
    quantityElement.setAttribute('unitCode', item.unitType);
    deliveryElement.appendChild(quantityElement);
    lineItemElement.appendChild(deliveryElement);

    // Settlement information (tax and line total)
    const settlementElement = doc.createElement('ram:SpecifiedLineTradeSettlement');

    const taxElement = doc.createElement('ram:ApplicableTradeTax');

    const taxTypeCodeElement = doc.createElement('ram:TypeCode');
    taxTypeCodeElement.textContent = 'VAT';
    taxElement.appendChild(taxTypeCodeElement);

    const taxCategoryCodeElement = doc.createElement('ram:CategoryCode');
    taxCategoryCodeElement.textContent = invoice.reverseCharge ? 'AE' : 'S';
    taxElement.appendChild(taxCategoryCodeElement);

    const taxRateElement = doc.createElement('ram:RateApplicablePercent');
    taxRateElement.textContent = item.vatPercentage.toString();
    taxElement.appendChild(taxRateElement);

    settlementElement.appendChild(taxElement);

    // Line net total = unit net price × quantity
    const monetarySummationElement = doc.createElement('ram:SpecifiedLineTradeSettlementMonetarySummation');
    const itemNetAmount = item.unitNetPrice * item.unitQuantity;
    const lineTotalElement = doc.createElement('ram:LineTotalAmount');
    lineTotalElement.textContent = itemNetAmount.toFixed(2);
    monetarySummationElement.appendChild(lineTotalElement);
    settlementElement.appendChild(monetarySummationElement);

    lineItemElement.appendChild(settlementElement);

    // Attach the finished line item to the transaction
    transactionElement.appendChild(lineItemElement);
  }
}
|
||||
|
||||
/**
 * Renders a timestamp as an eight-digit `YYYYMMDD` string.
 * Year, month and day are taken from the local-time accessors of `Date`;
 * month and day are left-padded with a zero to two digits.
 * @param timestamp Timestamp to format (passed to the `Date` constructor)
 * @returns Formatted date string
 */
private formatDateYYYYMMDD(timestamp: number): string {
  const moment = new Date(timestamp);
  const twoDigits = (value: number): string => value.toString().padStart(2, '0');

  // getMonth() is zero-based, hence the +1
  return `${moment.getFullYear()}${twoDigits(moment.getMonth() + 1)}${twoDigits(moment.getDate())}`;
}
|
||||
}
|
@ -1,13 +1,181 @@
|
||||
import { BaseValidator } from '../base/base.validator.js';
|
||||
import { InvoiceFormat } from '../../interfaces/common.js';
|
||||
import { InvoiceFormat, ValidationLevel } from '../../interfaces/common.js';
|
||||
import type { ValidationResult } from '../../interfaces/common.js';
|
||||
import { FormatDetector } from '../utils/format.detector.js';
|
||||
|
||||
// Import specific validators
|
||||
// import { UBLValidator } from '../ubl/ubl.validator.js';
|
||||
// import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js';
|
||||
import { UBLBaseValidator } from '../ubl/ubl.validator.js';
|
||||
import { FacturXValidator } from '../cii/facturx/facturx.validator.js';
|
||||
import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';
|
||||
|
||||
/**
 * Validator for plain UBL documents.
 * Performs a presence check on a small set of mandatory elements and a few
 * basic business-rule checks (issue date, at least one line).
 */
class UBLValidator extends UBLBaseValidator {
  /**
   * Checks that the mandatory UBL elements are present anywhere in the
   * document. One `UBL-STRUCT-1` error is recorded per missing element.
   * @returns false when no document is loaded or any element is missing
   */
  protected validateStructure(): boolean {
    if (!this.doc) return false;

    let structureOk = true;

    ['cbc:ID', 'cbc:IssueDate', 'cac:AccountingSupplierParty', 'cac:AccountingCustomerParty'].forEach(
      (element) => {
        if (this.exists(`//${element}`)) {
          return;
        }
        this.addError('UBL-STRUCT-1', `Required element ${element} is missing`, `/${element}`);
        structureOk = false;
      }
    );

    return structureOk;
  }

  /**
   * Checks the basic UBL business rules:
   * the issue date must be present (`UBL-BUS-1`) and parseable by `Date`
   * (`UBL-BUS-2`), and the document must contain at least one invoice line or
   * credit note line (`UBL-BUS-3`).
   * @returns false when no document is loaded or any rule is violated
   */
  protected validateBusinessRules(): boolean {
    if (!this.doc) return false;

    let rulesOk = true;

    // Issue date: present and parseable
    const issueDateText = this.getText('//cbc:IssueDate');
    if (!issueDateText) {
      this.addError('UBL-BUS-1', 'Issue date is required', '//cbc:IssueDate');
      rulesOk = false;
    } else if (isNaN(new Date(issueDateText).getTime())) {
      this.addError('UBL-BUS-2', 'Issue date is not a valid date', '//cbc:IssueDate');
      rulesOk = false;
    }

    // At least one line of either kind
    const hasAnyLine = this.exists('//cac:InvoiceLine') || this.exists('//cac:CreditNoteLine');
    if (!hasAnyLine) {
      this.addError('UBL-BUS-3', 'At least one invoice line or credit note line is required', '/');
      rulesOk = false;
    }

    return rulesOk;
  }
}
|
||||
|
||||
/**
 * XRechnung validator implementation.
 * Runs the UBL checks of the parent class first and then adds the
 * XRechnung-specific structure and business rules on top.
 */
class XRechnungValidator extends UBLValidator {
  /**
   * UBL structure checks plus:
   * - `XRECH-STRUCT-1`: a `cbc:CustomizationID` whose text contains "xrechnung"
   * - `XRECH-STRUCT-2`: a `cbc:BuyerReference` element
   * @returns true only when the base checks and both extra checks pass
   */
  protected validateStructure(): boolean {
    // Always run the base checks so their errors are reported as well
    let valid = super.validateStructure();

    // XRechnung customization ID
    if (!this.exists('//cbc:CustomizationID[contains(text(), "xrechnung")]')) {
      this.addError(
        'XRECH-STRUCT-1',
        'XRechnung customization ID is missing or invalid',
        '//cbc:CustomizationID'
      );
      valid = false;
    }

    // Buyer reference is mandatory in XRechnung
    if (!this.exists('//cbc:BuyerReference')) {
      this.addError(
        'XRECH-STRUCT-2',
        'BuyerReference is required in XRechnung',
        // Point at the missing element, consistent with the other errors
        '//cbc:BuyerReference'
      );
      valid = false;
    }

    return valid;
  }

  /**
   * UBL business rules plus `XRECH-BUS-1`: a supplier VAT ID that starts with
   * "DE" must be "DE" followed by exactly nine digits. VAT IDs with another
   * prefix, or a missing VAT ID, are not checked here.
   * @returns true only when the base rules and the extra rule pass
   */
  protected validateBusinessRules(): boolean {
    // Always run the base rules so their errors are reported as well
    let valid = super.validateBusinessRules();

    // German VAT ID format
    const supplierVatId = this.getText('//cac:AccountingSupplierParty//cbc:CompanyID[../cac:TaxScheme/cbc:ID="VAT"]');
    if (supplierVatId && supplierVatId.startsWith('DE') && !/^DE[0-9]{9}$/.test(supplierVatId)) {
      this.addError(
        'XRECH-BUS-1',
        'German VAT ID format is invalid (must be DE followed by 9 digits)',
        '//cac:AccountingSupplierParty//cbc:CompanyID'
      );
      valid = false;
    }

    return valid;
  }
}
|
||||
|
||||
/**
 * FatturaPA validator implementation.
 * Basic implementation for Italian electronic invoices: it only verifies that
 * the XML contains a `FatturaElettronica` element.
 */
class FatturaPAValidator extends BaseValidator {
  /**
   * Validates the document at the requested level.
   * SYNTAX runs the schema check; SEMANTIC and BUSINESS run the schema check
   * followed by the business rules. Previously collected errors are cleared.
   * @param level Validation level, defaults to SYNTAX
   * @returns Validation result with the collected errors
   */
  validate(level: ValidationLevel = ValidationLevel.SYNTAX): ValidationResult {
    // Reset errors
    this.errors = [];

    let valid = true;

    if (level === ValidationLevel.SYNTAX) {
      valid = this.validateSchema();
    } else if (level === ValidationLevel.SEMANTIC || level === ValidationLevel.BUSINESS) {
      valid = this.validateSchema() && this.validateBusinessRules();
    }

    return {
      valid,
      errors: this.errors,
      level
    };
  }

  /**
   * Checks for an opening `FatturaElettronica` tag, with or without a
   * namespace prefix (e.g. `<p:FatturaElettronica …>`). The name must end at
   * whitespace, `>` or `/`, so child elements such as
   * `FatturaElettronicaHeader` do not satisfy the check on their own.
   * @returns true when the element is found; otherwise records `FATT-SCHEMA-1`
   */
  protected validateSchema(): boolean {
    const rootTagPattern = /<(?:[A-Za-z_][\w.\-]*:)?FatturaElettronica(?=[\s>\/])/;
    if (!rootTagPattern.test(this.xml)) {
      this.addError('FATT-SCHEMA-1', 'Root element must be FatturaElettronica', '/');
      return false;
    }
    return true;
  }

  /**
   * Placeholder: no FatturaPA business rules are implemented yet, so this
   * simply repeats the schema check.
   * @returns result of the schema check
   */
  protected validateBusinessRules(): boolean {
    return this.validateSchema();
  }
}
|
||||
|
||||
/**
|
||||
* Factory to create the appropriate validator based on the XML format
|
||||
*/
|
||||
@ -18,34 +186,73 @@ export class ValidatorFactory {
|
||||
* @returns Appropriate validator instance
|
||||
*/
|
||||
public static createValidator(xml: string): BaseValidator {
  try {
    // Detect the format once and pick the matching validator
    const format = FormatDetector.detectFormat(xml);

    switch (format) {
      case InvoiceFormat.UBL:
        return new UBLValidator(xml);

      case InvoiceFormat.XRECHNUNG:
        return new XRechnungValidator(xml);

      case InvoiceFormat.CII:
        // For now, use Factur-X validator for generic CII
        return new FacturXValidator(xml);

      case InvoiceFormat.ZUGFERD:
        return new ZUGFeRDValidator(xml);

      case InvoiceFormat.FACTURX:
        return new FacturXValidator(xml);

      case InvoiceFormat.FATTURAPA:
        return new FatturaPAValidator(xml);

      default:
        // For unknown formats, provide a generic validator that will
        // mark the document as invalid but won't throw an exception
        return new GenericValidator(xml, format);
    }
  } catch (error) {
    // If an error occurs during validator creation, return a generic validator
    // that will provide meaningful error information instead of throwing
    console.error(`Error creating validator: ${error}`);
    return new GenericValidator(xml, 'unknown');
  }
}
|
||||
}
|
||||
|
||||
/**
 * Fallback validator for unknown or unsupported formats.
 * It never throws: construction records a single `GEN-1` error naming the
 * format, and every validation reports the document as invalid.
 */
class GenericValidator extends BaseValidator {
  private format: string;

  /**
   * @param xml XML string handed to the base validator
   * @param format Name of the detected (unsupported) format, used in the error text
   */
  constructor(xml: string, format: string) {
    super(xml);
    this.format = format;
    // Recorded once here; validate() does not reset the error list
    this.addError('GEN-1', `Unsupported invoice format: ${format}`, '/');
  }

  /**
   * Always reports the document as invalid, returning the errors collected so far.
   * @param level Validation level, echoed back in the result (defaults to SYNTAX)
   */
  validate(level: ValidationLevel = ValidationLevel.SYNTAX): ValidationResult {
    const result: ValidationResult = { valid: false, errors: this.errors, level };
    return result;
  }

  /** Schema validation is not available for unsupported formats. */
  protected validateSchema(): boolean {
    return false;
  }

  /** Business-rule validation is not available for unsupported formats. */
  protected validateBusinessRules(): boolean {
    return false;
  }
}
|
@ -11,7 +11,10 @@ export abstract class BaseXMLExtractor {
|
||||
'factur-x.xml',
|
||||
'zugferd-invoice.xml',
|
||||
'ZUGFeRD-invoice.xml',
|
||||
'xrechnung.xml'
|
||||
'xrechnung.xml',
|
||||
'ubl-invoice.xml',
|
||||
'invoice.xml',
|
||||
'metadata.xml'
|
||||
];
|
||||
|
||||
/**
|
||||
@ -32,7 +35,8 @@ export abstract class BaseXMLExtractor {
|
||||
'urn:zugferd',
|
||||
'urn:factur-x',
|
||||
'factur-x.eu',
|
||||
'ZUGFeRD'
|
||||
'ZUGFeRD',
|
||||
'FatturaElettronica'
|
||||
];
|
||||
|
||||
/**
|
||||
@ -47,7 +51,8 @@ export abstract class BaseXMLExtractor {
|
||||
'</rsm:CrossIndustryDocument>',
|
||||
'</ram:CrossIndustryDocument>',
|
||||
'</ubl:Invoice>',
|
||||
'</ubl:CreditNote>'
|
||||
'</ubl:CreditNote>',
|
||||
'</FatturaElettronica>'
|
||||
];
|
||||
|
||||
/**
|
||||
@ -69,21 +74,19 @@ export abstract class BaseXMLExtractor {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if it starts with XML declaration
|
||||
if (!xmlString.includes('<?xml')) {
|
||||
// Check if it starts with XML declaration or a valid element
|
||||
if (!xmlString.includes('<?xml') && !this.hasKnownXmlElement(xmlString)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the XML string contains known invoice formats
|
||||
const hasKnownFormat = this.knownFormats.some(format => xmlString.includes(format));
|
||||
const hasKnownFormat = this.hasKnownFormat(xmlString);
|
||||
if (!hasKnownFormat) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the XML string contains binary data or invalid characters
|
||||
const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
|
||||
const hasBinaryData = invalidChars.some(char => xmlString.includes(char));
|
||||
if (hasBinaryData) {
|
||||
if (this.hasBinaryData(xmlString)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -92,6 +95,11 @@ export abstract class BaseXMLExtractor {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if XML has a proper structure (contains both opening and closing tags)
|
||||
if (!this.hasProperXmlStructure(xmlString)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Error validating XML:', error);
|
||||
@ -99,6 +107,85 @@ export abstract class BaseXMLExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the XML string contains a known element
|
||||
* @param xmlString XML string to check
|
||||
* @returns True if the XML contains a known element
|
||||
*/
|
||||
protected hasKnownXmlElement(xmlString: string): boolean {
  // A format counts as present when its name directly follows a '<',
  // i.e. it appears as the beginning of an opening tag.
  return this.knownFormats.some(format => xmlString.includes(`<${format}`));
}
|
||||
|
||||
/**
|
||||
* Check if the XML string contains a known format
|
||||
* @param xmlString XML string to check
|
||||
* @returns True if the XML contains a known format
|
||||
*/
|
||||
protected hasKnownFormat(xmlString: string): boolean {
  // Plain substring search: any known format marker anywhere in the text is enough.
  return this.knownFormats.some(format => xmlString.includes(format));
}
|
||||
|
||||
/**
|
||||
* Check if the XML string has a proper structure
|
||||
* @param xmlString XML string to check
|
||||
* @returns True if the XML has a proper structure
|
||||
*/
|
||||
protected hasProperXmlStructure(xmlString: string): boolean {
  // Preferred evidence: a known closing tag together with its opening counterpart.
  // The opening tag is derived by dropping the first '/', so it still ends in '>'
  // and only matches an opening tag written without attributes.
  const hasKnownTagPair = this.knownEndTags.some(closingTag => {
    const openingTag = closingTag.replace('/', '');
    return xmlString.includes(openingTag) && xmlString.includes(closingTag);
  });
  if (hasKnownTagPair) {
    return true;
  }

  // Fallback 1: a complete XML declaration.
  if (xmlString.includes('<?xml') && xmlString.includes('?>')) {
    return true;
  }

  // Fallback 2: at least one tag of any kind plus at least one closing tag.
  const hasAnyTag = xmlString.match(/<[^>]+>/) !== null;
  return hasAnyTag && xmlString.match(/<\/[^>]+>/) !== null;
}
|
||||
|
||||
/**
|
||||
* Check if the XML string contains binary data
|
||||
* @param xmlString XML string to check
|
||||
* @returns True if the XML contains binary data
|
||||
*/
|
||||
protected hasBinaryData(xmlString: string): boolean {
  // Control characters that should never occur in XML text.
  const binaryChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
  for (const marker of binaryChars) {
    if (xmlString.indexOf(marker) !== -1) {
      return true;
    }
  }

  // A run of NUL bytes is a typical sign of binary payload.
  const consecutiveNulls = '\u0000\u0000\u0000';
  if (xmlString.indexOf(consecutiveNulls) !== -1) {
    return true;
  }

  // Finally, reject text in which more than 5% of the characters are
  // non-printable control codes (tab, LF and CR are not counted).
  const controlMatches = xmlString.match(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g);
  const nonPrintableCount = controlMatches === null ? 0 : controlMatches.length;
  return nonPrintableCount > xmlString.length * 0.05;
}
|
||||
|
||||
/**
|
||||
* Extract XML from a string
|
||||
* @param text Text to extract XML from
|
||||
@ -108,9 +195,22 @@ export abstract class BaseXMLExtractor {
|
||||
protected extractXmlFromString(text: string, startIndex: number = 0): string | null {
|
||||
try {
|
||||
// Find the start of the XML document
|
||||
const xmlStartIndex = text.indexOf('<?xml', startIndex);
|
||||
let xmlStartIndex = text.indexOf('<?xml', startIndex);
|
||||
|
||||
// If no XML declaration, try to find known elements
|
||||
if (xmlStartIndex === -1) {
|
||||
return null;
|
||||
for (const format of this.knownFormats) {
|
||||
const formatStartIndex = text.indexOf(`<${format.split(':').pop()}`, startIndex);
|
||||
if (formatStartIndex !== -1) {
|
||||
xmlStartIndex = formatStartIndex;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Still didn't find any start marker
|
||||
if (xmlStartIndex === -1) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find the end of the XML document
|
||||
@ -123,12 +223,26 @@ export abstract class BaseXMLExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
// If no known end tag found, try to use a heuristic approach
|
||||
if (xmlEndIndex === -1) {
|
||||
return null;
|
||||
// Try to find the last closing tag
|
||||
const lastClosingTagMatch = text.slice(xmlStartIndex).match(/<\/[^>]+>(?!.*<\/[^>]+>)/);
|
||||
if (lastClosingTagMatch && lastClosingTagMatch.index !== undefined) {
|
||||
xmlEndIndex = xmlStartIndex + lastClosingTagMatch.index + lastClosingTagMatch[0].length;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Extract the XML content
|
||||
return text.substring(xmlStartIndex, xmlEndIndex);
|
||||
const xmlContent = text.substring(xmlStartIndex, xmlEndIndex);
|
||||
|
||||
// Validate the extracted content
|
||||
if (this.isValidXml(xmlContent)) {
|
||||
return xmlContent;
|
||||
}
|
||||
|
||||
return null;
|
||||
} catch (error) {
|
||||
console.error('Error extracting XML from string:', error);
|
||||
return null;
|
||||
@ -143,34 +257,99 @@ export abstract class BaseXMLExtractor {
|
||||
*/
|
||||
protected async extractXmlFromStream(stream: PDFRawStream, fileName: string): Promise<string | null> {
  // Strategy: interpret the stream bytes as text first (the embedded file may be
  // stored uncompressed); only if that yields no valid XML, inflate and retry.
  // Never throws: any failure is logged and reported as null.
  try {
    // Get the raw bytes from the stream
    const rawBytes = stream.getContents();

    // First try without decompression (in case the content is not compressed)
    const rawXml = this.tryDecodeBuffer(rawBytes);
    if (rawXml && this.isValidXml(rawXml)) {
      console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`);
      return rawXml;
    }

    // Try with decompression
    try {
      const decompressedBytes = this.tryDecompress(rawBytes);
      if (decompressedBytes) {
        const inflatedXml = this.tryDecodeBuffer(decompressedBytes);
        if (inflatedXml && this.isValidXml(inflatedXml)) {
          console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`);
          return inflatedXml;
        }
      }
    } catch (decompressError) {
      // Decompression problems are not fatal; the stream simply yields no XML.
      console.log(`Decompression failed for ${fileName}: ${decompressError}`);
    }

    return null;
  } catch (error) {
    console.error('Error extracting XML from stream:', error);
    return null;
  }
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to decompress a buffer using different methods
|
||||
* @param buffer Buffer to decompress
|
||||
* @returns Decompressed buffer or null if decompression failed
|
||||
*/
|
||||
protected tryDecompress(buffer: Uint8Array): Uint8Array | null {
  let inflated: Uint8Array | null = null;
  try {
    // pako handles deflate/zlib compressed data
    inflated = pako.inflate(buffer);
  } catch (error) {
    // Failure is expected for data that is uncompressed or uses another algorithm
    console.warn('Pako decompression failed, might be uncompressed or using a different algorithm');
  }
  return inflated;
}
|
||||
|
||||
/**
|
||||
* Try to decode a buffer to a string using different encodings
|
||||
* @param buffer Buffer to decode
|
||||
* @returns Decoded string or null if decoding failed
|
||||
*/
|
||||
protected tryDecodeBuffer(buffer: Uint8Array): string | null {
  try {
    // UTF-8 is attempted first
    const utf8Text = new TextDecoder('utf-8').decode(buffer);
    if (this.isPlausibleXml(utf8Text)) {
      return utf8Text;
    }

    // ISO-8859-1 (Latin1) is the only fallback encoding
    const latin1Text = this.decodeLatin1(buffer);
    return this.isPlausibleXml(latin1Text) ? latin1Text : null;
  } catch (error) {
    console.warn('Error decoding buffer:', error);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Decode a buffer using ISO-8859-1 (Latin1) encoding
|
||||
* @param buffer Buffer to decode
|
||||
* @returns Decoded string
|
||||
*/
|
||||
protected decodeLatin1(buffer: Uint8Array): string {
  // Each byte value is used directly as the code unit of one output character.
  const characters: string[] = [];
  for (let position = 0; position < buffer.length; position++) {
    characters.push(String.fromCharCode(buffer[position]));
  }
  return characters.join('');
}
|
||||
|
||||
/**
|
||||
* Check if a string is plausibly XML (quick check before validation)
|
||||
* @param content String to check
|
||||
* @returns True if the string is plausibly XML
|
||||
*/
|
||||
protected isPlausibleXml(content: string): boolean {
  // Without both angle brackets the text cannot contain any tag at all.
  if (!content.includes('<') || !content.includes('>')) {
    return false;
  }

  // Either an XML declaration or any known format marker makes it plausible.
  if (content.includes('<?xml')) {
    return true;
  }
  return this.knownFormats.some(format => content.includes(format));
}
|
||||
}
|
@ -6,50 +6,157 @@ import { BaseXMLExtractor } from './base.extractor.js';
|
||||
* Used as a fallback when other extraction methods fail
|
||||
*/
|
||||
export class TextXMLExtractor extends BaseXMLExtractor {
  // Distance between the start positions of successive search windows (4MB)
  private readonly CHUNK_SIZE = 4 * 1024 * 1024;

  // Maximum number of windows to check (effective 20MB search limit)
  private readonly MAX_CHUNKS = 5;

  // Common XML patterns to look for
  private readonly XML_PATTERNS = [
    '<?xml',
    '<CrossIndustryInvoice',
    '<CrossIndustryDocument',
    '<Invoice',
    '<CreditNote',
    '<rsm:CrossIndustryInvoice',
    '<rsm:CrossIndustryDocument',
    '<ram:CrossIndustryDocument',
    '<ubl:Invoice',
    '<ubl:CreditNote',
    '<FatturaElettronica'
  ];

  /**
   * Extract XML from a PDF buffer by searching for XML patterns in the text
   * Uses a chunked approach to handle large files efficiently
   * @param pdfBuffer PDF buffer
   * @returns XML content or null if not found (errors are logged, never thrown)
   */
  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
    try {
      console.log('Attempting text-based XML extraction from PDF...');

      // A Node Buffer already is a Uint8Array, so it can be searched directly
      // without copying the whole PDF first.
      return this.extractXmlFromBufferChunked(pdfBuffer);
    } catch (error) {
      console.error('Error in text-based extraction:', error);
      return null;
    }
  }

  /**
   * Extract XML from buffer using a chunked approach
   * This helps avoid memory issues with large PDFs
   *
   * Each window spans two chunk lengths while the start position only advances
   * by one, so consecutive windows overlap by CHUNK_SIZE. An XML document that
   * crosses a chunk boundary (and is no longer than CHUNK_SIZE) is therefore
   * fully contained in at least one window.
   * @param buffer Buffer to search in
   * @returns XML content or null if not found
   */
  private extractXmlFromBufferChunked(buffer: Uint8Array): string | null {
    // Never look beyond the overall search limit or the end of the buffer
    const searchLimit = Math.min(buffer.length, this.CHUNK_SIZE * this.MAX_CHUNKS);

    for (let chunkIndex = 0; chunkIndex < this.MAX_CHUNKS; chunkIndex++) {
      const startPos = chunkIndex * this.CHUNK_SIZE;
      if (startPos >= searchLimit) break;

      const endPos = Math.min(startPos + 2 * this.CHUNK_SIZE, searchLimit);

      // subarray() creates a view on the same memory instead of copying the bytes
      const chunk = buffer.subarray(startPos, endPos);

      // Try to extract XML from this window
      const chunkResult = this.processChunk(chunk, startPos);
      if (chunkResult) {
        return chunkResult;
      }

      // This window already reached the end of the searchable range
      if (endPos >= searchLimit) break;
    }

    console.warn('No valid XML found in any chunk of the PDF');
    return null;
  }

  /**
   * Process a single chunk of the PDF buffer
   * @param chunk Chunk buffer to process
   * @param chunkOffset Offset position of the chunk in the original buffer
   * @returns XML content or null if not found
   */
  private processChunk(chunk: Uint8Array, chunkOffset: number): string | null {
    try {
      // First try UTF-8 encoding for this chunk
      const utf8String = this.decodeBufferToString(chunk, 'utf-8');
      let xmlContent = this.searchForXmlInString(utf8String);

      if (xmlContent) {
        console.log(`Found XML content in chunk at offset ${chunkOffset} using UTF-8 encoding`);
        return xmlContent;
      }

      // If UTF-8 fails, try Latin-1 (ISO-8859-1) which can handle binary better
      const latin1String = this.decodeBufferToString(chunk, 'latin1');
      xmlContent = this.searchForXmlInString(latin1String);

      if (xmlContent) {
        console.log(`Found XML content in chunk at offset ${chunkOffset} using Latin-1 encoding`);
        return xmlContent;
      }

      // No XML found in this chunk
      return null;
    } catch (error) {
      console.warn(`Error processing chunk at offset ${chunkOffset}:`, error);
      return null;
    }
  }

  /**
   * Safely decode a buffer to string using the specified encoding
   * @param buffer Buffer to decode
   * @param encoding Encoding to use ('utf-8' or 'latin1')
   * @returns Decoded string (empty string if decoding fails)
   */
  private decodeBufferToString(buffer: Uint8Array, encoding: 'utf-8' | 'latin1'): string {
    try {
      if (encoding === 'utf-8') {
        // Non-fatal mode: malformed byte sequences become replacement characters
        return new TextDecoder('utf-8', { fatal: false }).decode(buffer);
      }

      // For Latin-1, bytes 0-255 map directly to code points 0-255. Node's
      // 'latin1' Buffer encoding performs exactly this mapping; wrapping the
      // existing memory avoids building one array element per byte.
      return Buffer.from(buffer.buffer, buffer.byteOffset, buffer.byteLength).toString('latin1');
    } catch (error) {
      console.warn(`Error decoding buffer using ${encoding}:`, error);
      // Return empty string on error to allow processing to continue
      return '';
    }
  }

  /**
   * Search for XML patterns in a string
   * Only the first occurrence of each pattern is examined.
   * @param content String to search in
   * @returns XML content or null if not found
   */
  private searchForXmlInString(content: string): string | null {
    if (!content) return null;

    // Search for each XML pattern
    for (const pattern of this.XML_PATTERNS) {
      const patternIndex = content.indexOf(pattern);
      if (patternIndex !== -1) {
        console.log(`Found XML pattern "${pattern}" at position ${patternIndex}`);

        // Try to extract the XML content starting from the pattern position
        const xmlContent = this.extractXmlFromString(content, patternIndex);

        // Validate the extracted content
        if (xmlContent && this.isValidXml(xmlContent)) {
          console.log('Successfully extracted and validated XML from text');
          return xmlContent;
        }
      }
    }

    return null;
  }
}
|
@ -1,8 +1,33 @@
|
||||
import { PDFDocument, AFRelationship } from '../../plugins.js';
|
||||
import type { IPdf } from '../../interfaces/common.js';
|
||||
|
||||
/**
|
||||
* Error types for PDF embedding operations
|
||||
*/
|
||||
export enum PDFEmbedError {
|
||||
// Reported when PDFDocument.load() rejects the input PDF
LOAD_ERROR = 'PDF loading failed',
|
||||
// Reported when attaching the XML fails, or on any unexpected error in embedXml
EMBED_ERROR = 'XML embedding failed',
|
||||
// Reported when saving the modified PDF fails
SAVE_ERROR = 'PDF saving failed',
|
||||
// Reported when the PDF buffer or the XML content is empty or missing
INVALID_INPUT = 'Invalid input parameters'
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of a PDF embedding operation
|
||||
*/
|
||||
export interface PDFEmbedResult {
|
||||
// True when the operation completed; false when `error` describes a failure
success: boolean;
|
||||
// Bytes of the modified PDF; set by embedXml on success
data?: Uint8Array;
|
||||
// PDF object including the modified buffer; set by createPdfWithXml on success
pdf?: IPdf;
|
||||
// Failure details; set whenever `success` is false
error?: {
|
||||
// Category of the failure
type: PDFEmbedError;
|
||||
// Human-readable description of the failure
message: string;
|
||||
// Underlying exception, when one was caught and it was an Error instance
originalError?: Error;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Class for embedding XML into PDF files
|
||||
* Provides robust error handling and support for different PDF formats
|
||||
*/
|
||||
export class PDFEmbedder {
|
||||
/**
|
||||
@ -11,40 +36,92 @@ export class PDFEmbedder {
|
||||
* @param xmlContent XML content to embed
|
||||
* @param filename Filename for the embedded XML
|
||||
* @param description Description for the embedded XML
|
||||
* @returns Modified PDF buffer
|
||||
* @returns Result with either modified PDF buffer or error information
|
||||
*/
|
||||
public async embedXml(
  pdfBuffer: Uint8Array | Buffer,
  xmlContent: string,
  filename: string = 'invoice.xml',
  description: string = 'XML Invoice'
): Promise<PDFEmbedResult> {
  // Contract: this method does not throw. Every failure (invalid input, load,
  // attach, save, or anything unexpected) is reported through the `error`
  // field of the returned result; on success `data` holds the new PDF bytes.
  try {
    // Validate inputs
    if (!pdfBuffer || pdfBuffer.length === 0) {
      return this.createErrorResult(PDFEmbedError.INVALID_INPUT, 'PDF buffer is empty or undefined');
    }

    if (!xmlContent) {
      return this.createErrorResult(PDFEmbedError.INVALID_INPUT, 'XML content is empty or undefined');
    }

    // Ensure buffer is Uint8Array
    const pdfBufferArray = Buffer.isBuffer(pdfBuffer) ? new Uint8Array(pdfBuffer) : pdfBuffer;

    // Load the PDF
    let pdfDoc: PDFDocument;
    try {
      pdfDoc = await PDFDocument.load(pdfBufferArray, {
        ignoreEncryption: true, // Try to load encrypted PDFs
        updateMetadata: false // Don't automatically update metadata
      });
    } catch (error) {
      return this.createErrorResult(
        PDFEmbedError.LOAD_ERROR,
        `Failed to load PDF: ${error instanceof Error ? error.message : String(error)}`,
        error instanceof Error ? error : undefined
      );
    }

    // Normalize filename (lowercase with XML extension)
    const attachmentName = this.normalizeFilename(filename);

    // Convert the XML string to a Uint8Array
    const xmlBuffer = new TextEncoder().encode(xmlContent);

    try {
      // Use pdf-lib's .attach() to embed the XML
      pdfDoc.attach(xmlBuffer, attachmentName, {
        mimeType: 'text/xml',
        description: description,
        creationDate: new Date(),
        modificationDate: new Date(),
        afRelationship: AFRelationship.Alternative,
      });
    } catch (error) {
      return this.createErrorResult(
        PDFEmbedError.EMBED_ERROR,
        `Failed to embed XML: ${error instanceof Error ? error.message : String(error)}`,
        error instanceof Error ? error : undefined
      );
    }

    // Save the modified PDF
    let modifiedPdfBytes: Uint8Array;
    try {
      modifiedPdfBytes = await pdfDoc.save({
        addDefaultPage: false, // Don't add a page if the document is empty
        useObjectStreams: false, // Better compatibility with older PDF readers
        updateFieldAppearances: false // Don't update form fields
      });
    } catch (error) {
      return this.createErrorResult(
        PDFEmbedError.SAVE_ERROR,
        `Failed to save modified PDF: ${error instanceof Error ? error.message : String(error)}`,
        error instanceof Error ? error : undefined
      );
    }

    return {
      success: true,
      data: modifiedPdfBytes
    };
  } catch (error) {
    // Catch any uncaught errors
    return this.createErrorResult(
      PDFEmbedError.EMBED_ERROR,
      `Unexpected error during XML embedding: ${error instanceof Error ? error.message : String(error)}`,
      error instanceof Error ? error : undefined
    );
  }
}
|
||||
|
||||
@ -56,7 +133,7 @@ export class PDFEmbedder {
|
||||
* @param description Description for the embedded XML
|
||||
* @param pdfName Name for the PDF
|
||||
* @param pdfId ID for the PDF
|
||||
* @returns IPdf object with embedded XML
|
||||
* @returns Result with either IPdf object or error information
|
||||
*/
|
||||
public async createPdfWithXml(
|
||||
pdfBuffer: Uint8Array | Buffer,
|
||||
@ -65,16 +142,101 @@ export class PDFEmbedder {
|
||||
description: string = 'XML Invoice',
|
||||
pdfName: string = 'invoice.pdf',
|
||||
pdfId: string = `invoice-${Date.now()}`
|
||||
): Promise<IPdf> {
|
||||
const modifiedPdfBytes = await this.embedXml(pdfBuffer, xmlContent, filename, description);
|
||||
): Promise<PDFEmbedResult> {
|
||||
// Embed XML into PDF
|
||||
const embedResult = await this.embedXml(pdfBuffer, xmlContent, filename, description);
|
||||
|
||||
// If embedding failed, return the error
|
||||
if (!embedResult.success || !embedResult.data) {
|
||||
return embedResult;
|
||||
}
|
||||
|
||||
return {
|
||||
// Create IPdf object
|
||||
const pdfObject: IPdf = {
|
||||
name: pdfName,
|
||||
id: pdfId,
|
||||
metadata: {
|
||||
textExtraction: ''
|
||||
textExtraction: '',
|
||||
format: this.detectPdfFormat(xmlContent),
|
||||
embeddedXml: {
|
||||
filename: filename,
|
||||
description: description
|
||||
}
|
||||
},
|
||||
buffer: modifiedPdfBytes
|
||||
buffer: embedResult.data
|
||||
};
|
||||
|
||||
return {
|
||||
success: true,
|
||||
pdf: pdfObject
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures the filename is normalized according to PDF/A requirements
|
||||
* @param filename Filename to normalize
|
||||
* @returns Normalized filename
|
||||
*/
|
||||
private normalizeFilename(filename: string): string {
  const lowered = filename.toLowerCase();

  // Swap any other trailing extension for '.xml' (or append it when there is none)
  const withXmlExtension = lowered.endsWith('.xml')
    ? lowered
    : lowered.replace(/\.[^/.]+$/, '') + '.xml';

  // Everything outside a-z, 0-9, '_', '.' and '-' becomes an underscore
  return withXmlExtension.replace(/[^a-z0-9_.-]/g, '_');
}
|
||||
|
||||
/**
|
||||
* Tries to detect the format of the XML content
|
||||
* @param xmlContent XML content
|
||||
* @returns Format string or undefined
|
||||
*/
|
||||
private detectPdfFormat(xmlContent: string): string | undefined {
  // Checks run in a fixed order and the first match wins, so a document
  // carrying several markers is labelled by the earliest rule below.
  const contains = (marker: string): boolean => xmlContent.includes(marker);

  if (contains('factur-x.eu') || contains('factur-x.xml')) {
    return 'factur-x';
  }
  if (contains('zugferd') || contains('ZUGFeRD')) {
    return 'zugferd';
  }
  if (contains('xrechnung')) {
    return 'xrechnung';
  }
  if (contains('<Invoice') || contains('<CreditNote')) {
    return 'ubl';
  }
  if (contains('FatturaElettronica')) {
    return 'fatturapa';
  }

  // No known marker found
  return undefined;
}
|
||||
|
||||
/**
|
||||
* Creates an error result object
|
||||
* @param type Error type
|
||||
* @param message Error message
|
||||
* @param originalError Original error object
|
||||
* @returns Error result
|
||||
*/
|
||||
private createErrorResult(
  type: PDFEmbedError,
  message: string,
  originalError?: Error
): PDFEmbedResult {
  // Assemble the failure result first; logging below does not alter it.
  const failure: PDFEmbedResult = {
    success: false,
    error: { type, message, originalError }
  };

  // Log a summary line, followed by the underlying error when one was given.
  console.error(`PDF Embedder Error (${type}): ${message}`);
  if (originalError) {
    console.error(originalError);
  }

  return failure;
}
|
||||
}
|
@ -4,6 +4,32 @@ import {
|
||||
AssociatedFilesExtractor,
|
||||
TextXMLExtractor
|
||||
} from './extractors/index.js';
|
||||
import { FormatDetector } from '../utils/format.detector.js';
|
||||
import { InvoiceFormat } from '../../interfaces/common.js';
|
||||
|
||||
/**
|
||||
* Error types for PDF extraction operations
|
||||
*/
|
||||
export enum PDFExtractError {
|
||||
// Reported when an unexpected exception escapes the extraction loop
EXTRACT_ERROR = 'XML extraction failed',
|
||||
// Reported when the PDF buffer is empty or missing
INVALID_INPUT = 'Invalid input parameters',
|
||||
// Reported when every registered extractor finished without producing XML
NO_XML_FOUND = 'No XML found in PDF'
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of a PDF extraction operation
|
||||
*/
|
||||
export interface PDFExtractResult {
|
||||
// True when XML was extracted; false when `error` describes a failure
success: boolean;
|
||||
// Extracted XML text; set on success
xml?: string;
|
||||
// Format reported by FormatDetector.detectFormat for the extracted XML
format?: InvoiceFormat;
|
||||
// Constructor name of the extractor that produced the XML
extractorUsed?: string;
|
||||
// Failure details; set whenever `success` is false
error?: {
|
||||
// Category of the failure
type: PDFExtractError;
|
||||
// Human-readable description of the failure
message: string;
|
||||
// Underlying exception, when one was caught and it was an Error instance
originalError?: Error;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Main PDF extractor class that orchestrates the extraction process
|
||||
@ -18,9 +44,9 @@ export class PDFExtractor {
|
||||
constructor() {
  // Add extractors in order of preference/likelihood of success.
  // Each extractor is registered exactly once; extractXml tries them in this order.
  this.extractors.push(
    new StandardXMLExtractor(), // Standard PDF/A-3 embedded files
    new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X)
    new TextXMLExtractor() // Text-based extraction (fallback)
  );
}
|
||||
|
||||
@ -28,36 +54,88 @@ export class PDFExtractor {
|
||||
* Extract XML from a PDF buffer
|
||||
* Tries multiple extraction methods in sequence
|
||||
* @param pdfBuffer PDF buffer
|
||||
* @returns XML content or null if not found
|
||||
* @returns Result with either the extracted XML or error information
|
||||
*/
|
||||
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<PDFExtractResult> {
  // Contract: this method does not throw. It returns the XML of the first
  // extractor that succeeds, or a result whose `error` explains the failure.
  try {
    console.log('Starting XML extraction from PDF...');

    // Validate input
    if (!pdfBuffer || pdfBuffer.length === 0) {
      return this.createErrorResult(PDFExtractError.INVALID_INPUT, 'PDF buffer is empty or undefined');
    }

    // Ensure buffer is Uint8Array
    const pdfBufferArray = Buffer.isBuffer(pdfBuffer) ? new Uint8Array(pdfBuffer) : pdfBuffer;

    // Try each extractor in sequence
    for (const extractor of this.extractors) {
      const extractorName = extractor.constructor.name;
      console.log(`Trying extraction with ${extractorName}...`);

      try {
        const xml = await extractor.extractXml(pdfBufferArray);

        if (xml) {
          console.log(`Successfully extracted XML using ${extractorName}`);

          // Detect format of the extracted XML
          const format = FormatDetector.detectFormat(xml);

          return {
            success: true,
            xml,
            format,
            extractorUsed: extractorName
          };
        }
      } catch (error) {
        // Log error but continue with next extractor
        console.warn(`Error using ${extractorName}: ${error instanceof Error ? error.message : String(error)}`);
      }

      // Reached both when the extractor returned nothing and when it threw;
      // logged once per extractor.
      console.log(`Extraction with ${extractorName} failed, trying next method...`);
    }

    // If all extractors fail, return a no XML found error
    return this.createErrorResult(
      PDFExtractError.NO_XML_FOUND,
      'All extraction methods failed, no valid XML found in PDF'
    );
  } catch (error) {
    // Handle any unexpected errors
    return this.createErrorResult(
      PDFExtractError.EXTRACT_ERROR,
      `Unexpected error during XML extraction: ${error instanceof Error ? error.message : String(error)}`,
      error instanceof Error ? error : undefined
    );
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
/**
|
||||
* Create a PDF extract result with error information
|
||||
* @param type Error type
|
||||
* @param message Error message
|
||||
* @param originalError Original error object
|
||||
* @returns Error result
|
||||
*/
|
||||
private createErrorResult(
  type: PDFExtractError,
  message: string,
  originalError?: Error
): PDFExtractResult {
  // Assemble the failure result first; logging below does not alter it.
  const failure: PDFExtractResult = {
    success: false,
    error: { type, message, originalError }
  };

  // Log a summary line, followed by the underlying error when one was given.
  console.error(`PDF Extractor Error (${type}): ${message}`);
  if (originalError) {
    console.error(originalError);
  }

  return failure;
}
|
||||
}
|
@ -13,6 +13,18 @@ export class FormatDetector {
|
||||
*/
|
||||
public static detectFormat(xml: string): InvoiceFormat {
|
||||
try {
|
||||
// Quick check for empty or invalid XML
|
||||
if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
|
||||
return InvoiceFormat.UNKNOWN;
|
||||
}
|
||||
|
||||
// Quick string-based pre-checks for performance
|
||||
const quickCheck = FormatDetector.quickFormatCheck(xml);
|
||||
if (quickCheck !== InvoiceFormat.UNKNOWN) {
|
||||
return quickCheck;
|
||||
}
|
||||
|
||||
// More thorough parsing-based checks
|
||||
const doc = new DOMParser().parseFromString(xml, 'application/xml');
|
||||
const root = doc.documentElement;
|
||||
|
||||
@ -21,106 +33,26 @@ export class FormatDetector {
|
||||
}
|
||||
|
||||
// UBL detection (Invoice or CreditNote root element)
|
||||
if (root.nodeName === 'Invoice' || root.nodeName === 'CreditNote') {
|
||||
// For simplicity, we'll treat all UBL documents as XRechnung for now
|
||||
// In a real implementation, we would check for specific customization IDs
|
||||
return InvoiceFormat.XRECHNUNG;
|
||||
if (FormatDetector.isUBLFormat(root)) {
|
||||
// Check for XRechnung customization
|
||||
if (FormatDetector.isXRechnungFormat(doc)) {
|
||||
return InvoiceFormat.XRECHNUNG;
|
||||
}
|
||||
return InvoiceFormat.UBL;
|
||||
}
|
||||
|
||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element)
|
||||
if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice' ||
|
||||
root.nodeName.endsWith(':CrossIndustryInvoice')) {
|
||||
// Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X)
|
||||
const namespaces = {
|
||||
rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
|
||||
ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100'
|
||||
};
|
||||
|
||||
// Create XPath selector with namespaces
|
||||
const select = xpath.useNamespaces(namespaces);
|
||||
|
||||
// Look for profile identifier
|
||||
const profileNode = select(
|
||||
'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
||||
doc
|
||||
);
|
||||
|
||||
if (profileNode) {
|
||||
const profileText = profileNode.toString();
|
||||
|
||||
// Check for ZUGFeRD profiles
|
||||
if (profileText.includes('zugferd') ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// Check for Factur-X profiles
|
||||
if (profileText.includes('factur-x') ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_EN16931) {
|
||||
return InvoiceFormat.FACTURX;
|
||||
}
|
||||
}
|
||||
|
||||
// If we can't determine the specific CII format, default to generic CII
|
||||
return InvoiceFormat.CII;
|
||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element)
|
||||
if (FormatDetector.isCIIFormat(root)) {
|
||||
return FormatDetector.detectCIIFormat(doc, xml);
|
||||
}
|
||||
|
||||
// ZUGFeRD v1 detection (CrossIndustryDocument root element)
|
||||
if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' ||
|
||||
root.nodeName === 'ram:CrossIndustryDocument' || root.nodeName.endsWith(':CrossIndustryDocument')) {
|
||||
|
||||
// Check for ZUGFeRD v1 namespace in the document
|
||||
const xmlString = xml.toString();
|
||||
if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
|
||||
xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12') ||
|
||||
xmlString.includes('urn:ferd:CrossIndustryDocument') ||
|
||||
xmlString.includes('zugferd') ||
|
||||
xmlString.includes('ZUGFeRD')) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// Set up namespaces for XPath queries (ZUGFeRD v1)
|
||||
try {
|
||||
const namespaces = {
|
||||
rsm: ZUGFERD_V1_NAMESPACES.RSM,
|
||||
ram: ZUGFERD_V1_NAMESPACES.RAM
|
||||
};
|
||||
|
||||
// Create XPath selector with namespaces
|
||||
const select = xpath.useNamespaces(namespaces);
|
||||
|
||||
// Look for profile identifier
|
||||
const profileNode = select(
|
||||
'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
||||
doc
|
||||
);
|
||||
|
||||
if (profileNode) {
|
||||
const profileText = profileNode.toString();
|
||||
|
||||
// Check for ZUGFeRD v1 profiles
|
||||
if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('Error in ZUGFeRD v1 XPath detection:', error);
|
||||
}
|
||||
|
||||
// If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1
|
||||
if (FormatDetector.isZUGFeRDV1Format(root)) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// FatturaPA detection would be implemented here
|
||||
if (root.nodeName === 'FatturaElettronica' ||
|
||||
(root.getAttribute('xmlns') && root.getAttribute('xmlns')!.includes('fatturapa.gov.it'))) {
|
||||
// FatturaPA detection
|
||||
if (FormatDetector.isFatturaPAFormat(root)) {
|
||||
return InvoiceFormat.FATTURAPA;
|
||||
}
|
||||
|
||||
@ -130,4 +62,241 @@ export class FormatDetector {
|
||||
return InvoiceFormat.UNKNOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Cheap substring-based format sniffing that runs before any XML parsing.
 *
 * The input is lower-cased once and scanned for marker substrings. Formats
 * are tested in a fixed order (Factur-X, ZUGFeRD, XRechnung, FatturaPA) and
 * the first one with a matching marker wins.
 * @param xml XML string
 * @returns The matched format, or UNKNOWN when no marker is present and a
 *          parsing-based analysis is still required
 */
private static quickFormatCheck(xml: string): InvoiceFormat {
  const haystack = xml.toLowerCase();
  const containsAny = (markers: string[]): boolean =>
    markers.some((marker) => haystack.includes(marker));

  // Factur-X: direct markers, or the EN16931 URN combined with any "factur-x" mention
  const looksLikeFacturX =
    containsAny(['factur-x.eu', 'factur-x.xml', 'factur-x:']) ||
    (haystack.includes('urn:cen.eu:en16931:2017') && haystack.includes('factur-x'));
  if (looksLikeFacturX) {
    return InvoiceFormat.FACTURX;
  }

  // ZUGFeRD
  if (containsAny(['zugferd:', 'zugferd-invoice.xml', 'urn:ferd:', 'urn:zugferd'])) {
    return InvoiceFormat.ZUGFERD;
  }

  // XRechnung
  if (containsAny(['xrechnung', 'urn:xoev-de:kosit:standard:xrechnung'])) {
    return InvoiceFormat.XRECHNUNG;
  }

  // FatturaPA
  if (containsAny(['fatturapa', 'fattura elettronica', 'fatturaelettronica'])) {
    return InvoiceFormat.FATTURAPA;
  }

  // Nothing obvious found; the caller has to parse the document
  return InvoiceFormat.UNKNOWN;
}
|
||||
|
||||
/**
 * Tells whether the root element is a UBL Invoice or CreditNote.
 *
 * Only the root's node name is inspected: it matches when the name is
 * exactly `Invoice` / `CreditNote` or ends with `:Invoice` / `:CreditNote`
 * (any namespace prefix).
 * @param root Root element
 * @returns True if the root element name identifies a UBL document
 */
private static isUBLFormat(root: Element): boolean {
  const qualifiedName = root.nodeName;
  // Drop everything up to and including the last colon, if there is one
  const unprefixedName = qualifiedName.slice(qualifiedName.lastIndexOf(':') + 1);
  return unprefixedName === 'Invoice' || unprefixedName === 'CreditNote';
}
|
||||
|
||||
/**
 * Checks whether a document declares an XRechnung customization.
 *
 * Every `CustomizationID` element is inspected and the document counts as
 * XRechnung when one of them mentions "xrechnung" (compared
 * case-insensitively). Elements are looked up by the UBL
 * CommonBasicComponents-2 namespace so the result does not depend on which
 * prefix the document binds to it; the literal `cbc:CustomizationID` tag
 * name is still queried as well for DOMs that expose prefixed names only.
 *
 * If the DOM lookup throws, the document is serialized and searched as a
 * plain string instead.
 * @param doc XML document
 * @returns True if an XRechnung customization is found
 */
private static isXRechnungFormat(doc: Document): boolean {
  const cbcNamespace = 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';
  const mentionsXRechnung = (text: string | null | undefined): boolean =>
    !!text && text.toLowerCase().includes('xrechnung');

  try {
    const customizationNodes = [
      ...Array.from(doc.getElementsByTagNameNS(cbcNamespace, 'CustomizationID')),
      ...Array.from(doc.getElementsByTagName('cbc:CustomizationID')),
    ];

    return customizationNodes.some((node) => mentionsXRechnung(node.textContent));
  } catch (error) {
    console.warn('Error checking for XRechnung format:', error);
    // If direct DOM access fails, try a string-based approach
    const xmlStr = new XMLSerializer().serializeToString(doc);
    return mentionsXRechnung(xmlStr);
  }
}
|
||||
|
||||
/**
 * Tells whether the root element is a `CrossIndustryInvoice`
 * (the root used by Factur-X / ZUGFeRD v2+ documents).
 *
 * The root name matches either bare or with any namespace prefix.
 * @param root Root element
 * @returns True if the root element is a CrossIndustryInvoice
 */
private static isCIIFormat(root: Element): boolean {
  const rootName = root.nodeName;
  if (rootName === 'CrossIndustryInvoice') {
    return true;
  }
  // Covers 'rsm:CrossIndustryInvoice' as well as any other prefix
  return rootName.endsWith(':CrossIndustryInvoice');
}
|
||||
|
||||
/**
 * Tells whether the root element is a `CrossIndustryDocument`,
 * which this detector treats as ZUGFeRD v1.
 *
 * The root name matches either bare or with any namespace prefix.
 * @param root Root element
 * @returns True if the root element is a CrossIndustryDocument
 */
private static isZUGFeRDV1Format(root: Element): boolean {
  // Matches the bare name or any "<prefix>:CrossIndustryDocument"
  return /(?:^|:)CrossIndustryDocument$/.test(root.nodeName);
}
|
||||
|
||||
/**
 * Checks if the document is a FatturaPA format.
 *
 * A document qualifies when its root element is named `FatturaElettronica`
 * (bare or with any namespace prefix, matching how the other root checks in
 * this class treat prefixes), or when the root's default `xmlns` attribute
 * contains `fatturapa.gov.it`.
 *
 * Always returns a strict boolean; a missing or empty `xmlns` attribute
 * yields `false` rather than leaking `null` or `""` to the caller.
 * @param root Root element
 * @returns True if it's a FatturaPA format
 */
private static isFatturaPAFormat(root: Element): boolean {
  const rootName = root.nodeName;
  if (rootName === 'FatturaElettronica' || rootName.endsWith(':FatturaElettronica')) {
    return true;
  }

  // Read the attribute once; DOM implementations may return null or '' when it is absent
  const defaultNamespace = root.getAttribute('xmlns');
  return typeof defaultNamespace === 'string' && defaultNamespace.includes('fatturapa.gov.it');
}
|
||||
|
||||
/**
 * Detects the specific CII flavour (Factur-X vs ZUGFeRD) of a parsed document.
 *
 * Walks ExchangedDocumentContext -> GuidelineSpecifiedDocumentContextParameter
 * -> ID and compares each ID text against the known profile identifiers.
 * Elements are located by namespace first, so the document may bind the
 * CII namespaces to any prefix; the literal tag names (`ram:...`) are queried
 * as well for documents or DOMs where the namespace lookup finds nothing.
 *
 * When no ID identifies a profile, or when DOM traversal throws, detection
 * falls back to scanning the raw XML string.
 * @param doc XML document
 * @param xml Original XML string for fallback checks
 * @returns Detected format
 */
private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
  const rsmNamespace = 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';
  const ramNamespace =
    'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100';

  // Namespace-aware matches first, then literal-name matches not already present
  const collect = (
    scope: Document | Element,
    namespace: string,
    localName: string,
    literalName: string
  ): Element[] => {
    const matches: Element[] = Array.from(scope.getElementsByTagNameNS(namespace, localName));
    for (const candidate of Array.from(scope.getElementsByTagName(literalName))) {
      if (!matches.includes(candidate)) {
        matches.push(candidate);
      }
    }
    return matches;
  };

  try {
    const contextNodes = collect(
      doc,
      rsmNamespace,
      'ExchangedDocumentContext',
      'ExchangedDocumentContext'
    );

    for (const contextNode of contextNodes) {
      const guidelineNodes = collect(
        contextNode,
        ramNamespace,
        'GuidelineSpecifiedDocumentContextParameter',
        'ram:GuidelineSpecifiedDocumentContextParameter'
      );

      for (const guidelineNode of guidelineNodes) {
        for (const idNode of collect(guidelineNode, ramNamespace, 'ID', 'ram:ID')) {
          const profileText = idNode.textContent || '';

          // Check for ZUGFeRD profiles
          if (
            profileText.includes('zugferd') ||
            profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
            profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
            profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED
          ) {
            return InvoiceFormat.ZUGFERD;
          }

          // Check for Factur-X profiles
          if (
            profileText.includes('factur-x') ||
            profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
            profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
            profileText === CII_PROFILE_IDS.FACTURX_EN16931
          ) {
            return InvoiceFormat.FACTURX;
          }
        }
      }
    }

    // No context node or no recognizable profile ID: fall back to string checking
    return FormatDetector.detectCIIFormatFromString(xml);
  } catch (error) {
    console.warn('Error detecting CII format, falling back to generic CII:', error);
    return FormatDetector.detectCIIFormatFromString(xml);
  }
}
|
||||
|
||||
/**
 * String-based fallback for telling CII flavours apart.
 *
 * Searches the raw XML for the literal spellings `factur-x` / `Factur-X`
 * first, then `zugferd` / `ZUGFeRD`; the Factur-X test takes precedence
 * when both are present.
 * @param xml XML string
 * @returns FACTURX or ZUGFERD when a marker is found, otherwise generic CII
 */
private static detectCIIFormatFromString(xml: string): InvoiceFormat {
  const mentions = (...spellings: string[]): boolean =>
    spellings.some((spelling) => xml.includes(spelling));

  if (mentions('factur-x', 'Factur-X')) {
    return InvoiceFormat.FACTURX;
  }

  if (mentions('zugferd', 'ZUGFeRD')) {
    return InvoiceFormat.ZUGFERD;
  }

  // Neither marker present: report the generic CII format
  return InvoiceFormat.CII;
}
|
||||
}
|
@ -72,14 +72,19 @@ export interface IPdf {
|
||||
id: string;
|
||||
metadata: {
|
||||
textExtraction: string;
|
||||
format?: string;
|
||||
embeddedXml?: {
|
||||
filename: string;
|
||||
description: string;
|
||||
};
|
||||
};
|
||||
buffer: Uint8Array;
|
||||
}
|
||||
|
||||
// Re-export types from tsclass for convenience
|
||||
export type { TInvoice } from '@tsclass/tsclass/dist_ts/finance';
|
||||
export type { TCreditNote } from '@tsclass/tsclass/dist_ts/finance';
|
||||
export type { TDebitNote } from '@tsclass/tsclass/dist_ts/finance';
|
||||
export type { TContact } from '@tsclass/tsclass/dist_ts/business';
|
||||
export type { TLetterEnvelope } from '@tsclass/tsclass/dist_ts/business';
|
||||
export type { TDocumentEnvelope } from '@tsclass/tsclass/dist_ts/business';
|
||||
export type { TInvoice } from '@tsclass/tsclass/dist_ts/finance/index.js';
|
||||
export type { TCreditNote } from '@tsclass/tsclass/dist_ts/finance/index.js';
|
||||
export type { TDebitNote } from '@tsclass/tsclass/dist_ts/finance/index.js';
|
||||
export type { TContact } from '@tsclass/tsclass/dist_ts/business/index.js';
|
||||
export type { TLetterEnvelope } from '@tsclass/tsclass/dist_ts/business/index.js';
|
||||
export type { TDocumentEnvelope } from '@tsclass/tsclass/dist_ts/business/index.js';
|
Loading…
x
Reference in New Issue
Block a user