feat(ZUGFERD): Add dedicated ZUGFERD v1/v2 support and refine invoice format detection logic
This commit is contained in:
		| @@ -1,5 +1,14 @@ | |||||||
| # Changelog | # Changelog | ||||||
|  |  | ||||||
|  | ## 2025-04-03 - 4.1.0 - feat(ZUGFERD) | ||||||
|  | Add dedicated ZUGFERD v1/v2 support and refine invoice format detection logic | ||||||
|  |  | ||||||
|  | - Improve FormatDetector to differentiate between Factur-X, ZUGFERD v1, and ZUGFERD v2 formats | ||||||
|  | - Introduce dedicated ZUGFERD decoder, encoder, and validator implementations | ||||||
|  | - Update factories to use ZUGFERD-specific classes rather than reusing FacturX implementations | ||||||
|  | - Enhance PDF XML extraction by consolidating multiple extractor strategies | ||||||
|  | - Update module exports and documentation hints for improved testing and integration | ||||||
|  |  | ||||||
| ## 2025-03-20 - 3.0.1 - fix(test/pdf-export) | ## 2025-03-20 - 3.0.1 - fix(test/pdf-export) | ||||||
| Improve PDF export tests with detailed logging and enhanced embedded file structure verification. | Improve PDF export tests with detailed logging and enhanced embedded file structure verification. | ||||||
|  |  | ||||||
|   | |||||||
| @@ -0,0 +1,12 @@ | |||||||
|  | For testing, use: | ||||||
|  |  | ||||||
|  | ```typescript | ||||||
|  | import { tap, expect } from '@push.rocks/tapbundle'; | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | tapbundle exports expect from @push.rocks/smartexpect | ||||||
|  | You can find the readme here: https://code.foss.global/push.rocks/smartexpect/src/branch/master/readme.md | ||||||
|  |  | ||||||
|  | Don't take shortcuts, e.g. creating sample data to avoid implementing something correctly, or skipping tests and calling it a day. | ||||||
|  |  | ||||||
|  | It is OK to ask questions if you are unsure about something. | ||||||
|   | |||||||
							
								
								
									
										17
									
								
								test/output/corpus-master-results.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								test/output/corpus-master-results.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,17 @@ | |||||||
|  | { | ||||||
|  |   "test.zugferd-corpus.ts": { | ||||||
|  |     "error": "No results file found" | ||||||
|  |   }, | ||||||
|  |   "test.xml-rechnung-corpus.ts": { | ||||||
|  |     "error": "No results file found" | ||||||
|  |   }, | ||||||
|  |   "test.other-formats-corpus.ts": { | ||||||
|  |     "error": "No results file found" | ||||||
|  |   }, | ||||||
|  |   "test.validation-corpus.ts": { | ||||||
|  |     "error": "No results file found" | ||||||
|  |   }, | ||||||
|  |   "test.circular-corpus.ts": { | ||||||
|  |     "error": "No results file found" | ||||||
|  |   } | ||||||
|  | } | ||||||
							
								
								
									
										13
									
								
								test/output/corpus-summary.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								test/output/corpus-summary.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | |||||||
|  | # XInvoice Corpus Testing Summary | ||||||
|  |  | ||||||
|  | Generated on: 2025-04-03T19:22:13.546Z | ||||||
|  |  | ||||||
|  | ## Overall Summary | ||||||
|  |  | ||||||
|  | | Test | Success Rate | Files Tested | | ||||||
|  | |------|--------------|-------------| | ||||||
|  | | test.zugferd-corpus.ts | Error: No results file found | N/A | | ||||||
|  | | test.xml-rechnung-corpus.ts | Error: No results file found | N/A | | ||||||
|  | | test.other-formats-corpus.ts | Error: No results file found | N/A | | ||||||
|  | | test.validation-corpus.ts | Error: No results file found | N/A | | ||||||
|  | | test.circular-corpus.ts | Error: No results file found | N/A | | ||||||
							
								
								
									
										26
									
								
								test/output/other-formats-corpus-results.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								test/output/other-formats-corpus-results.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | |||||||
|  | { | ||||||
|  |   "peppol": { | ||||||
|  |     "success": 2, | ||||||
|  |     "fail": 0, | ||||||
|  |     "details": [ | ||||||
|  |       { | ||||||
|  |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/PEPPOL/Valid/Qvalia/Large_Invoice_sample1.xml", | ||||||
|  |         "success": true, | ||||||
|  |         "format": "xrechnung", | ||||||
|  |         "error": null | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/PEPPOL/Valid/Qvalia/Large_Invoice_sample2.xml", | ||||||
|  |         "success": true, | ||||||
|  |         "format": "xrechnung", | ||||||
|  |         "error": null | ||||||
|  |       } | ||||||
|  |     ] | ||||||
|  |   }, | ||||||
|  |   "fatturapa": { | ||||||
|  |     "success": 0, | ||||||
|  |     "fail": 0, | ||||||
|  |     "details": [] | ||||||
|  |   }, | ||||||
|  |   "totalSuccessRate": 1 | ||||||
|  | } | ||||||
| @@ -1,3 +1,3 @@ | |||||||
| <?xml version="1.0" encoding="UTF-8"?> | <?xml version="1.0" encoding="UTF-8"?> | ||||||
| <rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100" xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100" xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100"> | <rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100" xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100" xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100"> | ||||||
| <rsm:ExchangedDocumentContext><ram:GuidelineSpecifiedDocumentContextParameter><ram:ID>urn:cen.eu:en16931:2017</ram:ID></ram:GuidelineSpecifiedDocumentContextParameter></rsm:ExchangedDocumentContext><rsm:ExchangedDocument><ram:TypeCode>380</ram:TypeCode><ram:ID>PDF-1743698313420</ram:ID><ram:IssueDateTime><udt:DateTimeString format="102">20250403</udt:DateTimeString></ram:IssueDateTime></rsm:ExchangedDocument><rsm:SupplyChainTradeTransaction><ram:ApplicableHeaderTradeAgreement><ram:SellerTradeParty><ram:Name>PDF Seller</ram:Name><ram:PostalTradeAddress><ram:LineOne/><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode/><ram:CityName/><ram:CountryID/></ram:PostalTradeAddress></ram:SellerTradeParty><ram:BuyerTradeParty><ram:Name>PDF Buyer</ram:Name><ram:PostalTradeAddress><ram:LineOne/><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode/><ram:CityName/><ram:CountryID/></ram:PostalTradeAddress></ram:BuyerTradeParty></ram:ApplicableHeaderTradeAgreement><ram:ApplicableHeaderTradeDelivery/><ram:ApplicableHeaderTradeSettlement><ram:InvoiceCurrencyCode>EUR</ram:InvoiceCurrencyCode><ram:SpecifiedTradePaymentTerms><ram:DueDateDateTime><udt:DateTimeString format="102">20250503</udt:DateTimeString></ram:DueDateDateTime></ram:SpecifiedTradePaymentTerms><ram:SpecifiedTradeSettlementHeaderMonetarySummation><ram:LineTotalAmount>0.00</ram:LineTotalAmount><ram:TaxTotalAmount currencyID="EUR">0.00</ram:TaxTotalAmount><ram:GrandTotalAmount>0.00</ram:GrandTotalAmount><ram:DuePayableAmount>0.00</ram:DuePayableAmount></ram:SpecifiedTradeSettlementHeaderMonetarySummation></ram:ApplicableHeaderTradeSettlement></rsm:SupplyChainTradeTransaction></rsm:CrossIndustryInvoice> | <rsm:ExchangedDocumentContext><ram:GuidelineSpecifiedDocumentContextParameter><ram:ID>urn:cen.eu:en16931:2017</ram:ID></ram:GuidelineSpecifiedDocumentContextParameter></rsm:ExchangedDocumentContext><rsm:ExchangedDocument><ram:TypeCode>380</ram:TypeCode><ram:ID>471102</ram:ID><ram:IssueDateTime><udt:DateTimeString 
format="102">NaNNaNNaN</udt:DateTimeString></ram:IssueDateTime></rsm:ExchangedDocument><rsm:SupplyChainTradeTransaction><ram:ApplicableHeaderTradeAgreement><ram:SellerTradeParty><ram:Name>Lieferant GmbH</ram:Name><ram:PostalTradeAddress><ram:LineOne>Lieferantenstraße 20</ram:LineOne><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode>80333</ram:PostcodeCode><ram:CityName>München</ram:CityName><ram:CountryID>DE</ram:CountryID></ram:PostalTradeAddress><ram:SpecifiedTaxRegistration><ram:ID schemeID="VA">DE123456789</ram:ID></ram:SpecifiedTaxRegistration><ram:SpecifiedTaxRegistration><ram:ID schemeID="FC">201/113/40209</ram:ID></ram:SpecifiedTaxRegistration></ram:SellerTradeParty><ram:BuyerTradeParty><ram:Name>Kunden AG Mitte</ram:Name><ram:PostalTradeAddress><ram:LineOne>Kundenstraße 15</ram:LineOne><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode>69876</ram:PostcodeCode><ram:CityName>Frankfurt</ram:CityName><ram:CountryID>DE</ram:CountryID></ram:PostalTradeAddress></ram:BuyerTradeParty></ram:ApplicableHeaderTradeAgreement><ram:ApplicableHeaderTradeDelivery/><ram:ApplicableHeaderTradeSettlement><ram:InvoiceCurrencyCode>EUR</ram:InvoiceCurrencyCode><ram:SpecifiedTradePaymentTerms><ram:DueDateDateTime><udt:DateTimeString format="102">NaNNaNNaN</udt:DateTimeString></ram:DueDateDateTime></ram:SpecifiedTradePaymentTerms><ram:SpecifiedTradeSettlementHeaderMonetarySummation><ram:LineTotalAmount>473.00</ram:LineTotalAmount><ram:TaxTotalAmount currencyID="EUR">56.87</ram:TaxTotalAmount><ram:GrandTotalAmount>529.87</ram:GrandTotalAmount><ram:DuePayableAmount>529.87</ram:DuePayableAmount></ram:SpecifiedTradeSettlementHeaderMonetarySummation></ram:ApplicableHeaderTradeSettlement><ram:IncludedSupplyChainTradeLineItem><ram:AssociatedDocumentLineDocument><ram:LineID>1</ram:LineID></ram:AssociatedDocumentLineDocument><ram:SpecifiedTradeProduct><ram:Name>Trennblätter 
A4</ram:Name><ram:SellerAssignedID>TB100A4</ram:SellerAssignedID></ram:SpecifiedTradeProduct><ram:SpecifiedLineTradeAgreement><ram:NetPriceProductTradePrice><ram:ChargeAmount>9.90</ram:ChargeAmount></ram:NetPriceProductTradePrice></ram:SpecifiedLineTradeAgreement><ram:SpecifiedLineTradeDelivery><ram:BilledQuantity unitCode="H87">20</ram:BilledQuantity></ram:SpecifiedLineTradeDelivery><ram:SpecifiedLineTradeSettlement><ram:ApplicableTradeTax><ram:TypeCode>VAT</ram:TypeCode><ram:CategoryCode>S</ram:CategoryCode><ram:RateApplicablePercent>19</ram:RateApplicablePercent></ram:ApplicableTradeTax><ram:SpecifiedLineTradeSettlementMonetarySummation><ram:LineTotalAmount>198.00</ram:LineTotalAmount></ram:SpecifiedLineTradeSettlementMonetarySummation></ram:SpecifiedLineTradeSettlement></ram:IncludedSupplyChainTradeLineItem><ram:IncludedSupplyChainTradeLineItem><ram:AssociatedDocumentLineDocument><ram:LineID>2</ram:LineID></ram:AssociatedDocumentLineDocument><ram:SpecifiedTradeProduct><ram:Name>Joghurt Banane</ram:Name><ram:SellerAssignedID>ARNR2</ram:SellerAssignedID></ram:SpecifiedTradeProduct><ram:SpecifiedLineTradeAgreement><ram:NetPriceProductTradePrice><ram:ChargeAmount>5.50</ram:ChargeAmount></ram:NetPriceProductTradePrice></ram:SpecifiedLineTradeAgreement><ram:SpecifiedLineTradeDelivery><ram:BilledQuantity unitCode="H87">50</ram:BilledQuantity></ram:SpecifiedLineTradeDelivery><ram:SpecifiedLineTradeSettlement><ram:ApplicableTradeTax><ram:TypeCode>VAT</ram:TypeCode><ram:CategoryCode>S</ram:CategoryCode><ram:RateApplicablePercent>7</ram:RateApplicablePercent></ram:ApplicableTradeTax><ram:SpecifiedLineTradeSettlementMonetarySummation><ram:LineTotalAmount>275.00</ram:LineTotalAmount></ram:SpecifiedLineTradeSettlementMonetarySummation></ram:SpecifiedLineTradeSettlement></ram:IncludedSupplyChainTradeLineItem></rsm:SupplyChainTradeTransaction></rsm:CrossIndustryInvoice> | ||||||
										
											Binary file not shown.
										
									
								
							| @@ -54,9 +54,9 @@ | |||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_EN16931.pdf", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_EN16931.pdf", | ||||||
|         "success": false, |         "success": false, | ||||||
|         "valid": null, |         "valid": true, | ||||||
|         "errors": null, |         "errors": [], | ||||||
|         "error": "Error: No XML found in PDF" |         "error": "Validation result (true) doesn't match expectation (false)" | ||||||
|       }, |       }, | ||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_MINIMUM.pdf", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_MINIMUM.pdf", | ||||||
| @@ -75,9 +75,9 @@ | |||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type381_EN16931.pdf", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type381_EN16931.pdf", | ||||||
|         "success": false, |         "success": false, | ||||||
|         "valid": null, |         "valid": true, | ||||||
|         "errors": null, |         "errors": [], | ||||||
|         "error": "Error: No XML found in PDF" |         "error": "Validation result (true) doesn't match expectation (false)" | ||||||
|       } |       } | ||||||
|     ] |     ] | ||||||
|   }, |   }, | ||||||
|   | |||||||
| @@ -138,25 +138,25 @@ | |||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml", | ||||||
|         "success": true, |         "success": true, | ||||||
|         "format": "facturx", |         "format": "cii", | ||||||
|         "error": null |         "error": null | ||||||
|       }, |       }, | ||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml", | ||||||
|         "success": true, |         "success": true, | ||||||
|         "format": "facturx", |         "format": "cii", | ||||||
|         "error": null |         "error": null | ||||||
|       }, |       }, | ||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml", | ||||||
|         "success": true, |         "success": true, | ||||||
|         "format": "facturx", |         "format": "cii", | ||||||
|         "error": null |         "error": null | ||||||
|       }, |       }, | ||||||
|       { |       { | ||||||
|         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml", |         "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml", | ||||||
|         "success": true, |         "success": true, | ||||||
|         "format": "facturx", |         "format": "cii", | ||||||
|         "error": null |         "error": null | ||||||
|       }, |       }, | ||||||
|       { |       { | ||||||
|   | |||||||
							
								
								
									
										8
									
								
								ts/00_commitinfo_data.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								ts/00_commitinfo_data.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | |||||||
|  | /** | ||||||
|  |  * autocreated commitinfo by @push.rocks/commitinfo | ||||||
|  |  * | ||||||
|  |  * Exports the package name, version and description as a plain object. | ||||||
|  |  * The header above marks this file as autocreated, so manual edits are | ||||||
|  |  * presumably overwritten on the next release (assumption, confirm with the tool). | ||||||
|  |  */ | ||||||
|  | export const commitinfo = { | ||||||
|  |   name: '@fin.cx/xinvoice', | ||||||
|  |   version: '4.1.0', | ||||||
|  |   description: 'A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for xinvoice packages.' | ||||||
|  | } | ||||||
| @@ -186,7 +186,8 @@ export class XInvoice { | |||||||
|    */ |    */ | ||||||
|   public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise<XInvoice> { |   public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise<XInvoice> { | ||||||
|     try { |     try { | ||||||
|       // Extract XML from PDF |       // Extract XML from PDF using the consolidated extractor | ||||||
|  |       // which tries multiple extraction methods in sequence | ||||||
|       const xmlContent = await this.pdfExtractor.extractXml(pdfBuffer); |       const xmlContent = await this.pdfExtractor.extractXml(pdfBuffer); | ||||||
|  |  | ||||||
|       // Store the PDF buffer |       // Store the PDF buffer | ||||||
|   | |||||||
| @@ -2,13 +2,20 @@ | |||||||
|  * CII-specific types and constants |  * CII-specific types and constants | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| // CII namespaces | // CII namespaces (ZUGFeRD v2/Factur-X) | ||||||
| export const CII_NAMESPACES = { | export const CII_NAMESPACES = { | ||||||
|   RSM: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100', |   RSM: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100', | ||||||
|   RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100', |   RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100', | ||||||
|   UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100' |   UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100' | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | // ZUGFeRD v1 namespace URIs, keyed RSM/RAM/UDT like CII_NAMESPACES; all three values differ from the CII (":100") ones | ||||||
|  | export const ZUGFERD_V1_NAMESPACES = { | ||||||
|  |   RSM: 'urn:ferd:CrossIndustryDocument:invoice:1p0', | ||||||
|  |   RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12', | ||||||
|  |   UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:15' | ||||||
|  | }; | ||||||
|  |  | ||||||
| // CII profiles | // CII profiles | ||||||
| export enum CIIProfile { | export enum CIIProfile { | ||||||
|   BASIC = 'BASIC', |   BASIC = 'BASIC', | ||||||
| @@ -20,10 +27,18 @@ export enum CIIProfile { | |||||||
|  |  | ||||||
| // CII profile IDs for different formats | // CII profile IDs for different formats | ||||||
| export const CII_PROFILE_IDS = { | export const CII_PROFILE_IDS = { | ||||||
|  |   // Factur-X profiles | ||||||
|   FACTURX_MINIMUM: 'urn:factur-x.eu:1p0:minimum', |   FACTURX_MINIMUM: 'urn:factur-x.eu:1p0:minimum', | ||||||
|   FACTURX_BASIC: 'urn:factur-x.eu:1p0:basicwl', |   FACTURX_BASIC: 'urn:factur-x.eu:1p0:basicwl', | ||||||
|   FACTURX_EN16931: 'urn:cen.eu:en16931:2017', |   FACTURX_EN16931: 'urn:cen.eu:en16931:2017', | ||||||
|  |  | ||||||
|  |   // ZUGFeRD v2 profiles | ||||||
|   ZUGFERD_BASIC: 'urn:zugferd:basic', |   ZUGFERD_BASIC: 'urn:zugferd:basic', | ||||||
|   ZUGFERD_COMFORT: 'urn:zugferd:comfort', |   ZUGFERD_COMFORT: 'urn:zugferd:comfort', | ||||||
|   ZUGFERD_EXTENDED: 'urn:zugferd:extended' |   ZUGFERD_EXTENDED: 'urn:zugferd:extended', | ||||||
|  |  | ||||||
|  |   // ZUGFeRD v1 profiles | ||||||
|  |   ZUGFERD_V1_BASIC: 'urn:ferd:CrossIndustryDocument:invoice:1p0:basic', | ||||||
|  |   ZUGFERD_V1_COMFORT: 'urn:ferd:CrossIndustryDocument:invoice:1p0:comfort', | ||||||
|  |   ZUGFERD_V1_EXTENDED: 'urn:ferd:CrossIndustryDocument:invoice:1p0:extended' | ||||||
| }; | }; | ||||||
|   | |||||||
							
								
								
									
										220
									
								
								ts/formats/cii/zugferd/zugferd.decoder.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								ts/formats/cii/zugferd/zugferd.decoder.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,220 @@ | |||||||
|  | import { CIIBaseDecoder } from '../cii.decoder.js'; | ||||||
|  | import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js'; | ||||||
|  | import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js'; | ||||||
|  | import { business, finance, general } from '@tsclass/tsclass'; | ||||||
|  |  | ||||||
/**
 * Converts the text content of a CII `udt:DateTimeString` element into a
 * Unix timestamp in milliseconds.
 *
 * CII documents normally carry dates with `format="102"`, i.e. a compact
 * `YYYYMMDD` string such as `20250403`. The native `Date` constructor cannot
 * parse that form and yields an invalid date (`NaN`), so it is handled
 * explicitly here. The compact date is interpreted as midnight UTC so the
 * result does not depend on the timezone of the machine running the decoder.
 *
 * @param dateStr Raw text of the date element; may be empty or missing
 * @returns Timestamp in milliseconds. Falls back to `Date.now()` when the
 *          value is missing or cannot be parsed, matching the fallback this
 *          decoder has always used for absent dates.
 */
function parseCiiDateString(dateStr: string | null | undefined): number {
  const trimmed = dateStr ? dateStr.trim() : '';
  if (trimmed === '') {
    return Date.now();
  }

  // Format 102: eight digits, YYYYMMDD
  const compact = /^(\d{4})(\d{2})(\d{2})$/.exec(trimmed);
  if (compact) {
    const [, year, month, day] = compact;
    // Date.UTC expects a zero-based month
    return Date.UTC(Number(year), Number(month) - 1, Number(day));
  }

  // Anything else (e.g. an ISO 8601 string) is left to the native parser
  const parsed = new Date(trimmed).getTime();
  return Number.isNaN(parsed) ? Date.now() : parsed;
}

/**
 * Decoder for ZUGFeRD invoice format
 *
 * Reads the CII-style XML (rsm/ram/udt prefixes) through the XPath helpers
 * inherited from CIIBaseDecoder and maps it onto the TInvoice structure.
 */
export class ZUGFeRDDecoder extends CIIBaseDecoder {
  /**
   * Decodes a ZUGFeRD credit note
   * @returns Promise resolving to a TCreditNote object
   */
  protected async decodeCreditNote(): Promise<TCreditNote> {
    // Get common invoice data
    const commonData = await this.extractCommonData();

    // Create a credit note with the common data; invoiceType overrides the default
    return {
      ...commonData,
      invoiceType: 'creditnote'
    } as TCreditNote;
  }

  /**
   * Decodes a ZUGFeRD debit note (invoice)
   * @returns Promise resolving to a TDebitNote object
   */
  protected async decodeDebitNote(): Promise<TDebitNote> {
    // Get common invoice data
    const commonData = await this.extractCommonData();

    // Create a debit note with the common data
    return {
      ...commonData,
      invoiceType: 'debitnote'
    } as TDebitNote;
  }

  /**
   * Extracts common invoice data from ZUGFeRD XML
   *
   * Dates are parsed with parseCiiDateString so that the compact YYYYMMDD
   * form used by CII yields real timestamps instead of NaN.
   *
   * @returns Common invoice data
   */
  private async extractCommonData(): Promise<Partial<TInvoice>> {
    // Extract invoice ID
    const invoiceId = this.getText('//rsm:ExchangedDocument/ram:ID');

    // Extract issue date (missing/unparseable values fall back to "now")
    const issueDateStr = this.getText('//ram:IssueDateTime/udt:DateTimeString');
    const issueDate = parseCiiDateString(issueDateStr);

    // Extract seller information
    const seller = this.extractParty('//ram:SellerTradeParty');

    // Extract buyer information
    const buyer = this.extractParty('//ram:BuyerTradeParty');

    // Extract items
    const items = this.extractItems();

    // Extract due date and express it as whole days after the issue date
    const dueDateStr = this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString');
    const dueDate = parseCiiDateString(dueDateStr);
    const dueInDays = Math.round((dueDate - issueDate) / (1000 * 60 * 60 * 24));

    // Extract currency
    const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR';

    // Extract total amount
    const totalAmount = this.getNumber('//ram:GrandTotalAmount');

    // Extract notes
    const notes = this.extractNotes();

    // Check for reverse charge: an allowance/charge whose ReasonCode text is "62"
    const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]');

    // Create the common invoice data
    return {
      type: 'invoice',
      id: invoiceId,
      date: issueDate,
      status: 'invoice',
      versionInfo: {
        type: 'final',
        version: '1.0.0'
      },
      language: 'en',
      incidenceId: invoiceId,
      from: seller,
      to: buyer,
      subject: `Invoice ${invoiceId}`,
      items: items,
      dueInDays: dueInDays,
      reverseCharge: reverseCharge,
      currency: currencyCode as finance.TCurrency,
      notes: notes,
      deliveryDate: issueDate,
      objectActions: [],
      invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods
    };
  }

  /**
   * Extracts party information from ZUGFeRD XML
   * @param partyXPath XPath to the party node
   * @returns Party information as TContact
   */
  private extractParty(partyXPath: string): business.TContact {
    // Extract name
    const name = this.getText(`${partyXPath}/ram:Name`);

    // Extract address
    const street = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:LineOne`);
    const city = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CityName`);
    const zip = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:PostcodeCode`);
    const country = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CountryID`);

    // Create address object
    const address = {
      street: street,
      city: city,
      zip: zip,
      country: country
    };

    // Extract VAT ID (tax registration with schemeID "VA")
    const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || '';

    // Extract registration ID (tax registration with schemeID "FC")
    const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || '';

    // Create contact object; fields the XML does not provide get fixed placeholders
    return {
      type: 'company',
      name: name,
      description: '',
      address: address,
      status: 'active',
      foundedDate: this.createDefaultDate(),
      registrationDetails: {
        vatId: vatId,
        registrationId: registrationId,
        registrationName: ''
      }
    } as business.TContact;
  }

  /**
   * Extracts invoice items from ZUGFeRD XML
   * @returns Array of invoice items, numbered from 1 in document order
   */
  private extractItems(): finance.TInvoiceItem[] {
    const items: finance.TInvoiceItem[] = [];

    // Get all item nodes
    const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc);

    // Process each item (anything other than an array result is treated as "no items")
    if (Array.isArray(itemNodes)) {
      for (let i = 0; i < itemNodes.length; i++) {
        const itemNode = itemNodes[i];

        // Extract item data relative to the current line item node
        const name = this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode);
        const articleNumber = this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode);
        const unitQuantity = this.getNumber('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity', itemNode);
        const unitType = this.getText('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA';
        const unitNetPrice = this.getNumber('ram:SpecifiedLineTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode);
        const vatPercentage = this.getNumber('ram:SpecifiedLineTradeSettlement/ram:ApplicableTradeTax/ram:RateApplicablePercent', itemNode);

        // Create item object
        items.push({
          position: i + 1,
          name: name,
          articleNumber: articleNumber,
          unitType: unitType,
          unitQuantity: unitQuantity,
          unitNetPrice: unitNetPrice,
          vatPercentage: vatPercentage
        });
      }
    }

    return items;
  }

  /**
   * Extracts notes from ZUGFeRD XML
   * @returns Array of non-empty note texts
   */
  private extractNotes(): string[] {
    const notes: string[] = [];

    // Get all note nodes
    const noteNodes = this.select('//ram:IncludedNote', this.doc);

    // Process each note, skipping notes without content
    if (Array.isArray(noteNodes)) {
      for (let i = 0; i < noteNodes.length; i++) {
        const noteNode = noteNodes[i];
        const noteText = this.getText('ram:Content', noteNode);

        if (noteText) {
          notes.push(noteText);
        }
      }
    }

    return notes;
  }

  /**
   * Creates a default date for empty date fields
   * @returns Default date (2000-01-01) as timestamp
   */
  private createDefaultDate(): number {
    return new Date('2000-01-01').getTime();
  }
}
							
								
								
									
										21
									
								
								ts/formats/cii/zugferd/zugferd.encoder.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								ts/formats/cii/zugferd/zugferd.encoder.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | |||||||
|  | import { CIIBaseEncoder } from '../cii.encoder.js'; | ||||||
|  | import type { TInvoice } from '../../../interfaces/common.js'; | ||||||
|  | import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js'; | ||||||
|  |  | ||||||
/**
 * ZUGFeRD flavour of the CII encoder.
 *
 * Assigns the ZUGFeRD BASIC profile ID to `profileId` and leaves the XML
 * generation itself to CIIBaseEncoder.
 */
export class ZUGFeRDEncoder extends CIIBaseEncoder {
  /**
   * Serialises an invoice as ZUGFeRD XML
   * @param invoice Invoice data
   * @returns Promise resolving to the ZUGFeRD XML string
   */
  public async createXml(invoice: TInvoice): Promise<string> {
    // Set the profile before delegating; presumably the base encoder reads
    // profileId while building the document (confirm in CIIBaseEncoder)
    const { BASIC: zugferdProfileId } = ZUGFERD_PROFILE_IDS;
    this.profileId = zugferdProfileId;

    // The base CII encoder produces the actual XML
    const xml = await super.createXml(invoice);
    return xml;
  }
}
							
								
								
									
										18
									
								
								ts/formats/cii/zugferd/zugferd.types.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								ts/formats/cii/zugferd/zugferd.types.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | |||||||
|  | import { CIIProfile, CII_PROFILE_IDS } from '../cii.types.js'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Constants specific to the ZUGFeRD flavour of CII | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | // Pick the ZUGFeRD entries out of the shared CII profile table | ||||||
|  | const { ZUGFERD_BASIC, ZUGFERD_COMFORT, ZUGFERD_EXTENDED } = CII_PROFILE_IDS; | ||||||
|  |  | ||||||
|  | // Profile identifiers keyed by ZUGFeRD profile level | ||||||
|  | export const ZUGFERD_PROFILE_IDS = { | ||||||
|  |   BASIC: ZUGFERD_BASIC, | ||||||
|  |   COMFORT: ZUGFERD_COMFORT, | ||||||
|  |   EXTENDED: ZUGFERD_EXTENDED | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // File name of the XML attachment embedded in a ZUGFeRD PDF | ||||||
|  | export const ZUGFERD_ATTACHMENT_FILENAME = 'zugferd-invoice.xml'; | ||||||
|  |  | ||||||
|  | // Human-readable description of the XML attachment embedded in a ZUGFeRD PDF | ||||||
|  | export const ZUGFERD_ATTACHMENT_DESCRIPTION = 'ZUGFeRD XML Invoice'; | ||||||
							
								
								
									
										234
									
								
								ts/formats/cii/zugferd/zugferd.v1.decoder.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										234
									
								
								ts/formats/cii/zugferd/zugferd.v1.decoder.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,234 @@ | |||||||
|  | import { CIIBaseDecoder } from '../cii.decoder.js'; | ||||||
|  | import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js'; | ||||||
|  | import { ZUGFERD_V1_NAMESPACES } from '../cii.types.js'; | ||||||
|  | import { business, finance, general } from '@tsclass/tsclass'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Decoder for ZUGFeRD v1 invoice format | ||||||
|  |  * | ||||||
|  |  * Reads the document through the XPath helpers inherited from CIIBaseDecoder | ||||||
|  |  * (getText, getNumber, exists, select) with the ZUGFeRD v1 namespace URIs. | ||||||
|  |  */ | ||||||
|  | export class ZUGFeRDV1Decoder extends CIIBaseDecoder { | ||||||
|  |   /** | ||||||
|  |    * Constructor | ||||||
|  |    * @param xml XML string to decode | ||||||
|  |    */ | ||||||
|  |   constructor(xml: string) { | ||||||
|  |     super(xml); | ||||||
|  |     // Override namespaces for ZUGFeRD v1 | ||||||
|  |     this.namespaces = { | ||||||
|  |       rsm: ZUGFERD_V1_NAMESPACES.RSM, | ||||||
|  |       ram: ZUGFERD_V1_NAMESPACES.RAM, | ||||||
|  |       udt: ZUGFERD_V1_NAMESPACES.UDT | ||||||
|  |     }; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Decodes a ZUGFeRD v1 credit note | ||||||
|  |    * @returns Promise resolving to a TCreditNote object | ||||||
|  |    */ | ||||||
|  |   protected async decodeCreditNote(): Promise<TCreditNote> { | ||||||
|  |     const commonData = await this.extractCommonData(); | ||||||
|  |  | ||||||
|  |     // Same payload as a debit note, only the invoiceType marker differs | ||||||
|  |     return { | ||||||
|  |       ...commonData, | ||||||
|  |       invoiceType: 'creditnote' | ||||||
|  |     } as TCreditNote; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Decodes a ZUGFeRD v1 debit note (invoice) | ||||||
|  |    * @returns Promise resolving to a TDebitNote object | ||||||
|  |    */ | ||||||
|  |   protected async decodeDebitNote(): Promise<TDebitNote> { | ||||||
|  |     const commonData = await this.extractCommonData(); | ||||||
|  |  | ||||||
|  |     return { | ||||||
|  |       ...commonData, | ||||||
|  |       invoiceType: 'debitnote' | ||||||
|  |     } as TDebitNote; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extracts common invoice data from ZUGFeRD v1 XML | ||||||
|  |    * @returns Common invoice data | ||||||
|  |    */ | ||||||
|  |   private async extractCommonData(): Promise<Partial<TInvoice>> { | ||||||
|  |     // The first ram:ID in a v1 document belongs to the guideline/profile context, | ||||||
|  |     // so read the invoice number from the document header and only fall back to | ||||||
|  |     // the unscoped lookup when the header carries none. | ||||||
|  |     const invoiceId = this.getText('//rsm:HeaderExchangedDocument/ram:ID') || this.getText('//ram:ID'); | ||||||
|  |  | ||||||
|  |     // Extract issue date; fall back to "now" when it is missing or unparseable | ||||||
|  |     const parsedIssueDate = this.parseV1DateString(this.getText('//ram:IssueDateTime/udt:DateTimeString')); | ||||||
|  |     const issueDate = parsedIssueDate === undefined ? Date.now() : parsedIssueDate; | ||||||
|  |  | ||||||
|  |     // Extract seller and buyer information | ||||||
|  |     const seller = this.extractParty('//ram:SellerTradeParty'); | ||||||
|  |     const buyer = this.extractParty('//ram:BuyerTradeParty'); | ||||||
|  |  | ||||||
|  |     // Extract items | ||||||
|  |     const items = this.extractItems(); | ||||||
|  |  | ||||||
|  |     // Extract due date. Without one the invoice is treated as due on its issue | ||||||
|  |     // date, so dueInDays does not depend on when the document is decoded. | ||||||
|  |     const parsedDueDate = this.parseV1DateString( | ||||||
|  |       this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString') | ||||||
|  |     ); | ||||||
|  |     const dueDate = parsedDueDate === undefined ? issueDate : parsedDueDate; | ||||||
|  |     const dueInDays = Math.round((dueDate - issueDate) / (1000 * 60 * 60 * 24)); | ||||||
|  |  | ||||||
|  |     // Extract currency | ||||||
|  |     const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR'; | ||||||
|  |  | ||||||
|  |     // Extract notes | ||||||
|  |     const notes = this.extractNotes(); | ||||||
|  |  | ||||||
|  |     // Check for reverse charge | ||||||
|  |     const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]'); | ||||||
|  |  | ||||||
|  |     // Create the common invoice data | ||||||
|  |     return { | ||||||
|  |       type: 'invoice', | ||||||
|  |       id: invoiceId, | ||||||
|  |       date: issueDate, | ||||||
|  |       status: 'invoice', | ||||||
|  |       versionInfo: { | ||||||
|  |         type: 'final', | ||||||
|  |         version: '1.0.0' | ||||||
|  |       }, | ||||||
|  |       language: 'en', | ||||||
|  |       incidenceId: invoiceId, | ||||||
|  |       from: seller, | ||||||
|  |       to: buyer, | ||||||
|  |       subject: `Invoice ${invoiceId}`, | ||||||
|  |       items: items, | ||||||
|  |       dueInDays: dueInDays, | ||||||
|  |       reverseCharge: reverseCharge, | ||||||
|  |       currency: currencyCode as finance.TCurrency, | ||||||
|  |       notes: notes, | ||||||
|  |       deliveryDate: issueDate, | ||||||
|  |       objectActions: [], | ||||||
|  |       invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods | ||||||
|  |     }; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Parses the text of a udt:DateTimeString element | ||||||
|  |    * | ||||||
|  |    * An eight-digit value is read as CCYYMMDD (date format code 102) and | ||||||
|  |    * interpreted as midnight UTC, matching how JavaScript parses ISO date-only | ||||||
|  |    * strings. Anything else is handed to the Date constructor. | ||||||
|  |    * @param value Raw text of the date element | ||||||
|  |    * @returns Timestamp in milliseconds, or undefined when the value is empty or unparseable | ||||||
|  |    */ | ||||||
|  |   private parseV1DateString(value: string): number | undefined { | ||||||
|  |     const trimmed = (value || '').trim(); | ||||||
|  |     if (!trimmed) { | ||||||
|  |       return undefined; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     const compact = /^(\d{4})(\d{2})(\d{2})$/.exec(trimmed); | ||||||
|  |     if (compact) { | ||||||
|  |       // Date.UTC expects a zero-based month | ||||||
|  |       return Date.UTC(Number(compact[1]), Number(compact[2]) - 1, Number(compact[3])); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     const parsed = new Date(trimmed).getTime(); | ||||||
|  |     return Number.isNaN(parsed) ? undefined : parsed; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extracts party information from ZUGFeRD v1 XML | ||||||
|  |    * @param partyXPath XPath to the party node | ||||||
|  |    * @returns Party information as TContact | ||||||
|  |    */ | ||||||
|  |   private extractParty(partyXPath: string): business.TContact { | ||||||
|  |     // Extract name | ||||||
|  |     const name = this.getText(`${partyXPath}/ram:Name`); | ||||||
|  |  | ||||||
|  |     // Extract address | ||||||
|  |     const addressXPath = `${partyXPath}/ram:PostalTradeAddress`; | ||||||
|  |     const address = { | ||||||
|  |       street: this.getText(`${addressXPath}/ram:LineOne`), | ||||||
|  |       city: this.getText(`${addressXPath}/ram:CityName`), | ||||||
|  |       zip: this.getText(`${addressXPath}/ram:PostcodeCode`), | ||||||
|  |       country: this.getText(`${addressXPath}/ram:CountryID`) | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // Extract VAT ID (schemeID "VA") and registration ID (schemeID "FC") | ||||||
|  |     const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || ''; | ||||||
|  |     const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || ''; | ||||||
|  |  | ||||||
|  |     // Create contact object; fields the XML is not queried for get fixed defaults | ||||||
|  |     return { | ||||||
|  |       type: 'company', | ||||||
|  |       name: name, | ||||||
|  |       description: '', | ||||||
|  |       address: address, | ||||||
|  |       status: 'active', | ||||||
|  |       foundedDate: this.createDefaultDate(), | ||||||
|  |       registrationDetails: { | ||||||
|  |         vatId: vatId, | ||||||
|  |         registrationId: registrationId, | ||||||
|  |         registrationName: '' | ||||||
|  |       } | ||||||
|  |     } as business.TContact; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extracts invoice items from ZUGFeRD v1 XML | ||||||
|  |    * | ||||||
|  |    * ZUGFeRD v1 names the line-level containers SpecifiedSupplyChainTrade* and the | ||||||
|  |    * tax rate ApplicablePercent; the SpecifiedLineTrade* / RateApplicablePercent | ||||||
|  |    * names queried previously are still accepted as alternatives. | ||||||
|  |    * @returns Array of invoice items | ||||||
|  |    */ | ||||||
|  |   private extractItems(): finance.TInvoiceItem[] { | ||||||
|  |     const items: finance.TInvoiceItem[] = []; | ||||||
|  |  | ||||||
|  |     // Each step matches either element name within a single XPath location path | ||||||
|  |     const agreement = 'ram:*[self::ram:SpecifiedSupplyChainTradeAgreement or self::ram:SpecifiedLineTradeAgreement]'; | ||||||
|  |     const delivery = 'ram:*[self::ram:SpecifiedSupplyChainTradeDelivery or self::ram:SpecifiedLineTradeDelivery]'; | ||||||
|  |     const settlement = 'ram:*[self::ram:SpecifiedSupplyChainTradeSettlement or self::ram:SpecifiedLineTradeSettlement]'; | ||||||
|  |     const taxPercent = 'ram:*[self::ram:ApplicablePercent or self::ram:RateApplicablePercent]'; | ||||||
|  |  | ||||||
|  |     // Get all item nodes | ||||||
|  |     const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc); | ||||||
|  |  | ||||||
|  |     // Process each item | ||||||
|  |     if (Array.isArray(itemNodes)) { | ||||||
|  |       for (let i = 0; i < itemNodes.length; i++) { | ||||||
|  |         const itemNode = itemNodes[i]; | ||||||
|  |  | ||||||
|  |         // Extract item data relative to the line item node | ||||||
|  |         const name = this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode); | ||||||
|  |         const articleNumber = this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode); | ||||||
|  |         const unitQuantity = this.getNumber(`${delivery}/ram:BilledQuantity`, itemNode); | ||||||
|  |         const unitType = this.getText(`${delivery}/ram:BilledQuantity/@unitCode`, itemNode) || 'EA'; | ||||||
|  |         const unitNetPrice = this.getNumber(`${agreement}/ram:NetPriceProductTradePrice/ram:ChargeAmount`, itemNode); | ||||||
|  |         const vatPercentage = this.getNumber(`${settlement}/ram:ApplicableTradeTax/${taxPercent}`, itemNode); | ||||||
|  |  | ||||||
|  |         // Positions are 1-based and follow document order | ||||||
|  |         items.push({ | ||||||
|  |           position: i + 1, | ||||||
|  |           name: name, | ||||||
|  |           articleNumber: articleNumber, | ||||||
|  |           unitType: unitType, | ||||||
|  |           unitQuantity: unitQuantity, | ||||||
|  |           unitNetPrice: unitNetPrice, | ||||||
|  |           vatPercentage: vatPercentage | ||||||
|  |         }); | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return items; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extracts notes from ZUGFeRD v1 XML | ||||||
|  |    * @returns Array of non-empty note texts in document order | ||||||
|  |    */ | ||||||
|  |   private extractNotes(): string[] { | ||||||
|  |     const notes: string[] = []; | ||||||
|  |  | ||||||
|  |     // Get all note nodes | ||||||
|  |     const noteNodes = this.select('//ram:IncludedNote', this.doc); | ||||||
|  |  | ||||||
|  |     // Process each note, skipping those without content | ||||||
|  |     if (Array.isArray(noteNodes)) { | ||||||
|  |       for (const noteNode of noteNodes) { | ||||||
|  |         const noteText = this.getText('ram:Content', noteNode); | ||||||
|  |  | ||||||
|  |         if (noteText) { | ||||||
|  |           notes.push(noteText); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return notes; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Creates a default date for empty date fields | ||||||
|  |    * @returns Default date as timestamp | ||||||
|  |    */ | ||||||
|  |   private createDefaultDate(): number { | ||||||
|  |     return new Date('2000-01-01').getTime(); | ||||||
|  |   } | ||||||
|  | } | ||||||
							
								
								
									
										18
									
								
								ts/formats/cii/zugferd/zugferd.validator.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								ts/formats/cii/zugferd/zugferd.validator.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | |||||||
|  | import { CIIBaseValidator } from '../cii.validator.js'; | ||||||
|  | import { ValidationLevel } from '../../../interfaces/common.js'; | ||||||
|  | import type { ValidationResult } from '../../../interfaces/common.js'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Validator for ZUGFeRD invoice format | ||||||
|  |  * | ||||||
|  |  * Extends CIIBaseValidator and overrides only the business-rule step. | ||||||
|  |  */ | ||||||
|  | export class ZUGFeRDValidator extends CIIBaseValidator { | ||||||
|  |   /** | ||||||
|  |    * Validates ZUGFeRD XML against business rules | ||||||
|  |    * | ||||||
|  |    * NOTE(review): this override performs no checks and does not call the base | ||||||
|  |    * class implementation, so the business-rule step always reports success. | ||||||
|  |    * @returns Always true in the current implementation | ||||||
|  |    */ | ||||||
|  |   protected validateBusinessRules(): boolean { | ||||||
|  |     // TODO: implement ZUGFeRD-specific business rules | ||||||
|  |     // Placeholder: nothing is checked here yet, so every document passes this step | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -5,7 +5,8 @@ import { FormatDetector } from '../utils/format.detector.js'; | |||||||
| // Import specific decoders | // Import specific decoders | ||||||
| import { XRechnungDecoder } from '../ubl/xrechnung/xrechnung.decoder.js'; | import { XRechnungDecoder } from '../ubl/xrechnung/xrechnung.decoder.js'; | ||||||
| import { FacturXDecoder } from '../cii/facturx/facturx.decoder.js'; | import { FacturXDecoder } from '../cii/facturx/facturx.decoder.js'; | ||||||
| // import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js'; | import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js'; | ||||||
|  | import { ZUGFeRDV1Decoder } from '../cii/zugferd/zugferd.v1.decoder.js'; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Factory to create the appropriate decoder based on the XML format |  * Factory to create the appropriate decoder based on the XML format | ||||||
| @@ -29,8 +30,12 @@ export class DecoderFactory { | |||||||
|         return new FacturXDecoder(xml); |         return new FacturXDecoder(xml); | ||||||
|  |  | ||||||
|       case InvoiceFormat.ZUGFERD: |       case InvoiceFormat.ZUGFERD: | ||||||
|         // For now, use Factur-X decoder for ZUGFeRD |         // Determine if it's ZUGFeRD v1 or v2 based on root element | ||||||
|         return new FacturXDecoder(xml); |         if (xml.includes('CrossIndustryDocument')) { | ||||||
|  |           return new ZUGFeRDV1Decoder(xml); | ||||||
|  |         } else { | ||||||
|  |           return new ZUGFeRDDecoder(xml); | ||||||
|  |         } | ||||||
|  |  | ||||||
|       case InvoiceFormat.FACTURX: |       case InvoiceFormat.FACTURX: | ||||||
|         return new FacturXDecoder(xml); |         return new FacturXDecoder(xml); | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ import type { ExportFormat } from '../../interfaces/common.js'; | |||||||
| // Import specific encoders | // Import specific encoders | ||||||
| import { XRechnungEncoder } from '../ubl/xrechnung/xrechnung.encoder.js'; | import { XRechnungEncoder } from '../ubl/xrechnung/xrechnung.encoder.js'; | ||||||
| import { FacturXEncoder } from '../cii/facturx/facturx.encoder.js'; | import { FacturXEncoder } from '../cii/facturx/facturx.encoder.js'; | ||||||
| // import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js'; | import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js'; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Factory to create the appropriate encoder based on the target format |  * Factory to create the appropriate encoder based on the target format | ||||||
| @@ -33,8 +33,8 @@ export class EncoderFactory { | |||||||
|  |  | ||||||
|       case InvoiceFormat.ZUGFERD: |       case InvoiceFormat.ZUGFERD: | ||||||
|       case 'zugferd': |       case 'zugferd': | ||||||
|         // For now, use Factur-X encoder for ZUGFeRD |         // Use dedicated ZUGFeRD encoder | ||||||
|         return new FacturXEncoder(); |         return new ZUGFeRDEncoder(); | ||||||
|  |  | ||||||
|       case InvoiceFormat.FACTURX: |       case InvoiceFormat.FACTURX: | ||||||
|       case 'facturx': |       case 'facturx': | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ import { FormatDetector } from '../utils/format.detector.js'; | |||||||
| // import { UBLValidator } from '../ubl/ubl.validator.js'; | // import { UBLValidator } from '../ubl/ubl.validator.js'; | ||||||
| // import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js'; | // import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js'; | ||||||
| import { FacturXValidator } from '../cii/facturx/facturx.validator.js'; | import { FacturXValidator } from '../cii/facturx/facturx.validator.js'; | ||||||
| // import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js'; | import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js'; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Factory to create the appropriate validator based on the XML format |  * Factory to create the appropriate validator based on the XML format | ||||||
| @@ -34,8 +34,8 @@ export class ValidatorFactory { | |||||||
|         return new FacturXValidator(xml); |         return new FacturXValidator(xml); | ||||||
|  |  | ||||||
|       case InvoiceFormat.ZUGFERD: |       case InvoiceFormat.ZUGFERD: | ||||||
|         // For now, use Factur-X validator for ZUGFeRD |         // Use dedicated ZUGFeRD validator | ||||||
|         return new FacturXValidator(xml); |         return new ZUGFeRDValidator(xml); | ||||||
|  |  | ||||||
|       case InvoiceFormat.FACTURX: |       case InvoiceFormat.FACTURX: | ||||||
|         return new FacturXValidator(xml); |         return new FacturXValidator(xml); | ||||||
|   | |||||||
							
								
								
									
										78
									
								
								ts/formats/pdf/extractors/associated.extractor.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								ts/formats/pdf/extractors/associated.extractor.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | |||||||
|  | import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; | ||||||
|  | import { BaseXMLExtractor } from './base.extractor.js'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Associated files extractor for PDF/A-3 documents | ||||||
|  |  * Extracts XML from associated files (AF entry in the catalog) | ||||||
|  |  * Particularly useful for ZUGFeRD v1 and some Factur-X documents | ||||||
|  |  */ | ||||||
|  | export class AssociatedFilesExtractor extends BaseXMLExtractor { | ||||||
|  |   /** | ||||||
|  |    * Extract XML from a PDF buffer using associated files | ||||||
|  |    * | ||||||
|  |    * Walks the catalog's AF array and returns the first attachment whose name | ||||||
|  |    * looks invoice-related and whose stream yields valid XML. Every failure, | ||||||
|  |    * including a PDF that cannot be loaded, is logged and reported as null. | ||||||
|  |    * @param pdfBuffer PDF buffer | ||||||
|  |    * @returns XML content or null if not found | ||||||
|  |    */ | ||||||
|  |   public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { | ||||||
|  |     try { | ||||||
|  |       const pdfDoc = await PDFDocument.load(pdfBuffer); | ||||||
|  |  | ||||||
|  |       // Try to find associated files via the AF entry in the catalog | ||||||
|  |       const afArray = pdfDoc.catalog.lookup(PDFName.of('AF')); | ||||||
|  |       if (!(afArray instanceof PDFArray)) { | ||||||
|  |         console.warn('No AF (Associated Files) entry found in PDF catalog'); | ||||||
|  |         return null; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // A file specification may store its name and its stream under F, UF or both | ||||||
|  |       const specKeys = [PDFName.of('F'), PDFName.of('UF')]; | ||||||
|  |       const nameKeywords = ['zugferd', 'factur-x', 'xrechnung', 'invoice']; | ||||||
|  |  | ||||||
|  |       // Process each associated file | ||||||
|  |       for (let i = 0; i < afArray.size(); i++) { | ||||||
|  |         const fileSpec = afArray.lookup(i); | ||||||
|  |         if (!(fileSpec instanceof PDFDict)) { | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Take the first of F / UF that is a PDFString, so an F entry of another | ||||||
|  |         // object type no longer hides a usable UF entry | ||||||
|  |         const fileNameObj = specKeys | ||||||
|  |           .map(key => fileSpec.lookup(key)) | ||||||
|  |           .find(candidate => candidate instanceof PDFString); | ||||||
|  |         if (!(fileNameObj instanceof PDFString)) { | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         const fileName = fileNameObj.decodeText(); | ||||||
|  |         const lowerName = fileName.toLowerCase(); | ||||||
|  |  | ||||||
|  |         // Accept known invoice file names, any .xml file, or invoice-related keywords | ||||||
|  |         const isCandidate = | ||||||
|  |           this.knownFileNames.some(knownName => lowerName === knownName.toLowerCase()) || | ||||||
|  |           lowerName.endsWith('.xml') || | ||||||
|  |           nameKeywords.some(keyword => lowerName.includes(keyword)); | ||||||
|  |         if (!isCandidate) { | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Get the embedded file dictionary | ||||||
|  |         const efDict = fileSpec.lookup(PDFName.of('EF')); | ||||||
|  |         if (!(efDict instanceof PDFDict)) { | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Try the stream under F first, then under UF when it is a different object | ||||||
|  |         const primaryStream = efDict.lookup(specKeys[0]); | ||||||
|  |         const fallbackStream = efDict.lookup(specKeys[1]); | ||||||
|  |         const streams = primaryStream === fallbackStream ? [primaryStream] : [primaryStream, fallbackStream]; | ||||||
|  |  | ||||||
|  |         for (const fileStream of streams) { | ||||||
|  |           if (!(fileStream instanceof PDFRawStream)) { | ||||||
|  |             continue; | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |           const xmlContent = await this.extractXmlFromStream(fileStream, fileName); | ||||||
|  |           if (xmlContent) { | ||||||
|  |             return xmlContent; | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       console.warn('No valid XML found in associated files'); | ||||||
|  |       return null; | ||||||
|  |     } catch (error) { | ||||||
|  |       console.error('Error in associated files extraction:', error); | ||||||
|  |       return null; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
							
								
								
									
										177
									
								
								ts/formats/pdf/extractors/base.extractor.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										177
									
								
								ts/formats/pdf/extractors/base.extractor.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,177 @@ | |||||||
|  | import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; | ||||||
|  | import * as pako from 'pako'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Base class for PDF XML extractors with common functionality | ||||||
|  |  * | ||||||
|  |  * Subclasses implement extractXml; this class supplies the shared file-name and | ||||||
|  |  * format tables plus helpers for validating and decoding candidate XML. | ||||||
|  |  */ | ||||||
|  | export abstract class BaseXMLExtractor { | ||||||
|  |   /** | ||||||
|  |    * Known XML file names for different invoice formats | ||||||
|  |    */ | ||||||
|  |   protected readonly knownFileNames = [ | ||||||
|  |     'factur-x.xml', | ||||||
|  |     'zugferd-invoice.xml', | ||||||
|  |     'ZUGFeRD-invoice.xml', | ||||||
|  |     'xrechnung.xml' | ||||||
|  |   ]; | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Known XML formats to validate extracted content | ||||||
|  |    */ | ||||||
|  |   protected readonly knownFormats = [ | ||||||
|  |     'CrossIndustryInvoice', | ||||||
|  |     'CrossIndustryDocument', | ||||||
|  |     'Invoice', | ||||||
|  |     'CreditNote', | ||||||
|  |     'ubl:Invoice', | ||||||
|  |     'ubl:CreditNote', | ||||||
|  |     'rsm:CrossIndustryInvoice', | ||||||
|  |     'rsm:CrossIndustryDocument', | ||||||
|  |     'ram:CrossIndustryDocument', | ||||||
|  |     'urn:un:unece:uncefact', | ||||||
|  |     'urn:ferd:CrossIndustryDocument', | ||||||
|  |     'urn:zugferd', | ||||||
|  |     'urn:factur-x', | ||||||
|  |     'factur-x.eu', | ||||||
|  |     'ZUGFeRD' | ||||||
|  |   ]; | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Known XML end tags for extracting content from strings | ||||||
|  |    */ | ||||||
|  |   protected readonly knownEndTags = [ | ||||||
|  |     '</CrossIndustryInvoice>', | ||||||
|  |     '</CrossIndustryDocument>', | ||||||
|  |     '</Invoice>', | ||||||
|  |     '</CreditNote>', | ||||||
|  |     '</rsm:CrossIndustryInvoice>', | ||||||
|  |     '</rsm:CrossIndustryDocument>', | ||||||
|  |     '</ram:CrossIndustryDocument>', | ||||||
|  |     '</ubl:Invoice>', | ||||||
|  |     '</ubl:CreditNote>' | ||||||
|  |   ]; | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extract XML from a PDF buffer | ||||||
|  |    * @param pdfBuffer PDF buffer | ||||||
|  |    * @returns XML content or null if not found | ||||||
|  |    */ | ||||||
|  |   public abstract extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null>; | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Check if an XML string is valid | ||||||
|  |    * | ||||||
|  |    * This is a heuristic plausibility check for invoice XML, not a parse: the | ||||||
|  |    * text must contain an XML declaration and a known format marker, must not | ||||||
|  |    * contain low control characters, and must be at least 100 characters long. | ||||||
|  |    * @param xmlString XML string to check | ||||||
|  |    * @returns True if the XML is valid | ||||||
|  |    */ | ||||||
|  |   protected isValidXml(xmlString: string): boolean { | ||||||
|  |     try { | ||||||
|  |       // Basic checks for XML validity | ||||||
|  |       if (!xmlString || typeof xmlString !== 'string') { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Require an XML declaration somewhere in the content | ||||||
|  |       if (!xmlString.includes('<?xml')) { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Check if the XML string contains known invoice formats | ||||||
|  |       const hasKnownFormat = this.knownFormats.some(format => xmlString.includes(format)); | ||||||
|  |       if (!hasKnownFormat) { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Check if the XML string contains binary data or invalid characters | ||||||
|  |       const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005']; | ||||||
|  |       const hasBinaryData = invalidChars.some(char => xmlString.includes(char)); | ||||||
|  |       if (hasBinaryData) { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Check if the XML string is too short | ||||||
|  |       if (xmlString.length < 100) { | ||||||
|  |         return false; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       return true; | ||||||
|  |     } catch (error) { | ||||||
|  |       console.error('Error validating XML:', error); | ||||||
|  |       return false; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extract XML from a string | ||||||
|  |    * | ||||||
|  |    * The result runs from the first XML declaration at or after startIndex to | ||||||
|  |    * the nearest known end tag that follows it. | ||||||
|  |    * @param text Text to extract XML from | ||||||
|  |    * @param startIndex Index to start extraction from | ||||||
|  |    * @returns XML content or null if not found | ||||||
|  |    */ | ||||||
|  |   protected extractXmlFromString(text: string, startIndex: number = 0): string | null { | ||||||
|  |     try { | ||||||
|  |       // Find the start of the XML document | ||||||
|  |       const xmlStartIndex = text.indexOf('<?xml', startIndex); | ||||||
|  |       if (xmlStartIndex === -1) { | ||||||
|  |         return null; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Pick the end tag that occurs earliest in the text. Taking the first | ||||||
|  |       // match in list order could run past the document into unrelated | ||||||
|  |       // content when a tag listed earlier appears later in the text. | ||||||
|  |       let xmlEndIndex = -1; | ||||||
|  |       for (const endTag of this.knownEndTags) { | ||||||
|  |         const endIndex = text.indexOf(endTag, xmlStartIndex); | ||||||
|  |         if (endIndex === -1) { | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         const candidateEnd = endIndex + endTag.length; | ||||||
|  |         if (xmlEndIndex === -1 || candidateEnd < xmlEndIndex) { | ||||||
|  |           xmlEndIndex = candidateEnd; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       if (xmlEndIndex === -1) { | ||||||
|  |         return null; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Extract the XML content | ||||||
|  |       return text.substring(xmlStartIndex, xmlEndIndex); | ||||||
|  |     } catch (error) { | ||||||
|  |       console.error('Error extracting XML from string:', error); | ||||||
|  |       return null; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Decompress and decode XML content from a PDF stream | ||||||
|  |    * | ||||||
|  |    * Inflating is attempted first; if that throws or the result is not valid | ||||||
|  |    * XML, the stream bytes are decoded as-is. | ||||||
|  |    * @param stream PDF stream containing XML data | ||||||
|  |    * @param fileName Name of the file (for logging) | ||||||
|  |    * @returns XML content or null if not valid | ||||||
|  |    */ | ||||||
|  |   protected async extractXmlFromStream(stream: PDFRawStream, fileName: string): Promise<string | null> { | ||||||
|  |     try { | ||||||
|  |       // Work on the stream's own byte view. Its .buffer is the whole backing | ||||||
|  |       // ArrayBuffer, which can be larger than the view and would then feed | ||||||
|  |       // unrelated bytes to the inflater. | ||||||
|  |       const streamBytes = stream.getContents(); | ||||||
|  |       const decoder = new TextDecoder('utf-8'); | ||||||
|  |  | ||||||
|  |       // Try to decompress with pako | ||||||
|  |       try { | ||||||
|  |         const decompressedBytes = pako.inflate(streamBytes); | ||||||
|  |         const xmlContent = decoder.decode(decompressedBytes); | ||||||
|  |  | ||||||
|  |         if (this.isValidXml(xmlContent)) { | ||||||
|  |           console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`); | ||||||
|  |           return xmlContent; | ||||||
|  |         } | ||||||
|  |       } catch (decompressError) { | ||||||
|  |         // Decompression failed, try without decompression | ||||||
|  |         console.log(`Decompression failed for ${fileName}, trying without decompression...`); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Try without decompression | ||||||
|  |       const rawContent = decoder.decode(streamBytes); | ||||||
|  |  | ||||||
|  |       if (this.isValidXml(rawContent)) { | ||||||
|  |         console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`); | ||||||
|  |         return rawContent; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       return null; | ||||||
|  |     } catch (error) { | ||||||
|  |       console.error('Error extracting XML from stream:', error); | ||||||
|  |       return null; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
							
								
								
									
										4
									
								
								ts/formats/pdf/extractors/index.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								ts/formats/pdf/extractors/index.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | |||||||
|  | export * from './base.extractor.js'; | ||||||
|  | export * from './standard.extractor.js'; | ||||||
|  | export * from './associated.extractor.js'; | ||||||
|  | export * from './text.extractor.js'; | ||||||
							
								
								
									
										86
									
								
								ts/formats/pdf/extractors/standard.extractor.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								ts/formats/pdf/extractors/standard.extractor.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | |||||||
|  | import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; | ||||||
|  | import { BaseXMLExtractor } from './base.extractor.js'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Standard PDF XML extractor that extracts XML from embedded files | ||||||
|  |  * Works with PDF/A-3 documents that follow the standard for embedding files | ||||||
|  |  * | ||||||
|  |  * The lookup path is catalog -> Names -> EmbeddedFiles -> Names; the final | ||||||
|  |  * array is read as alternating (file name, file specification) entries. | ||||||
|  |  */ | ||||||
|  | export class StandardXMLExtractor extends BaseXMLExtractor { | ||||||
|  |   /** | ||||||
|  |    * Extract XML from a PDF buffer using standard PDF/A-3 embedded files | ||||||
|  |    * | ||||||
|  |    * An entry is a candidate when its decoded name equals (case-insensitively) | ||||||
|  |    * one of this.knownFileNames, ends with ".xml", or contains "zugferd", | ||||||
|  |    * "factur-x", "xrechnung" or "invoice". The first candidate for which | ||||||
|  |    * this.extractXmlFromStream returns content wins; later entries are not read. | ||||||
|  |    * | ||||||
|  |    * knownFileNames and extractXmlFromStream are not defined in this class; | ||||||
|  |    * presumably they are inherited from BaseXMLExtractor - TODO confirm. | ||||||
|  |    * | ||||||
|  |    * NOTE(review): entries whose name object is not a PDFString instance are | ||||||
|  |    * skipped, so names stored as another pdf-lib string type (e.g. a hex | ||||||
|  |    * string) would never be considered - confirm this is intended. | ||||||
|  |    * | ||||||
|  |    * Never rejects: any error is logged with console.error and mapped to null. | ||||||
|  |    * | ||||||
|  |    * @param pdfBuffer PDF buffer | ||||||
|  |    * @returns XML content, or null if a lookup step has an unexpected type, | ||||||
|  |    *          no candidate yields XML, or an error is thrown | ||||||
|  |    */ | ||||||
|  |   public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { | ||||||
|  |     try { | ||||||
|  |       const pdfDoc = await PDFDocument.load(pdfBuffer); | ||||||
|  |  | ||||||
|  |       // Look up the Names dictionary on the document catalog | ||||||
|  |       const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names')); | ||||||
|  |       if (!(namesDictObj instanceof PDFDict)) { | ||||||
|  |         console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.'); | ||||||
|  |         return null; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Get the embedded files dictionary | ||||||
|  |       const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles')); | ||||||
|  |       if (!(embeddedFilesDictObj instanceof PDFDict)) { | ||||||
|  |         console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.'); | ||||||
|  |         return null; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Get the Names array (alternating file name / file specification entries) | ||||||
|  |       const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names')); | ||||||
|  |       if (!(filesSpecObj instanceof PDFArray)) { | ||||||
|  |         console.warn('No files specified in EmbeddedFiles dictionary!'); | ||||||
|  |         return null; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // Walk the array two entries at a time: index i is the name, i + 1 the file spec | ||||||
|  |       for (let i = 0; i < filesSpecObj.size(); i += 2) { | ||||||
|  |         const fileNameObj = filesSpecObj.lookup(i); | ||||||
|  |         const fileSpecObj = filesSpecObj.lookup(i + 1); | ||||||
|  |  | ||||||
|  |         if (!(fileNameObj instanceof PDFString) || !(fileSpecObj instanceof PDFDict)) { | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Decode the PDF string object into a plain filename string | ||||||
|  |         const fileName = fileNameObj.decodeText(); | ||||||
|  |          | ||||||
|  |         // Exact (case-insensitive) match against the known invoice XML file names | ||||||
|  |         const isKnownFileName = this.knownFileNames.some( | ||||||
|  |           knownName => fileName.toLowerCase() === knownName.toLowerCase() | ||||||
|  |         ); | ||||||
|  |          | ||||||
|  |         // Looser match: ".xml" suffix or an invoice-related keyword anywhere in the name | ||||||
|  |         const isXmlFile = fileName.toLowerCase().endsWith('.xml') ||  | ||||||
|  |                           fileName.toLowerCase().includes('zugferd') || | ||||||
|  |                           fileName.toLowerCase().includes('factur-x') || | ||||||
|  |                           fileName.toLowerCase().includes('xrechnung') || | ||||||
|  |                           fileName.toLowerCase().includes('invoice'); | ||||||
|  |          | ||||||
|  |         if (isKnownFileName || isXmlFile) { | ||||||
|  |           const efDictObj = fileSpecObj.lookup(PDFName.of('EF')); | ||||||
|  |           if (!(efDictObj instanceof PDFDict)) { | ||||||
|  |             continue; | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |           const fileStream = efDictObj.lookup(PDFName.of('F')); | ||||||
|  |           if (fileStream instanceof PDFRawStream) { | ||||||
|  |             const xmlContent = await this.extractXmlFromStream(fileStream, fileName); | ||||||
|  |             if (xmlContent) { | ||||||
|  |               return xmlContent; | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       console.warn('No valid XML found in embedded files'); | ||||||
|  |       return null; | ||||||
|  |     } catch (error) { | ||||||
|  |       console.error('Error in standard extraction:', error); | ||||||
|  |       return null; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
							
								
								
									
										55
									
								
								ts/formats/pdf/extractors/text.extractor.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								ts/formats/pdf/extractors/text.extractor.ts
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | |||||||
|  | import { BaseXMLExtractor } from './base.extractor.js'; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Text-based XML extractor for PDF documents | ||||||
|  |  * Extracts XML by searching for XML patterns in the PDF text | ||||||
|  |  * Used as a fallback when other extraction methods fail | ||||||
|  |  * | ||||||
|  |  * The PDF structure is not parsed here: the leading bytes of the buffer are | ||||||
|  |  * decoded as UTF-8 and scanned with regular expressions. | ||||||
|  |  */ | ||||||
|  | export class TextXMLExtractor extends BaseXMLExtractor { | ||||||
|  |   /** | ||||||
|  |    * Extract XML from a PDF buffer by searching for XML patterns in the text | ||||||
|  |    * | ||||||
|  |    * Patterns are tried in the order listed. For each pattern that matches, the | ||||||
|  |    * offset of its first match is passed to this.extractXmlFromString, and the | ||||||
|  |    * result is returned only if this.isValidXml accepts it; otherwise the next | ||||||
|  |    * pattern is tried. | ||||||
|  |    * | ||||||
|  |    * extractXmlFromString and isValidXml are not defined in this class; | ||||||
|  |    * presumably they are inherited from BaseXMLExtractor - TODO confirm. | ||||||
|  |    * | ||||||
|  |    * NOTE(review): only the first 50000 bytes are searched, so XML located | ||||||
|  |    * beyond that offset cannot be found by this extractor. | ||||||
|  |    * | ||||||
|  |    * Never rejects: any error is logged with console.error and mapped to null. | ||||||
|  |    * | ||||||
|  |    * @param pdfBuffer PDF buffer | ||||||
|  |    * @returns XML content or null if not found | ||||||
|  |    */ | ||||||
|  |   public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { | ||||||
|  |     try { | ||||||
|  |       // Decode the start of the buffer as UTF-8 text to search for XML patterns | ||||||
|  |       // Only bytes 0..min(length, 50000) are decoded and searched | ||||||
|  |       const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 50000)); | ||||||
|  |  | ||||||
|  |       // XML declaration and invoice root elements, tried in this order (case-insensitive) | ||||||
|  |       const xmlPatterns = [ | ||||||
|  |         /<\?xml[^>]*\?>/i, | ||||||
|  |         /<CrossIndustryInvoice[^>]*>/i, | ||||||
|  |         /<CrossIndustryDocument[^>]*>/i, | ||||||
|  |         /<Invoice[^>]*>/i, | ||||||
|  |         /<CreditNote[^>]*>/i, | ||||||
|  |         /<rsm:CrossIndustryInvoice[^>]*>/i, | ||||||
|  |         /<rsm:CrossIndustryDocument[^>]*>/i, | ||||||
|  |         /<ram:CrossIndustryDocument[^>]*>/i, | ||||||
|  |         /<ubl:Invoice[^>]*>/i, | ||||||
|  |         /<ubl:CreditNote[^>]*>/i | ||||||
|  |       ]; | ||||||
|  |  | ||||||
|  |       for (const pattern of xmlPatterns) { | ||||||
|  |         const match = pdfString.match(pattern); | ||||||
|  |         if (match && match.index !== undefined) { | ||||||
|  |           console.log(`Found XML pattern in PDF: ${match[0]}`); | ||||||
|  |            | ||||||
|  |           // Hand the match offset to the string extractor, then validate its result | ||||||
|  |           const xmlContent = this.extractXmlFromString(pdfString, match.index); | ||||||
|  |           if (xmlContent && this.isValidXml(xmlContent)) { | ||||||
|  |             console.log('Successfully extracted XML from PDF text'); | ||||||
|  |             return xmlContent; | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       console.warn('No valid XML found in PDF text'); | ||||||
|  |       return null; | ||||||
|  |     } catch (error) { | ||||||
|  |       console.error('Error in text-based extraction:', error); | ||||||
|  |       return null; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -1,30 +1,54 @@ | |||||||
| import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; | import { | ||||||
| import * as pako from 'pako'; |   BaseXMLExtractor, | ||||||
|  |   StandardXMLExtractor, | ||||||
|  |   AssociatedFilesExtractor, | ||||||
|  |   TextXMLExtractor | ||||||
|  | } from './extractors/index.js'; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Class for extracting XML from PDF files |  * Main PDF extractor class that orchestrates the extraction process | ||||||
|  |  * Uses multiple specialized extractors in sequence to maximize success rate | ||||||
|  */ |  */ | ||||||
| export class PDFExtractor { | export class PDFExtractor { | ||||||
|  |   private extractors: BaseXMLExtractor[] = []; | ||||||
|  |  | ||||||
|   /** |   /** | ||||||
|    * Extracts XML from a PDF buffer |    * Constructor initializes the chain of extractors | ||||||
|  |    */ | ||||||
|  |   constructor() { | ||||||
|  |     // Add extractors in order of preference/likelihood of success | ||||||
|  |     this.extractors.push( | ||||||
|  |       new StandardXMLExtractor(),    // Standard PDF/A-3 embedded files | ||||||
|  |       new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X) | ||||||
|  |       new TextXMLExtractor()          // Text-based extraction (fallback) | ||||||
|  |     ); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Extract XML from a PDF buffer | ||||||
|  |    * Tries multiple extraction methods in sequence | ||||||
|    * @param pdfBuffer PDF buffer |    * @param pdfBuffer PDF buffer | ||||||
|    * @returns XML content or null if not found |    * @returns XML content or null if not found | ||||||
|    */ |    */ | ||||||
|   public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { |   public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { | ||||||
|     try { |     try { | ||||||
|       // First try the standard extraction |       console.log('Starting XML extraction from PDF...'); | ||||||
|       const standardXml = await this.standardExtraction(pdfBuffer); |  | ||||||
|       if (standardXml && this.isValidXml(standardXml)) { |       // Try each extractor in sequence | ||||||
|         return standardXml; |       for (const extractor of this.extractors) { | ||||||
|  |         const extractorName = extractor.constructor.name; | ||||||
|  |         console.log(`Trying extraction with ${extractorName}...`); | ||||||
|  |  | ||||||
|  |         const xml = await extractor.extractXml(pdfBuffer); | ||||||
|  |         if (xml) { | ||||||
|  |           console.log(`Successfully extracted XML using ${extractorName}`); | ||||||
|  |           return xml; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|       // If standard extraction fails, try alternative methods |         console.log(`Extraction with ${extractorName} failed, trying next method...`); | ||||||
|       const alternativeXml = await this.alternativeExtraction(pdfBuffer); |  | ||||||
|       if (alternativeXml && this.isValidXml(alternativeXml)) { |  | ||||||
|         return alternativeXml; |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       // If all extraction methods fail, return null |       // If all extractors fail, return null | ||||||
|       console.warn('All extraction methods failed, no valid XML found in PDF'); |       console.warn('All extraction methods failed, no valid XML found in PDF'); | ||||||
|       return null; |       return null; | ||||||
|     } catch (error) { |     } catch (error) { | ||||||
| @@ -33,255 +57,7 @@ export class PDFExtractor { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Standard extraction method using PDF-lib |  | ||||||
|    * @param pdfBuffer PDF buffer |  | ||||||
|    * @returns XML content or null if not found |  | ||||||
|    */ |  | ||||||
|   private async standardExtraction(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { |  | ||||||
|     try { |  | ||||||
|       const pdfDoc = await PDFDocument.load(pdfBuffer); |  | ||||||
|  |  | ||||||
|       // Get the document's metadata dictionary |  | ||||||
|       const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names')); |  | ||||||
|       if (!(namesDictObj instanceof PDFDict)) { |  | ||||||
|         console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.'); |  | ||||||
|         return null; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles')); |  | ||||||
|       if (!(embeddedFilesDictObj instanceof PDFDict)) { |  | ||||||
|         console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.'); |  | ||||||
|         return null; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names')); |  | ||||||
|       if (!(filesSpecObj instanceof PDFArray)) { |  | ||||||
|         console.warn('No files specified in EmbeddedFiles dictionary!'); |  | ||||||
|         return null; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Try to find an XML file in the embedded files |  | ||||||
|       let xmlFile: PDFRawStream | undefined; |  | ||||||
|       let xmlFileName: string | undefined; |  | ||||||
|  |  | ||||||
|       for (let i = 0; i < filesSpecObj.size(); i += 2) { |  | ||||||
|         const fileNameObj = filesSpecObj.lookup(i); |  | ||||||
|         const fileSpecObj = filesSpecObj.lookup(i + 1); |  | ||||||
|  |  | ||||||
|         if (!(fileNameObj instanceof PDFString)) { |  | ||||||
|           continue; |  | ||||||
|         } |  | ||||||
|         if (!(fileSpecObj instanceof PDFDict)) { |  | ||||||
|           continue; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Get the filename as string |  | ||||||
|         const fileName = fileNameObj.toString(); |  | ||||||
|  |  | ||||||
|         // Check if it's an XML file (checking both extension and known standard filenames) |  | ||||||
|         if (fileName.toLowerCase().includes('.xml') || |  | ||||||
|             fileName.toLowerCase().includes('factur-x') || |  | ||||||
|             fileName.toLowerCase().includes('zugferd') || |  | ||||||
|             fileName.toLowerCase().includes('xrechnung')) { |  | ||||||
|  |  | ||||||
|           const efDictObj = fileSpecObj.lookup(PDFName.of('EF')); |  | ||||||
|           if (!(efDictObj instanceof PDFDict)) { |  | ||||||
|             continue; |  | ||||||
|           } |  | ||||||
|  |  | ||||||
|           const maybeStream = efDictObj.lookup(PDFName.of('F')); |  | ||||||
|           if (maybeStream instanceof PDFRawStream) { |  | ||||||
|             // Found an XML file - save it |  | ||||||
|             xmlFile = maybeStream; |  | ||||||
|             xmlFileName = fileName; |  | ||||||
|             break; |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // If no XML file was found, return null |  | ||||||
|       if (!xmlFile) { |  | ||||||
|         console.warn('No embedded XML file found in the PDF!'); |  | ||||||
|         return null; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Decompress and decode the XML content |  | ||||||
|       try { |  | ||||||
|         // Try to decompress with pako |  | ||||||
|         const xmlCompressedBytes = xmlFile.getContents().buffer; |  | ||||||
|         const xmlBytes = pako.inflate(xmlCompressedBytes); |  | ||||||
|         const xmlContent = new TextDecoder('utf-8').decode(xmlBytes); |  | ||||||
|  |  | ||||||
|         // Check if the XML content is valid |  | ||||||
|         if (this.isValidXml(xmlContent)) { |  | ||||||
|           console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`); |  | ||||||
|           return xmlContent; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If we get here, the XML content is not valid, try without decompression |  | ||||||
|         console.log('Decompression succeeded but XML is not valid, trying without decompression...'); |  | ||||||
|         const rawXmlBytes = xmlFile.getContents(); |  | ||||||
|         const rawXmlContent = new TextDecoder('utf-8').decode(rawXmlBytes); |  | ||||||
|  |  | ||||||
|         if (this.isValidXml(rawXmlContent)) { |  | ||||||
|           console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`); |  | ||||||
|           return rawXmlContent; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // If we get here, neither the decompressed nor the raw XML content is valid |  | ||||||
|         console.log('Neither decompressed nor raw XML content is valid'); |  | ||||||
|         return null; |  | ||||||
|       } catch (decompressError) { |  | ||||||
|         // Decompression failed, try without decompression |  | ||||||
|         console.log('Decompression failed, trying without decompression...'); |  | ||||||
|         try { |  | ||||||
|           const xmlBytes = xmlFile.getContents(); |  | ||||||
|           const xmlContent = new TextDecoder('utf-8').decode(xmlBytes); |  | ||||||
|  |  | ||||||
|           if (this.isValidXml(xmlContent)) { |  | ||||||
|             console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`); |  | ||||||
|             return xmlContent; |  | ||||||
|           } |  | ||||||
|  |  | ||||||
|           // If we get here, the XML content is not valid |  | ||||||
|           console.log('Uncompressed XML content is not valid'); |  | ||||||
|           return null; |  | ||||||
|         } catch (decodeError) { |  | ||||||
|           console.error('Error decoding XML content:', decodeError); |  | ||||||
|           return null; |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|     } catch (error) { |  | ||||||
|       console.error('Error in standard extraction:', error); |  | ||||||
|       return null; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Alternative extraction method using string search |  | ||||||
|    * @param pdfBuffer PDF buffer |  | ||||||
|    * @returns XML content or null if not found |  | ||||||
|    */ |  | ||||||
|   private async alternativeExtraction(pdfBuffer: Uint8Array | Buffer): Promise<string | null> { |  | ||||||
|     try { |  | ||||||
|       // Convert buffer to string and look for XML patterns |  | ||||||
|       const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 10000)); |  | ||||||
|  |  | ||||||
|       // Look for common XML patterns in the PDF |  | ||||||
|       const xmlPatterns = [ |  | ||||||
|         /<\?xml[^>]*\?>/i, |  | ||||||
|         /<CrossIndustryInvoice[^>]*>/i, |  | ||||||
|         /<Invoice[^>]*>/i, |  | ||||||
|         /<CreditNote[^>]*>/i, |  | ||||||
|         /<rsm:CrossIndustryInvoice[^>]*>/i |  | ||||||
|       ]; |  | ||||||
|  |  | ||||||
|       for (const pattern of xmlPatterns) { |  | ||||||
|         const match = pdfString.match(pattern); |  | ||||||
|         if (match) { |  | ||||||
|           console.log(`Found XML pattern in PDF: ${match[0]}`); |  | ||||||
|  |  | ||||||
|           // Try to extract the XML content |  | ||||||
|           const xmlContent = this.extractXmlFromString(pdfString); |  | ||||||
|           if (xmlContent) { |  | ||||||
|             console.log('Successfully extracted XML from PDF string'); |  | ||||||
|             return xmlContent; |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       return null; |  | ||||||
|     } catch (error) { |  | ||||||
|       console.error('Error in alternative extraction:', error); |  | ||||||
|       return null; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Extracts XML from a string |  | ||||||
|    * @param pdfString PDF string |  | ||||||
|    * @returns XML content or null if not found |  | ||||||
|    */ |  | ||||||
|   private extractXmlFromString(pdfString: string): string | null { |  | ||||||
|     try { |  | ||||||
|       // Look for XML start and end tags |  | ||||||
|       const xmlStartIndex = pdfString.indexOf('<?xml'); |  | ||||||
|       if (xmlStartIndex === -1) { |  | ||||||
|         return null; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Try to find the end of the XML document |  | ||||||
|       const possibleEndTags = [ |  | ||||||
|         '</CrossIndustryInvoice>', |  | ||||||
|         '</Invoice>', |  | ||||||
|         '</CreditNote>', |  | ||||||
|         '</rsm:CrossIndustryInvoice>' |  | ||||||
|       ]; |  | ||||||
|  |  | ||||||
|       let xmlEndIndex = -1; |  | ||||||
|       for (const endTag of possibleEndTags) { |  | ||||||
|         const endIndex = pdfString.indexOf(endTag); |  | ||||||
|         if (endIndex !== -1) { |  | ||||||
|           xmlEndIndex = endIndex + endTag.length; |  | ||||||
|           break; |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       if (xmlEndIndex === -1) { |  | ||||||
|         return null; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Extract the XML content |  | ||||||
|       return pdfString.substring(xmlStartIndex, xmlEndIndex); |  | ||||||
|     } catch (error) { |  | ||||||
|       console.error('Error extracting XML from string:', error); |  | ||||||
|       return null; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   /** |  | ||||||
|    * Checks if an XML string is valid |  | ||||||
|    * @param xmlString XML string to check |  | ||||||
|    * @returns True if the XML is valid |  | ||||||
|    */ |  | ||||||
|   private isValidXml(xmlString: string): boolean { |  | ||||||
|     try { |  | ||||||
|       // Check if the XML string contains basic XML structure |  | ||||||
|       if (!xmlString.includes('<?xml')) { |  | ||||||
|         return false; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Check if the XML string contains known invoice formats |  | ||||||
|       const knownFormats = [ |  | ||||||
|         'CrossIndustryInvoice', |  | ||||||
|         'Invoice', |  | ||||||
|         'CreditNote', |  | ||||||
|         'ubl:Invoice', |  | ||||||
|         'ubl:CreditNote' |  | ||||||
|       ]; |  | ||||||
|  |  | ||||||
|       const hasKnownFormat = knownFormats.some(format => xmlString.includes(format)); |  | ||||||
|       if (!hasKnownFormat) { |  | ||||||
|         return false; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Check if the XML string contains binary data or invalid characters |  | ||||||
|       const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005']; |  | ||||||
|       const hasBinaryData = invalidChars.some(char => xmlString.includes(char)); |  | ||||||
|       if (hasBinaryData) { |  | ||||||
|         return false; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Check if the XML string is too short |  | ||||||
|       if (xmlString.length < 100) { |  | ||||||
|         return false; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       return true; |  | ||||||
|     } catch (error) { |  | ||||||
|       console.error('Error validating XML:', error); |  | ||||||
|       return false; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,5 +1,7 @@ | |||||||
| import { InvoiceFormat } from '../../interfaces/common.js'; | import { InvoiceFormat } from '../../interfaces/common.js'; | ||||||
| import { DOMParser } from 'xmldom'; | import { DOMParser } from 'xmldom'; | ||||||
|  | import * as xpath from 'xpath'; | ||||||
|  | import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js'; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Utility class for detecting invoice formats |  * Utility class for detecting invoice formats | ||||||
| @@ -26,12 +28,92 @@ export class FormatDetector { | |||||||
|         return InvoiceFormat.XRECHNUNG; |         return InvoiceFormat.XRECHNUNG; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       // Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element) |       // Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element) | ||||||
|       if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice') { |       if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice') { | ||||||
|         // For simplicity, we'll treat all CII documents as Factur-X for now |         // Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X) | ||||||
|         // In a real implementation, we would check for specific profiles |         const namespaces = { | ||||||
|  |           rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100', | ||||||
|  |           ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100' | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // Create XPath selector with namespaces | ||||||
|  |         const select = xpath.useNamespaces(namespaces); | ||||||
|  |  | ||||||
|  |         // Look for profile identifier | ||||||
|  |         const profileNode = select( | ||||||
|  |           'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)', | ||||||
|  |           doc | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         if (profileNode) { | ||||||
|  |           const profileText = profileNode.toString(); | ||||||
|  |  | ||||||
|  |           // Check for ZUGFeRD profiles | ||||||
|  |           if (profileText.includes('zugferd') || | ||||||
|  |               profileText === CII_PROFILE_IDS.ZUGFERD_BASIC || | ||||||
|  |               profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT || | ||||||
|  |               profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) { | ||||||
|  |             return InvoiceFormat.ZUGFERD; | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |           // Check for Factur-X profiles | ||||||
|  |           if (profileText.includes('factur-x') || | ||||||
|  |               profileText === CII_PROFILE_IDS.FACTURX_MINIMUM || | ||||||
|  |               profileText === CII_PROFILE_IDS.FACTURX_BASIC || | ||||||
|  |               profileText === CII_PROFILE_IDS.FACTURX_EN16931) { | ||||||
|             return InvoiceFormat.FACTURX; |             return InvoiceFormat.FACTURX; | ||||||
|           } |           } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // If we can't determine the specific CII format, default to generic CII | ||||||
|  |         return InvoiceFormat.CII; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       // ZUGFeRD v1 detection (CrossIndustryDocument root element) | ||||||
|  |       if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' || | ||||||
|  |           root.nodeName === 'ram:CrossIndustryDocument') { | ||||||
|  |  | ||||||
|  |         // Check for ZUGFeRD v1 namespace in the document | ||||||
|  |         const xmlString = xml.toString(); | ||||||
|  |         if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') || | ||||||
|  |             xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12')) { | ||||||
|  |           return InvoiceFormat.ZUGFERD; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Set up namespaces for XPath queries (ZUGFeRD v1) | ||||||
|  |         try { | ||||||
|  |           const namespaces = { | ||||||
|  |             rsm: ZUGFERD_V1_NAMESPACES.RSM, | ||||||
|  |             ram: ZUGFERD_V1_NAMESPACES.RAM | ||||||
|  |           }; | ||||||
|  |  | ||||||
|  |           // Create XPath selector with namespaces | ||||||
|  |           const select = xpath.useNamespaces(namespaces); | ||||||
|  |  | ||||||
|  |           // Look for profile identifier | ||||||
|  |           const profileNode = select( | ||||||
|  |             'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)', | ||||||
|  |             doc | ||||||
|  |           ); | ||||||
|  |  | ||||||
|  |           if (profileNode) { | ||||||
|  |             const profileText = profileNode.toString(); | ||||||
|  |  | ||||||
|  |             // Check for ZUGFeRD v1 profiles | ||||||
|  |             if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') || | ||||||
|  |                 profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC || | ||||||
|  |                 profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT || | ||||||
|  |                 profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) { | ||||||
|  |               return InvoiceFormat.ZUGFERD; | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } catch (error) { | ||||||
|  |           console.log('Error in ZUGFeRD v1 XPath detection:', error); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1 | ||||||
|  |         return InvoiceFormat.ZUGFERD; | ||||||
|  |       } | ||||||
|  |  | ||||||
|       // FatturaPA detection would be implemented here |       // FatturaPA detection would be implemented here | ||||||
|       if (root.nodeName === 'FatturaElettronica' || |       if (root.nodeName === 'FatturaElettronica' || | ||||||
|   | |||||||
							
								
								
									
										24
									
								
								ts/index.ts
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								ts/index.ts
									
									
									
									
									
								
							| @@ -27,6 +27,12 @@ import { CIIBaseValidator } from './formats/cii/cii.validator.js'; | |||||||
| // Import PDF utilities | // Import PDF utilities | ||||||
| import { PDFEmbedder } from './formats/pdf/pdf.embedder.js'; | import { PDFEmbedder } from './formats/pdf/pdf.embedder.js'; | ||||||
| import { PDFExtractor } from './formats/pdf/pdf.extractor.js'; | import { PDFExtractor } from './formats/pdf/pdf.extractor.js'; | ||||||
|  | import { | ||||||
|  |   BaseXMLExtractor, | ||||||
|  |   StandardXMLExtractor, | ||||||
|  |   AssociatedFilesExtractor, | ||||||
|  |   TextXMLExtractor | ||||||
|  | } from './formats/pdf/extractors/index.js'; | ||||||
|  |  | ||||||
| // Import format detector | // Import format detector | ||||||
| import { FormatDetector } from './formats/utils/format.detector.js'; | import { FormatDetector } from './formats/utils/format.detector.js'; | ||||||
| @@ -36,6 +42,12 @@ import { FacturXDecoder } from './formats/cii/facturx/facturx.decoder.js'; | |||||||
| import { FacturXEncoder } from './formats/cii/facturx/facturx.encoder.js'; | import { FacturXEncoder } from './formats/cii/facturx/facturx.encoder.js'; | ||||||
| import { FacturXValidator } from './formats/cii/facturx/facturx.validator.js'; | import { FacturXValidator } from './formats/cii/facturx/facturx.validator.js'; | ||||||
|  |  | ||||||
|  | // Import ZUGFeRD implementation | ||||||
|  | import { ZUGFeRDDecoder } from './formats/cii/zugferd/zugferd.decoder.js'; | ||||||
|  | import { ZUGFeRDEncoder } from './formats/cii/zugferd/zugferd.encoder.js'; | ||||||
|  | import { ZUGFeRDValidator } from './formats/cii/zugferd/zugferd.validator.js'; | ||||||
|  | import { ZUGFeRDV1Decoder } from './formats/cii/zugferd/zugferd.v1.decoder.js'; | ||||||
|  |  | ||||||
| // Export interfaces | // Export interfaces | ||||||
| export type { | export type { | ||||||
|   // Common interfaces |   // Common interfaces | ||||||
| @@ -80,8 +92,18 @@ export { CIIBaseDecoder, CIIBaseEncoder, CIIBaseValidator }; | |||||||
| // Export Factur-X implementation | // Export Factur-X implementation | ||||||
| export { FacturXDecoder, FacturXEncoder, FacturXValidator }; | export { FacturXDecoder, FacturXEncoder, FacturXValidator }; | ||||||
|  |  | ||||||
|  | // Export ZUGFeRD implementation | ||||||
|  | export { ZUGFeRDDecoder, ZUGFeRDEncoder, ZUGFeRDValidator, ZUGFeRDV1Decoder }; | ||||||
|  |  | ||||||
| // Export PDF utilities | // Export PDF utilities | ||||||
| export { PDFEmbedder, PDFExtractor }; | export { | ||||||
|  |   PDFEmbedder, | ||||||
|  |   PDFExtractor, | ||||||
|  |   BaseXMLExtractor, | ||||||
|  |   StandardXMLExtractor, | ||||||
|  |   AssociatedFilesExtractor, | ||||||
|  |   TextXMLExtractor | ||||||
|  | }; | ||||||
|  |  | ||||||
| // Export format detector | // Export format detector | ||||||
| export { FormatDetector }; | export { FormatDetector }; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user