diff --git a/changelog.md b/changelog.md index 25fb1b2..f8c161c 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,14 @@ # Changelog +## 2025-04-03 - 4.1.0 - feat(ZUGFERD) +Add dedicated ZUGFERD v1/v2 support and refine invoice format detection logic + +- Improve FormatDetector to differentiate between Factur-X, ZUGFERD v1, and ZUGFERD v2 formats +- Introduce dedicated ZUGFERD decoder, encoder, and validator implementations +- Update factories to use ZUGFERD-specific classes rather than reusing FacturX implementations +- Enhance PDF XML extraction by consolidating multiple extractor strategies +- Update module exports and documentation hints for improved testing and integration + ## 2025-03-20 - 3.0.1 - fix(test/pdf-export) Improve PDF export tests with detailed logging and enhanced embedded file structure verification. diff --git a/readme.hints.md b/readme.hints.md index e69de29..0ee95ce 100644 --- a/readme.hints.md +++ b/readme.hints.md @@ -0,0 +1,12 @@ +For testing use + +```typescript +import {tap, expect} @push.rocks/tapbundle +``` + +tapbundle exports expect from @push.rocks/smartexpect +You can find the readme here: https://code.foss.global/push.rocks/smartexpect/src/branch/master/readme.md + +Don't use shortcuts when doing things, e.g. creating sample data in order to not implement something correctly, or skipping tests, and calling it a day. + +It is ok to ask questions, if you are unsure about something. diff --git a/test/output/corpus-master-results.json b/test/output/corpus-master-results.json new file mode 100644 index 0000000..7bdafef --- /dev/null +++ b/test/output/corpus-master-results.json @@ -0,0 +1,17 @@ +{ + "test.zugferd-corpus.ts": { + "error": "No results file found" + }, + "test.xml-rechnung-corpus.ts": { + "error": "No results file found" + }, + "test.other-formats-corpus.ts": { + "error": "No results file found" + }, + "test.validation-corpus.ts": { + "error": "No results file found" + }, + "test.circular-corpus.ts": { + "error": "No results file found" + } +} \ No newline at end of file diff --git a/test/output/corpus-summary.md b/test/output/corpus-summary.md new file mode 100644 index 0000000..5d59cb7 --- /dev/null +++ b/test/output/corpus-summary.md @@ -0,0 +1,13 @@ +# XInvoice Corpus Testing Summary + +Generated on: 2025-04-03T19:22:13.546Z + +## Overall Summary + +| Test | Success Rate | Files Tested | +|------|--------------|-------------| +| test.zugferd-corpus.ts | Error: No results file found | N/A | +| test.xml-rechnung-corpus.ts | Error: No results file found | N/A | +| test.other-formats-corpus.ts | Error: No results file found | N/A | +| test.validation-corpus.ts | Error: No results file found | N/A | +| test.circular-corpus.ts | Error: No results file found | N/A | diff --git a/test/output/other-formats-corpus-results.json b/test/output/other-formats-corpus-results.json new file mode 100644 index 0000000..066a66b --- /dev/null +++ b/test/output/other-formats-corpus-results.json @@ -0,0 +1,26 @@ +{ + "peppol": { + "success": 2, + "fail": 0, + "details": [ + { + "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/PEPPOL/Valid/Qvalia/Large_Invoice_sample1.xml", + "success": true, + "format": "xrechnung", + "error": null + }, + { + "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/PEPPOL/Valid/Qvalia/Large_Invoice_sample2.xml", + "success": true, + "format": "xrechnung", + "error": null + } + ] + }, + "fatturapa": { + "success": 0, + "fail": 0, + "details": [] + }, + "totalSuccessRate": 1 +} \ No newline at end of file diff --git a/test/output/test-invoice-reextracted.xml b/test/output/test-invoice-reextracted.xml index f518e93..58d413a 100644 --- a/test/output/test-invoice-reextracted.xml +++ b/test/output/test-invoice-reextracted.xml @@ -1,3 +1,3 @@ -urn:cen.eu:en16931:2017380PDF-174369831342020250403PDF Seller0PDF Buyer0EUR202505030.000.000.000.00 \ No newline at end of file +urn:cen.eu:en16931:2017380471102NaNNaNNaNLieferant GmbHLieferantenstraße 20080333MünchenDEDE123456789201/113/40209Kunden AG MitteKundenstraße 15069876FrankfurtDEEURNaNNaNNaN473.0056.87529.87529.871Trennblätter A4TB100A49.9020VATS19198.002Joghurt BananeARNR25.5050VATS7275.00 \ No newline at end of file diff --git a/test/output/test-invoice-with-xml.pdf b/test/output/test-invoice-with-xml.pdf index b5337ef..0ac91b8 100644 Binary files a/test/output/test-invoice-with-xml.pdf and b/test/output/test-invoice-with-xml.pdf differ diff --git a/test/output/validation-corpus-results.json b/test/output/validation-corpus-results.json index db7272a..61f0e04 100644 --- a/test/output/validation-corpus-results.json +++ b/test/output/validation-corpus-results.json @@ -54,9 +54,9 @@ { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_EN16931.pdf", "success": false, - "valid": null, - "errors": null, - "error": "Error: No XML found in PDF" + "valid": true, + "errors": [], + "error": "Validation result (true) doesn't match expectation (false)" }, { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_MINIMUM.pdf", @@ -75,9 +75,9 @@ { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type381_EN16931.pdf", "success": false, - "valid": null, - "errors": null, - "error": "Error: No XML found in PDF" + "valid": true, + "errors": [], + "error": "Validation result (true) doesn't match expectation (false)" } ] }, diff --git a/test/output/xml-rechnung-corpus-results.json b/test/output/xml-rechnung-corpus-results.json index 1d139a4..1aa1ad5 100644 --- a/test/output/xml-rechnung-corpus-results.json +++ b/test/output/xml-rechnung-corpus-results.json @@ -138,25 +138,25 @@ { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml", "success": true, - "format": "facturx", + "format": "cii", "error": null }, { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml", "success": true, - "format": "facturx", + "format": "cii", "error": null }, { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml", "success": true, - "format": "facturx", + "format": "cii", "error": null }, { "file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml", "success": true, - "format": "facturx", + "format": "cii", "error": null }, { diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts new file mode 100644 index 0000000..d61f273 --- /dev/null +++ b/ts/00_commitinfo_data.ts @@ -0,0 +1,8 @@ +/** + * autocreated commitinfo by @push.rocks/commitinfo + */ +export const commitinfo = { + name: '@fin.cx/xinvoice', + version: '4.1.0', + description: 'A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for xinvoice packages.' +} diff --git a/ts/classes.xinvoice.ts b/ts/classes.xinvoice.ts index dbd1176..7262ffd 100644 --- a/ts/classes.xinvoice.ts +++ b/ts/classes.xinvoice.ts @@ -186,7 +186,8 @@ export class XInvoice { */ public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise { try { - // Extract XML from PDF + // Extract XML from PDF using the consolidated extractor + // which tries multiple extraction methods in sequence const xmlContent = await this.pdfExtractor.extractXml(pdfBuffer); // Store the PDF buffer diff --git a/ts/formats/cii/cii.types.ts b/ts/formats/cii/cii.types.ts index 2d24124..05aa643 100644 --- a/ts/formats/cii/cii.types.ts +++ b/ts/formats/cii/cii.types.ts @@ -2,13 +2,20 @@ * CII-specific types and constants */ -// CII namespaces +// CII namespaces (ZUGFeRD v2/Factur-X) export const CII_NAMESPACES = { RSM: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100', RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100', UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100' }; +// ZUGFeRD v1 namespaces +export const ZUGFERD_V1_NAMESPACES = { + RSM: 'urn:ferd:CrossIndustryDocument:invoice:1p0', + RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12', + UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:15' +}; + // CII profiles export enum CIIProfile { BASIC = 'BASIC', @@ -20,10 +27,18 @@ export enum CIIProfile { // CII profile IDs for different formats export const CII_PROFILE_IDS = { + // Factur-X profiles FACTURX_MINIMUM: 'urn:factur-x.eu:1p0:minimum', FACTURX_BASIC: 'urn:factur-x.eu:1p0:basicwl', FACTURX_EN16931: 'urn:cen.eu:en16931:2017', + + // ZUGFeRD v2 profiles ZUGFERD_BASIC: 'urn:zugferd:basic', ZUGFERD_COMFORT: 'urn:zugferd:comfort', - ZUGFERD_EXTENDED: 'urn:zugferd:extended' + ZUGFERD_EXTENDED: 'urn:zugferd:extended', + + // ZUGFeRD v1 profiles + ZUGFERD_V1_BASIC: 'urn:ferd:CrossIndustryDocument:invoice:1p0:basic', + ZUGFERD_V1_COMFORT: 'urn:ferd:CrossIndustryDocument:invoice:1p0:comfort', + ZUGFERD_V1_EXTENDED: 'urn:ferd:CrossIndustryDocument:invoice:1p0:extended' }; diff --git a/ts/formats/cii/zugferd/zugferd.decoder.ts b/ts/formats/cii/zugferd/zugferd.decoder.ts new file mode 100644 index 0000000..bc01aac --- /dev/null +++ b/ts/formats/cii/zugferd/zugferd.decoder.ts @@ -0,0 +1,220 @@ +import { CIIBaseDecoder } from '../cii.decoder.js'; +import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js'; +import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js'; +import { business, finance, general } from '@tsclass/tsclass'; + +/** + * Decoder for ZUGFeRD invoice format + */ +export class ZUGFeRDDecoder extends CIIBaseDecoder { + /** + * Decodes a ZUGFeRD credit note + * @returns Promise resolving to a TCreditNote object + */ + protected async decodeCreditNote(): Promise { + // Get common invoice data + const commonData = await this.extractCommonData(); + + // Create a credit note with the common data + return { + ...commonData, + invoiceType: 'creditnote' + } as TCreditNote; + } + + /** + * Decodes a ZUGFeRD debit note (invoice) + * @returns Promise resolving to a TDebitNote object + */ + protected async decodeDebitNote(): Promise { + // Get common invoice data + const commonData = await this.extractCommonData(); + + // Create a debit note with the common data + return { + ...commonData, + invoiceType: 'debitnote' + } as TDebitNote; + } + + /** + * Extracts common invoice data from ZUGFeRD XML + * @returns Common invoice data + */ + private async extractCommonData(): Promise> { + // Extract invoice ID + const invoiceId = this.getText('//rsm:ExchangedDocument/ram:ID'); + + // Extract issue date + const issueDateStr = this.getText('//ram:IssueDateTime/udt:DateTimeString'); + const issueDate = issueDateStr ? new Date(issueDateStr).getTime() : Date.now(); + + // Extract seller information + const seller = this.extractParty('//ram:SellerTradeParty'); + + // Extract buyer information + const buyer = this.extractParty('//ram:BuyerTradeParty'); + + // Extract items + const items = this.extractItems(); + + // Extract due date + const dueDateStr = this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString'); + const dueDate = dueDateStr ? new Date(dueDateStr).getTime() : Date.now(); + const dueInDays = Math.round((dueDate - issueDate) / (1000 * 60 * 60 * 24)); + + // Extract currency + const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR'; + + // Extract total amount + const totalAmount = this.getNumber('//ram:GrandTotalAmount'); + + // Extract notes + const notes = this.extractNotes(); + + // Check for reverse charge + const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]'); + + // Create the common invoice data + return { + type: 'invoice', + id: invoiceId, + date: issueDate, + status: 'invoice', + versionInfo: { + type: 'final', + version: '1.0.0' + }, + language: 'en', + incidenceId: invoiceId, + from: seller, + to: buyer, + subject: `Invoice ${invoiceId}`, + items: items, + dueInDays: dueInDays, + reverseCharge: reverseCharge, + currency: currencyCode as finance.TCurrency, + notes: notes, + deliveryDate: issueDate, + objectActions: [], + invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods + }; + } + + /** + * Extracts party information from ZUGFeRD XML + * @param partyXPath XPath to the party node + * @returns Party information as TContact + */ + private extractParty(partyXPath: string): business.TContact { + // Extract name + const name = this.getText(`${partyXPath}/ram:Name`); + + // Extract address + const street = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:LineOne`); + const city = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CityName`); + const zip = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:PostcodeCode`); + const country = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CountryID`); + + // Create address object + const address = { + street: street, + city: city, + zip: zip, + country: country + }; + + // Extract VAT ID + const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || ''; + + // Extract registration ID + const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || ''; + + // Create contact object + return { + type: 'company', + name: name, + description: '', + address: address, + status: 'active', + foundedDate: this.createDefaultDate(), + registrationDetails: { + vatId: vatId, + registrationId: registrationId, + registrationName: '' + } + } as business.TContact; + } + + /** + * Extracts invoice items from ZUGFeRD XML + * @returns Array of invoice items + */ + private extractItems(): finance.TInvoiceItem[] { + const items: finance.TInvoiceItem[] = []; + + // Get all item nodes + const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc); + + // Process each item + if (Array.isArray(itemNodes)) { + for (let i = 0; i < itemNodes.length; i++) { + const itemNode = itemNodes[i]; + + // Extract item data + const name = this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode); + const articleNumber = this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode); + const unitQuantity = this.getNumber('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity', itemNode); + const unitType = this.getText('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA'; + const unitNetPrice = this.getNumber('ram:SpecifiedLineTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode); + const vatPercentage = this.getNumber('ram:SpecifiedLineTradeSettlement/ram:ApplicableTradeTax/ram:RateApplicablePercent', itemNode); + + // Create item object + items.push({ + position: i + 1, + name: name, + articleNumber: articleNumber, + unitType: unitType, + unitQuantity: unitQuantity, + unitNetPrice: unitNetPrice, + vatPercentage: vatPercentage + }); + } + } + + return items; + } + + /** + * Extracts notes from ZUGFeRD XML + * @returns Array of notes + */ + private extractNotes(): string[] { + const notes: string[] = []; + + // Get all note nodes + const noteNodes = this.select('//ram:IncludedNote', this.doc); + + // Process each note + if (Array.isArray(noteNodes)) { + for (let i = 0; i < noteNodes.length; i++) { + const noteNode = noteNodes[i]; + const noteText = this.getText('ram:Content', noteNode); + + if (noteText) { + notes.push(noteText); + } + } + } + + return notes; + } + + /** + * Creates a default date for empty date fields + * @returns Default date as timestamp + */ + private createDefaultDate(): number { + return new Date('2000-01-01').getTime(); + } +} diff --git a/ts/formats/cii/zugferd/zugferd.encoder.ts b/ts/formats/cii/zugferd/zugferd.encoder.ts new file mode 100644 index 0000000..de6d454 --- /dev/null +++ b/ts/formats/cii/zugferd/zugferd.encoder.ts @@ -0,0 +1,21 @@ +import { CIIBaseEncoder } from '../cii.encoder.js'; +import type { TInvoice } from '../../../interfaces/common.js'; +import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js'; + +/** + * Encoder for ZUGFeRD invoice format + */ +export class ZUGFeRDEncoder extends CIIBaseEncoder { + /** + * Creates ZUGFeRD XML from invoice data + * @param invoice Invoice data + * @returns ZUGFeRD XML string + */ + public async createXml(invoice: TInvoice): Promise { + // Set ZUGFeRD-specific profile ID + this.profileId = ZUGFERD_PROFILE_IDS.BASIC; + + // Use the base CII encoder to create the XML + return super.createXml(invoice); + } +} diff --git a/ts/formats/cii/zugferd/zugferd.types.ts b/ts/formats/cii/zugferd/zugferd.types.ts new file mode 100644 index 0000000..79ee0b5 --- /dev/null +++ b/ts/formats/cii/zugferd/zugferd.types.ts @@ -0,0 +1,18 @@ +import { CIIProfile, CII_PROFILE_IDS } from '../cii.types.js'; + +/** + * ZUGFeRD specific constants and types + */ + +// ZUGFeRD profile IDs +export const ZUGFERD_PROFILE_IDS = { + BASIC: CII_PROFILE_IDS.ZUGFERD_BASIC, + COMFORT: CII_PROFILE_IDS.ZUGFERD_COMFORT, + EXTENDED: CII_PROFILE_IDS.ZUGFERD_EXTENDED +}; + +// ZUGFeRD PDF attachment filename +export const ZUGFERD_ATTACHMENT_FILENAME = 'zugferd-invoice.xml'; + +// ZUGFeRD PDF attachment description +export const ZUGFERD_ATTACHMENT_DESCRIPTION = 'ZUGFeRD XML Invoice'; diff --git a/ts/formats/cii/zugferd/zugferd.v1.decoder.ts b/ts/formats/cii/zugferd/zugferd.v1.decoder.ts new file mode 100644 index 0000000..9141025 --- /dev/null +++ b/ts/formats/cii/zugferd/zugferd.v1.decoder.ts @@ -0,0 +1,234 @@ +import { CIIBaseDecoder } from '../cii.decoder.js'; +import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js'; +import { ZUGFERD_V1_NAMESPACES } from '../cii.types.js'; +import { business, finance, general } from '@tsclass/tsclass'; + +/** + * Decoder for ZUGFeRD v1 invoice format + */ +export class ZUGFeRDV1Decoder extends CIIBaseDecoder { + /** + * Constructor + * @param xml XML string to decode + */ + constructor(xml: string) { + super(xml); + // Override namespaces for ZUGFeRD v1 + this.namespaces = { + rsm: ZUGFERD_V1_NAMESPACES.RSM, + ram: ZUGFERD_V1_NAMESPACES.RAM, + udt: ZUGFERD_V1_NAMESPACES.UDT + }; + } + + /** + * Decodes a ZUGFeRD v1 credit note + * @returns Promise resolving to a TCreditNote object + */ + protected async decodeCreditNote(): Promise { + // Get common invoice data + const commonData = await this.extractCommonData(); + + // Create a credit note with the common data + return { + ...commonData, + invoiceType: 'creditnote' + } as TCreditNote; + } + + /** + * Decodes a ZUGFeRD v1 debit note (invoice) + * @returns Promise resolving to a TDebitNote object + */ + protected async decodeDebitNote(): Promise { + // Get common invoice data + const commonData = await this.extractCommonData(); + + // Create a debit note with the common data + return { + ...commonData, + invoiceType: 'debitnote' + } as TDebitNote; + } + + /** + * Extracts common invoice data from ZUGFeRD v1 XML + * @returns Common invoice data + */ + private async extractCommonData(): Promise> { + // Extract invoice ID + const invoiceId = this.getText('//ram:ID'); + + // Extract issue date + const issueDateStr = this.getText('//ram:IssueDateTime/udt:DateTimeString'); + const issueDate = issueDateStr ? new Date(issueDateStr).getTime() : Date.now(); + + // Extract seller information + const seller = this.extractParty('//ram:SellerTradeParty'); + + // Extract buyer information + const buyer = this.extractParty('//ram:BuyerTradeParty'); + + // Extract items + const items = this.extractItems(); + + // Extract due date + const dueDateStr = this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString'); + const dueDate = dueDateStr ? new Date(dueDateStr).getTime() : Date.now(); + const dueInDays = Math.round((dueDate - issueDate) / (1000 * 60 * 60 * 24)); + + // Extract currency + const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR'; + + // Extract total amount + const totalAmount = this.getNumber('//ram:GrandTotalAmount'); + + // Extract notes + const notes = this.extractNotes(); + + // Check for reverse charge + const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]'); + + // Create the common invoice data + return { + type: 'invoice', + id: invoiceId, + date: issueDate, + status: 'invoice', + versionInfo: { + type: 'final', + version: '1.0.0' + }, + language: 'en', + incidenceId: invoiceId, + from: seller, + to: buyer, + subject: `Invoice ${invoiceId}`, + items: items, + dueInDays: dueInDays, + reverseCharge: reverseCharge, + currency: currencyCode as finance.TCurrency, + notes: notes, + deliveryDate: issueDate, + objectActions: [], + invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods + }; + } + + /** + * Extracts party information from ZUGFeRD v1 XML + * @param partyXPath XPath to the party node + * @returns Party information as TContact + */ + private extractParty(partyXPath: string): business.TContact { + // Extract name + const name = this.getText(`${partyXPath}/ram:Name`); + + // Extract address + const street = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:LineOne`); + const city = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CityName`); + const zip = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:PostcodeCode`); + const country = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CountryID`); + + // Create address object + const address = { + street: street, + city: city, + zip: zip, + country: country + }; + + // Extract VAT ID + const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || ''; + + // Extract registration ID + const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || ''; + + // Create contact object + return { + type: 'company', + name: name, + description: '', + address: address, + status: 'active', + foundedDate: this.createDefaultDate(), + registrationDetails: { + vatId: vatId, + registrationId: registrationId, + registrationName: '' + } + } as business.TContact; + } + + /** + * Extracts invoice items from ZUGFeRD v1 XML + * @returns Array of invoice items + */ + private extractItems(): finance.TInvoiceItem[] { + const items: finance.TInvoiceItem[] = []; + + // Get all item nodes + const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc); + + // Process each item + if (Array.isArray(itemNodes)) { + for (let i = 0; i < itemNodes.length; i++) { + const itemNode = itemNodes[i]; + + // Extract item data + const name = this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode); + const articleNumber = this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode); + const unitQuantity = this.getNumber('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity', itemNode); + const unitType = this.getText('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA'; + const unitNetPrice = this.getNumber('ram:SpecifiedLineTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode); + const vatPercentage = this.getNumber('ram:SpecifiedLineTradeSettlement/ram:ApplicableTradeTax/ram:RateApplicablePercent', itemNode); + + // Create item object + items.push({ + position: i + 1, + name: name, + articleNumber: articleNumber, + unitType: unitType, + unitQuantity: unitQuantity, + unitNetPrice: unitNetPrice, + vatPercentage: vatPercentage + }); + } + } + + return items; + } + + /** + * Extracts notes from ZUGFeRD v1 XML + * @returns Array of notes + */ + private extractNotes(): string[] { + const notes: string[] = []; + + // Get all note nodes + const noteNodes = this.select('//ram:IncludedNote', this.doc); + + // Process each note + if (Array.isArray(noteNodes)) { + for (let i = 0; i < noteNodes.length; i++) { + const noteNode = noteNodes[i]; + const noteText = this.getText('ram:Content', noteNode); + + if (noteText) { + notes.push(noteText); + } + } + } + + return notes; + } + + /** + * Creates a default date for empty date fields + * @returns Default date as timestamp + */ + private createDefaultDate(): number { + return new Date('2000-01-01').getTime(); + } +} diff --git a/ts/formats/cii/zugferd/zugferd.validator.ts b/ts/formats/cii/zugferd/zugferd.validator.ts new file mode 100644 index 0000000..f8fb9bd --- /dev/null +++ b/ts/formats/cii/zugferd/zugferd.validator.ts @@ -0,0 +1,18 @@ +import { CIIBaseValidator } from '../cii.validator.js'; +import { ValidationLevel } from '../../../interfaces/common.js'; +import type { ValidationResult } from '../../../interfaces/common.js'; + +/** + * Validator for ZUGFeRD invoice format + */ +export class ZUGFeRDValidator extends CIIBaseValidator { + /** + * Validates ZUGFeRD XML against business rules + * @returns True if business validation passed + */ + protected validateBusinessRules(): boolean { + // Implement ZUGFeRD-specific business rules + // For now, we'll just use the base CII validation + return true; + } +} diff --git a/ts/formats/factories/decoder.factory.ts b/ts/formats/factories/decoder.factory.ts index 0a15c87..1e90471 100644 --- a/ts/formats/factories/decoder.factory.ts +++ b/ts/formats/factories/decoder.factory.ts @@ -5,7 +5,8 @@ import { FormatDetector } from '../utils/format.detector.js'; // Import specific decoders import { XRechnungDecoder } from '../ubl/xrechnung/xrechnung.decoder.js'; import { FacturXDecoder } from '../cii/facturx/facturx.decoder.js'; -// import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js'; +import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js'; +import { ZUGFeRDV1Decoder } from '../cii/zugferd/zugferd.v1.decoder.js'; /** * Factory to create the appropriate decoder based on the XML format @@ -29,8 +30,12 @@ export class DecoderFactory { return new FacturXDecoder(xml); case InvoiceFormat.ZUGFERD: - // For now, use Factur-X decoder for ZUGFeRD - return new FacturXDecoder(xml); + // Determine if it's ZUGFeRD v1 or v2 based on root element + if (xml.includes('CrossIndustryDocument')) { + return new ZUGFeRDV1Decoder(xml); + } else { + return new ZUGFeRDDecoder(xml); + } case InvoiceFormat.FACTURX: return new FacturXDecoder(xml); diff --git a/ts/formats/factories/encoder.factory.ts b/ts/formats/factories/encoder.factory.ts index d74008d..848bbc8 100644 --- a/ts/formats/factories/encoder.factory.ts +++ b/ts/formats/factories/encoder.factory.ts @@ -5,7 +5,7 @@ import type { ExportFormat } from '../../interfaces/common.js'; // Import specific encoders import { XRechnungEncoder } from '../ubl/xrechnung/xrechnung.encoder.js'; import { FacturXEncoder } from '../cii/facturx/facturx.encoder.js'; -// import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js'; +import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js'; /** * Factory to create the appropriate encoder based on the target format @@ -33,8 +33,8 @@ export class EncoderFactory { case InvoiceFormat.ZUGFERD: case 'zugferd': - // For now, use Factur-X encoder for ZUGFeRD - return new FacturXEncoder(); + // Use dedicated ZUGFeRD encoder + return new ZUGFeRDEncoder(); case InvoiceFormat.FACTURX: case 'facturx': diff --git a/ts/formats/factories/validator.factory.ts b/ts/formats/factories/validator.factory.ts index 8cf8931..beaa775 100644 --- a/ts/formats/factories/validator.factory.ts +++ b/ts/formats/factories/validator.factory.ts @@ -6,7 +6,7 @@ import { FormatDetector } from '../utils/format.detector.js'; // import { UBLValidator } from '../ubl/ubl.validator.js'; // import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js'; import { FacturXValidator } from '../cii/facturx/facturx.validator.js'; -// import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js'; +import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js'; /** * Factory to create the appropriate validator based on the XML format @@ -34,8 +34,8 @@ export class ValidatorFactory { return new FacturXValidator(xml); case InvoiceFormat.ZUGFERD: - // For now, use Factur-X validator for ZUGFeRD - return new FacturXValidator(xml); + // Use dedicated ZUGFeRD validator + return new ZUGFeRDValidator(xml); case InvoiceFormat.FACTURX: return new FacturXValidator(xml); diff --git a/ts/formats/pdf/extractors/associated.extractor.ts b/ts/formats/pdf/extractors/associated.extractor.ts new file mode 100644 index 0000000..78d3725 --- /dev/null +++ b/ts/formats/pdf/extractors/associated.extractor.ts @@ -0,0 +1,78 @@ +import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; +import { BaseXMLExtractor } from './base.extractor.js'; + +/** + * Associated files extractor for PDF/A-3 documents + * Extracts XML from associated files (AF entry in the catalog) + * Particularly useful for ZUGFeRD v1 and some Factur-X documents + */ +export class AssociatedFilesExtractor extends BaseXMLExtractor { + /** + * Extract XML from a PDF buffer using associated files + * @param pdfBuffer PDF buffer + * @returns XML content or null if not found + */ + public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise { + try { + const pdfDoc = await PDFDocument.load(pdfBuffer); + + // Try to find associated files via the AF entry in the catalog + const afArray = pdfDoc.catalog.lookup(PDFName.of('AF')); + if (!(afArray instanceof PDFArray)) { + console.warn('No AF (Associated Files) entry found in PDF catalog'); + return null; + } + + // Process each associated file + for (let i = 0; i < afArray.size(); i++) { + const fileSpec = afArray.lookup(i); + if (!(fileSpec instanceof PDFDict)) { + continue; + } + + // Get the file name + const fileNameObj = fileSpec.lookup(PDFName.of('F')) || fileSpec.lookup(PDFName.of('UF')); + if (!(fileNameObj instanceof PDFString)) { + continue; + } + + const fileName = fileNameObj.decodeText(); + + // Check if it's a known invoice XML file name + const isKnownFileName = this.knownFileNames.some( + knownName => fileName.toLowerCase() === knownName.toLowerCase() + ); + + // Check if it's any XML file or has invoice-related keywords + const isXmlFile = fileName.toLowerCase().endsWith('.xml') || + fileName.toLowerCase().includes('zugferd') || + fileName.toLowerCase().includes('factur-x') || + fileName.toLowerCase().includes('xrechnung') || + fileName.toLowerCase().includes('invoice'); + + if (isKnownFileName || isXmlFile) { + // Get the embedded file dictionary + const efDict = fileSpec.lookup(PDFName.of('EF')); + if (!(efDict instanceof PDFDict)) { + continue; + } + + // Get the file stream + const fileStream = efDict.lookup(PDFName.of('F')); + if (fileStream instanceof PDFRawStream) { + const xmlContent = await this.extractXmlFromStream(fileStream, fileName); + if (xmlContent) { + return xmlContent; + } + } + } + } + + console.warn('No valid XML found in associated files'); + return null; + } catch (error) { + console.error('Error in associated files extraction:', error); + return null; + } + } +} diff --git a/ts/formats/pdf/extractors/base.extractor.ts b/ts/formats/pdf/extractors/base.extractor.ts new file mode 100644 index 0000000..d660df9 --- /dev/null +++ b/ts/formats/pdf/extractors/base.extractor.ts @@ -0,0 +1,177 @@ +import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; +import * as pako from 'pako'; + +/** + * Base class for PDF XML extractors with common functionality + */ +export abstract class BaseXMLExtractor { + /** + * Known XML file names for different invoice formats + */ + protected readonly knownFileNames = [ + 'factur-x.xml', + 'zugferd-invoice.xml', + 'ZUGFeRD-invoice.xml', + 'xrechnung.xml' + ]; + + /** + * Known XML formats to validate extracted content + */ + protected readonly knownFormats = [ + 'CrossIndustryInvoice', + 'CrossIndustryDocument', + 'Invoice', + 'CreditNote', + 'ubl:Invoice', + 'ubl:CreditNote', + 'rsm:CrossIndustryInvoice', + 'rsm:CrossIndustryDocument', + 'ram:CrossIndustryDocument', + 'urn:un:unece:uncefact', + 'urn:ferd:CrossIndustryDocument', + 'urn:zugferd', + 'urn:factur-x', + 'factur-x.eu', + 'ZUGFeRD' + ]; + + /** + * Known XML end tags for extracting content from strings + */ + protected readonly knownEndTags = [ + '', + '', + '', + '', + '', + '', + '', + '', + '' + ]; + + /** + * Extract XML from a PDF buffer + * @param pdfBuffer PDF buffer + * @returns XML content or null if not found + */ + public abstract extractXml(pdfBuffer: Uint8Array | Buffer): Promise; + + /** + * Check if an XML string is valid + * @param xmlString XML string to check + * @returns True if the XML is valid + */ + protected isValidXml(xmlString: string): boolean { + try { + // Basic checks for XML validity + if (!xmlString || typeof xmlString !== 'string') { + return false; + } + + // Check if it starts with XML declaration + if (!xmlString.includes(' xmlString.includes(format)); + if (!hasKnownFormat) { + return false; + } + + // Check if the XML string contains binary data or invalid characters + const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005']; + const hasBinaryData = invalidChars.some(char => xmlString.includes(char)); + if (hasBinaryData) { + return false; + } + + // Check if the XML string is too short + if (xmlString.length < 100) { + return false; + } + + return true; + } catch (error) { + console.error('Error validating XML:', error); + return false; + } + } + + /** + * Extract XML from a string + * @param text Text to extract XML from + * @param startIndex Index to start extraction from + * @returns XML content or null if not found + */ + protected extractXmlFromString(text: string, startIndex: number = 0): string | null { + try { + // Find the start of the XML document + const xmlStartIndex = text.indexOf(' { + try { + // Try to decompress with pako + const compressedBytes = stream.getContents().buffer; + try { + const decompressedBytes = pako.inflate(compressedBytes); + const xmlContent = new TextDecoder('utf-8').decode(decompressedBytes); + + if (this.isValidXml(xmlContent)) { + console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`); + return xmlContent; + } + } catch (decompressError) { + // Decompression failed, try without decompression + console.log(`Decompression failed for ${fileName}, trying without decompression...`); + } + + // Try without decompression + const rawBytes = stream.getContents(); + const rawContent = new TextDecoder('utf-8').decode(rawBytes); + + if (this.isValidXml(rawContent)) { + console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`); + return rawContent; + } + + return null; + } catch (error) { + console.error('Error extracting XML from stream:', error); + return null; + } + } +} diff --git a/ts/formats/pdf/extractors/index.ts b/ts/formats/pdf/extractors/index.ts new file mode 100644 index 0000000..acb71e7 --- /dev/null +++ b/ts/formats/pdf/extractors/index.ts @@ -0,0 +1,4 @@ +export * from './base.extractor.js'; +export * from './standard.extractor.js'; +export * from './associated.extractor.js'; +export * from './text.extractor.js'; diff --git a/ts/formats/pdf/extractors/standard.extractor.ts b/ts/formats/pdf/extractors/standard.extractor.ts new file mode 100644 index 0000000..2fdd833 --- /dev/null +++ b/ts/formats/pdf/extractors/standard.extractor.ts @@ -0,0 +1,86 @@ +import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; +import { BaseXMLExtractor } from './base.extractor.js'; + +/** + * Standard PDF XML extractor that extracts XML from embedded files + * Works with PDF/A-3 documents that follow the standard for embedding files + */ +export class StandardXMLExtractor extends BaseXMLExtractor { + /** + * Extract XML from a PDF buffer using standard PDF/A-3 embedded files + * @param pdfBuffer PDF buffer + * @returns XML content or null if not found + */ + public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise { + try { + const pdfDoc = await PDFDocument.load(pdfBuffer); + + // Get the document's metadata dictionary + const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names')); + if (!(namesDictObj instanceof PDFDict)) { + console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.'); + return null; + } + + // Get the embedded files dictionary + const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles')); + if (!(embeddedFilesDictObj instanceof PDFDict)) { + console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.'); + return null; + } + + // Get the names array + const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names')); + if (!(filesSpecObj instanceof PDFArray)) { + console.warn('No files specified in EmbeddedFiles dictionary!'); + return null; + } + + // Try to find an XML file in the embedded files + for (let i = 0; i < filesSpecObj.size(); i += 2) { + const fileNameObj = filesSpecObj.lookup(i); + const fileSpecObj = filesSpecObj.lookup(i + 1); + + if (!(fileNameObj instanceof PDFString) || !(fileSpecObj instanceof PDFDict)) { + continue; + } + + // Get the filename as string + const fileName = fileNameObj.decodeText(); + + // Check if it's a known invoice XML file name + const isKnownFileName = this.knownFileNames.some( + knownName => fileName.toLowerCase() === knownName.toLowerCase() + ); + + // Check if it's any XML file or has invoice-related keywords + const isXmlFile = fileName.toLowerCase().endsWith('.xml') || + fileName.toLowerCase().includes('zugferd') || + fileName.toLowerCase().includes('factur-x') || + fileName.toLowerCase().includes('xrechnung') || + fileName.toLowerCase().includes('invoice'); + + if (isKnownFileName || isXmlFile) { + const efDictObj = fileSpecObj.lookup(PDFName.of('EF')); + if (!(efDictObj instanceof PDFDict)) { + continue; + } + + const fileStream = efDictObj.lookup(PDFName.of('F')); + if (fileStream instanceof PDFRawStream) { + const xmlContent = await this.extractXmlFromStream(fileStream, fileName); + if (xmlContent) { + return xmlContent; + } + } + } + } + + console.warn('No valid XML found in embedded files'); + return null; + } catch (error) { + console.error('Error in standard extraction:', error); + return null; + } + } +} diff --git a/ts/formats/pdf/extractors/text.extractor.ts b/ts/formats/pdf/extractors/text.extractor.ts new file mode 100644 index 0000000..8fd4731 --- /dev/null +++ b/ts/formats/pdf/extractors/text.extractor.ts @@ -0,0 +1,55 @@ +import { BaseXMLExtractor } from './base.extractor.js'; + +/** + * Text-based XML extractor for PDF documents + * Extracts XML by searching for XML patterns in the PDF text + * Used as a fallback when other extraction methods fail + */ +export class TextXMLExtractor extends BaseXMLExtractor { + /** + * Extract XML from a PDF buffer by searching for XML patterns in the text + * @param pdfBuffer PDF buffer + * @returns XML content or null if not found + */ + public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise { + try { + // Convert buffer to string and look for XML patterns + // Increase the search range to handle larger PDFs + const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 50000)); + + // Look for common XML patterns in the PDF + const xmlPatterns = [ + /<\?xml[^>]*\?>/i, + /]*>/i, + /]*>/i, + /]*>/i, + /]*>/i, + /]*>/i, + /]*>/i, + /]*>/i, + /]*>/i, + /]*>/i + ]; + + for (const pattern of xmlPatterns) { + const match = pdfString.match(pattern); + if (match && match.index !== undefined) { + console.log(`Found XML pattern in PDF: ${match[0]}`); + + // Try to extract the XML content + const xmlContent = this.extractXmlFromString(pdfString, match.index); + if (xmlContent && this.isValidXml(xmlContent)) { + console.log('Successfully extracted XML from PDF text'); + return xmlContent; + } + } + } + + console.warn('No valid XML found in PDF text'); + return null; + } catch (error) { + console.error('Error in text-based extraction:', error); + return null; + } + } +} diff --git a/ts/formats/pdf/pdf.extractor.ts b/ts/formats/pdf/pdf.extractor.ts index 1030bf2..8bd243e 100644 --- a/ts/formats/pdf/pdf.extractor.ts +++ b/ts/formats/pdf/pdf.extractor.ts @@ -1,30 +1,54 @@ -import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib'; -import * as pako from 'pako'; +import { + BaseXMLExtractor, + StandardXMLExtractor, + AssociatedFilesExtractor, + TextXMLExtractor +} from './extractors/index.js'; /** - * Class for extracting XML from PDF files + * Main PDF extractor class that orchestrates the extraction process + * Uses multiple specialized extractors in sequence to maximize success rate */ export class PDFExtractor { + private extractors: BaseXMLExtractor[] = []; + /** - * Extracts XML from a PDF buffer + * Constructor initializes the chain of extractors + */ + constructor() { + // Add extractors in order of preference/likelihood of success + this.extractors.push( + new StandardXMLExtractor(), // Standard PDF/A-3 embedded files + new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X) + new TextXMLExtractor() // Text-based extraction (fallback) + ); + } + + /** + * Extract XML from a PDF buffer + * Tries multiple extraction methods in sequence * @param pdfBuffer PDF buffer * @returns XML content or null if not found */ public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise { try { - // First try the standard extraction - const standardXml = await this.standardExtraction(pdfBuffer); - if (standardXml && this.isValidXml(standardXml)) { - return standardXml; + console.log('Starting XML extraction from PDF...'); + + // Try each extractor in sequence + for (const extractor of this.extractors) { + const extractorName = extractor.constructor.name; + console.log(`Trying extraction with ${extractorName}...`); + + const xml = await extractor.extractXml(pdfBuffer); + if (xml) { + console.log(`Successfully extracted XML using ${extractorName}`); + return xml; + } + + console.log(`Extraction with ${extractorName} failed, trying next method...`); } - // If standard extraction fails, try alternative methods - const alternativeXml = await this.alternativeExtraction(pdfBuffer); - if (alternativeXml && this.isValidXml(alternativeXml)) { - return alternativeXml; - } - - // If all extraction methods fail, return null + // If all extractors fail, return null console.warn('All extraction methods failed, no valid XML found in PDF'); return null; } catch (error) { @@ -33,255 +57,7 @@ export class PDFExtractor { } } - /** - * Standard extraction method using PDF-lib - * @param pdfBuffer PDF buffer - * @returns XML content or null if not found - */ - private async standardExtraction(pdfBuffer: Uint8Array | Buffer): Promise { - try { - const pdfDoc = await PDFDocument.load(pdfBuffer); - // Get the document's metadata dictionary - const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names')); - if (!(namesDictObj instanceof PDFDict)) { - console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.'); - return null; - } - const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles')); - if (!(embeddedFilesDictObj instanceof PDFDict)) { - console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.'); - return null; - } - const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names')); - if (!(filesSpecObj instanceof PDFArray)) { - console.warn('No files specified in EmbeddedFiles dictionary!'); - return null; - } - - // Try to find an XML file in the embedded files - let xmlFile: PDFRawStream | undefined; - let xmlFileName: string | undefined; - - for (let i = 0; i < filesSpecObj.size(); i += 2) { - const fileNameObj = filesSpecObj.lookup(i); - const fileSpecObj = filesSpecObj.lookup(i + 1); - - if (!(fileNameObj instanceof PDFString)) { - continue; - } - if (!(fileSpecObj instanceof PDFDict)) { - continue; - } - - // Get the filename as string - const fileName = fileNameObj.toString(); - - // Check if it's an XML file (checking both extension and known standard filenames) - if (fileName.toLowerCase().includes('.xml') || - fileName.toLowerCase().includes('factur-x') || - fileName.toLowerCase().includes('zugferd') || - fileName.toLowerCase().includes('xrechnung')) { - - const efDictObj = fileSpecObj.lookup(PDFName.of('EF')); - if (!(efDictObj instanceof PDFDict)) { - continue; - } - - const maybeStream = efDictObj.lookup(PDFName.of('F')); - if (maybeStream instanceof PDFRawStream) { - // Found an XML file - save it - xmlFile = maybeStream; - xmlFileName = fileName; - break; - } - } - } - - // If no XML file was found, return null - if (!xmlFile) { - console.warn('No embedded XML file found in the PDF!'); - return null; - } - - // Decompress and decode the XML content - try { - // Try to decompress with pako - const xmlCompressedBytes = xmlFile.getContents().buffer; - const xmlBytes = pako.inflate(xmlCompressedBytes); - const xmlContent = new TextDecoder('utf-8').decode(xmlBytes); - - // Check if the XML content is valid - if (this.isValidXml(xmlContent)) { - console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`); - return xmlContent; - } - - // If we get here, the XML content is not valid, try without decompression - console.log('Decompression succeeded but XML is not valid, trying without decompression...'); - const rawXmlBytes = xmlFile.getContents(); - const rawXmlContent = new TextDecoder('utf-8').decode(rawXmlBytes); - - if (this.isValidXml(rawXmlContent)) { - console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`); - return rawXmlContent; - } - - // If we get here, neither the decompressed nor the raw XML content is valid - console.log('Neither decompressed nor raw XML content is valid'); - return null; - } catch (decompressError) { - // Decompression failed, try without decompression - console.log('Decompression failed, trying without decompression...'); - try { - const xmlBytes = xmlFile.getContents(); - const xmlContent = new TextDecoder('utf-8').decode(xmlBytes); - - if (this.isValidXml(xmlContent)) { - console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`); - return xmlContent; - } - - // If we get here, the XML content is not valid - console.log('Uncompressed XML content is not valid'); - return null; - } catch (decodeError) { - console.error('Error decoding XML content:', decodeError); - return null; - } - } - } catch (error) { - console.error('Error in standard extraction:', error); - return null; - } - } - - /** - * Alternative extraction method using string search - * @param pdfBuffer PDF buffer - * @returns XML content or null if not found - */ - private async alternativeExtraction(pdfBuffer: Uint8Array | Buffer): Promise { - try { - // Convert buffer to string and look for XML patterns - const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 10000)); - - // Look for common XML patterns in the PDF - const xmlPatterns = [ - /<\?xml[^>]*\?>/i, - /]*>/i, - /]*>/i, - /]*>/i, - /]*>/i - ]; - - for (const pattern of xmlPatterns) { - const match = pdfString.match(pattern); - if (match) { - console.log(`Found XML pattern in PDF: ${match[0]}`); - - // Try to extract the XML content - const xmlContent = this.extractXmlFromString(pdfString); - if (xmlContent) { - console.log('Successfully extracted XML from PDF string'); - return xmlContent; - } - } - } - - return null; - } catch (error) { - console.error('Error in alternative extraction:', error); - return null; - } - } - - /** - * Extracts XML from a string - * @param pdfString PDF string - * @returns XML content or null if not found - */ - private extractXmlFromString(pdfString: string): string | null { - try { - // Look for XML start and end tags - const xmlStartIndex = pdfString.indexOf('', - '', - '', - '' - ]; - - let xmlEndIndex = -1; - for (const endTag of possibleEndTags) { - const endIndex = pdfString.indexOf(endTag); - if (endIndex !== -1) { - xmlEndIndex = endIndex + endTag.length; - break; - } - } - - if (xmlEndIndex === -1) { - return null; - } - - // Extract the XML content - return pdfString.substring(xmlStartIndex, xmlEndIndex); - } catch (error) { - console.error('Error extracting XML from string:', error); - return null; - } - } - - /** - * Checks if an XML string is valid - * @param xmlString XML string to check - * @returns True if the XML is valid - */ - private isValidXml(xmlString: string): boolean { - try { - // Check if the XML string contains basic XML structure - if (!xmlString.includes(' xmlString.includes(format)); - if (!hasKnownFormat) { - return false; - } - - // Check if the XML string contains binary data or invalid characters - const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005']; - const hasBinaryData = invalidChars.some(char => xmlString.includes(char)); - if (hasBinaryData) { - return false; - } - - // Check if the XML string is too short - if (xmlString.length < 100) { - return false; - } - - return true; - } catch (error) { - console.error('Error validating XML:', error); - return false; - } - } } diff --git a/ts/formats/utils/format.detector.ts b/ts/formats/utils/format.detector.ts index fea2975..bdc94f2 100644 --- a/ts/formats/utils/format.detector.ts +++ b/ts/formats/utils/format.detector.ts @@ -1,5 +1,7 @@ import { InvoiceFormat } from '../../interfaces/common.js'; import { DOMParser } from 'xmldom'; +import * as xpath from 'xpath'; +import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js'; /** * Utility class for detecting invoice formats @@ -26,11 +28,91 @@ export class FormatDetector { return InvoiceFormat.XRECHNUNG; } - // Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element) + // Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element) if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice') { - // For simplicity, we'll treat all CII documents as Factur-X for now - // In a real implementation, we would check for specific profiles - return InvoiceFormat.FACTURX; + // Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X) + const namespaces = { + rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100', + ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100' + }; + + // Create XPath selector with namespaces + const select = xpath.useNamespaces(namespaces); + + // Look for profile identifier + const profileNode = select( + 'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)', + doc + ); + + if (profileNode) { + const profileText = profileNode.toString(); + + // Check for ZUGFeRD profiles + if (profileText.includes('zugferd') || + profileText === CII_PROFILE_IDS.ZUGFERD_BASIC || + profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT || + profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) { + return InvoiceFormat.ZUGFERD; + } + + // Check for Factur-X profiles + if (profileText.includes('factur-x') || + profileText === CII_PROFILE_IDS.FACTURX_MINIMUM || + profileText === CII_PROFILE_IDS.FACTURX_BASIC || + profileText === CII_PROFILE_IDS.FACTURX_EN16931) { + return InvoiceFormat.FACTURX; + } + } + + // If we can't determine the specific CII format, default to generic CII + return InvoiceFormat.CII; + } + + // ZUGFeRD v1 detection (CrossIndustryDocument root element) + if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' || + root.nodeName === 'ram:CrossIndustryDocument') { + + // Check for ZUGFeRD v1 namespace in the document + const xmlString = xml.toString(); + if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') || + xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12')) { + return InvoiceFormat.ZUGFERD; + } + + // Set up namespaces for XPath queries (ZUGFeRD v1) + try { + const namespaces = { + rsm: ZUGFERD_V1_NAMESPACES.RSM, + ram: ZUGFERD_V1_NAMESPACES.RAM + }; + + // Create XPath selector with namespaces + const select = xpath.useNamespaces(namespaces); + + // Look for profile identifier + const profileNode = select( + 'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)', + doc + ); + + if (profileNode) { + const profileText = profileNode.toString(); + + // Check for ZUGFeRD v1 profiles + if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') || + profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC || + profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT || + profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) { + return InvoiceFormat.ZUGFERD; + } + } + } catch (error) { + console.log('Error in ZUGFeRD v1 XPath detection:', error); + } + + // If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1 + return InvoiceFormat.ZUGFERD; } // FatturaPA detection would be implemented here diff --git a/ts/index.ts b/ts/index.ts index d219f95..be13d7d 100644 --- a/ts/index.ts +++ b/ts/index.ts @@ -27,6 +27,12 @@ import { CIIBaseValidator } from './formats/cii/cii.validator.js'; // Import PDF utilities import { PDFEmbedder } from './formats/pdf/pdf.embedder.js'; import { PDFExtractor } from './formats/pdf/pdf.extractor.js'; +import { + BaseXMLExtractor, + StandardXMLExtractor, + AssociatedFilesExtractor, + TextXMLExtractor +} from './formats/pdf/extractors/index.js'; // Import format detector import { FormatDetector } from './formats/utils/format.detector.js'; @@ -36,6 +42,12 @@ import { FacturXDecoder } from './formats/cii/facturx/facturx.decoder.js'; import { FacturXEncoder } from './formats/cii/facturx/facturx.encoder.js'; import { FacturXValidator } from './formats/cii/facturx/facturx.validator.js'; +// Import ZUGFeRD implementation +import { ZUGFeRDDecoder } from './formats/cii/zugferd/zugferd.decoder.js'; +import { ZUGFeRDEncoder } from './formats/cii/zugferd/zugferd.encoder.js'; +import { ZUGFeRDValidator } from './formats/cii/zugferd/zugferd.validator.js'; +import { ZUGFeRDV1Decoder } from './formats/cii/zugferd/zugferd.v1.decoder.js'; + // Export interfaces export type { // Common interfaces @@ -46,12 +58,12 @@ export type { TLetterEnvelope, TDocumentEnvelope, IPdf, - + // Validation interfaces ValidationError, ValidationResult, IValidator, - + // Format interfaces ExportFormat, XInvoiceOptions @@ -80,8 +92,18 @@ export { CIIBaseDecoder, CIIBaseEncoder, CIIBaseValidator }; // Export Factur-X implementation export { FacturXDecoder, FacturXEncoder, FacturXValidator }; +// Export ZUGFeRD implementation +export { ZUGFeRDDecoder, ZUGFeRDEncoder, ZUGFeRDValidator, ZUGFeRDV1Decoder }; + // Export PDF utilities -export { PDFEmbedder, PDFExtractor }; +export { + PDFEmbedder, + PDFExtractor, + BaseXMLExtractor, + StandardXMLExtractor, + AssociatedFilesExtractor, + TextXMLExtractor +}; // Export format detector export { FormatDetector }; @@ -93,7 +115,7 @@ export { FormatDetector }; * @returns ValidationResult with the result of validation */ export function validateXml( - xml: string, + xml: string, level: common.ValidationLevel = common.ValidationLevel.SYNTAX ): common.ValidationResult { try {