feat(ZUGFERD): Add dedicated ZUGFERD v1/v2 support and refine invoice format detection logic
This commit is contained in:
parent
b4a95de482
commit
46331c2bf6
@ -1,5 +1,14 @@
|
||||
# Changelog
|
||||
|
||||
## 2025-04-03 - 4.1.0 - feat(ZUGFERD)
|
||||
Add dedicated ZUGFERD v1/v2 support and refine invoice format detection logic
|
||||
|
||||
- Improve FormatDetector to differentiate between Factur-X, ZUGFERD v1, and ZUGFERD v2 formats
|
||||
- Introduce dedicated ZUGFERD decoder, encoder, and validator implementations
|
||||
- Update factories to use ZUGFERD-specific classes rather than reusing FacturX implementations
|
||||
- Enhance PDF XML extraction by consolidating multiple extractor strategies
|
||||
- Update module exports and documentation hints for improved testing and integration
|
||||
|
||||
## 2025-03-20 - 3.0.1 - fix(test/pdf-export)
|
||||
Improve PDF export tests with detailed logging and enhanced embedded file structure verification.
|
||||
|
||||
|
@ -0,0 +1,12 @@
|
||||
For testing use
|
||||
|
||||
```typescript
|
||||
import { tap, expect } from '@push.rocks/tapbundle';
|
||||
```
|
||||
|
||||
tapbundle exports expect from @push.rocks/smartexpect
|
||||
You can find the readme here: https://code.foss.global/push.rocks/smartexpect/src/branch/master/readme.md
|
||||
|
||||
Don't take shortcuts and call it a day — for example, creating sample data to avoid implementing something correctly, or skipping tests.
|
||||
|
||||
It is ok to ask questions, if you are unsure about something.
|
17
test/output/corpus-master-results.json
Normal file
17
test/output/corpus-master-results.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"test.zugferd-corpus.ts": {
|
||||
"error": "No results file found"
|
||||
},
|
||||
"test.xml-rechnung-corpus.ts": {
|
||||
"error": "No results file found"
|
||||
},
|
||||
"test.other-formats-corpus.ts": {
|
||||
"error": "No results file found"
|
||||
},
|
||||
"test.validation-corpus.ts": {
|
||||
"error": "No results file found"
|
||||
},
|
||||
"test.circular-corpus.ts": {
|
||||
"error": "No results file found"
|
||||
}
|
||||
}
|
13
test/output/corpus-summary.md
Normal file
13
test/output/corpus-summary.md
Normal file
@ -0,0 +1,13 @@
|
||||
# XInvoice Corpus Testing Summary
|
||||
|
||||
Generated on: 2025-04-03T19:22:13.546Z
|
||||
|
||||
## Overall Summary
|
||||
|
||||
| Test | Success Rate | Files Tested |
|
||||
|------|--------------|-------------|
|
||||
| test.zugferd-corpus.ts | Error: No results file found | N/A |
|
||||
| test.xml-rechnung-corpus.ts | Error: No results file found | N/A |
|
||||
| test.other-formats-corpus.ts | Error: No results file found | N/A |
|
||||
| test.validation-corpus.ts | Error: No results file found | N/A |
|
||||
| test.circular-corpus.ts | Error: No results file found | N/A |
|
26
test/output/other-formats-corpus-results.json
Normal file
26
test/output/other-formats-corpus-results.json
Normal file
@ -0,0 +1,26 @@
|
||||
{
|
||||
"peppol": {
|
||||
"success": 2,
|
||||
"fail": 0,
|
||||
"details": [
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/PEPPOL/Valid/Qvalia/Large_Invoice_sample1.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/PEPPOL/Valid/Qvalia/Large_Invoice_sample2.xml",
|
||||
"success": true,
|
||||
"format": "xrechnung",
|
||||
"error": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"fatturapa": {
|
||||
"success": 0,
|
||||
"fail": 0,
|
||||
"details": []
|
||||
},
|
||||
"totalSuccessRate": 1
|
||||
}
|
@ -1,3 +1,3 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100" xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100" xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100">
|
||||
<rsm:ExchangedDocumentContext><ram:GuidelineSpecifiedDocumentContextParameter><ram:ID>urn:cen.eu:en16931:2017</ram:ID></ram:GuidelineSpecifiedDocumentContextParameter></rsm:ExchangedDocumentContext><rsm:ExchangedDocument><ram:TypeCode>380</ram:TypeCode><ram:ID>PDF-1743698313420</ram:ID><ram:IssueDateTime><udt:DateTimeString format="102">20250403</udt:DateTimeString></ram:IssueDateTime></rsm:ExchangedDocument><rsm:SupplyChainTradeTransaction><ram:ApplicableHeaderTradeAgreement><ram:SellerTradeParty><ram:Name>PDF Seller</ram:Name><ram:PostalTradeAddress><ram:LineOne/><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode/><ram:CityName/><ram:CountryID/></ram:PostalTradeAddress></ram:SellerTradeParty><ram:BuyerTradeParty><ram:Name>PDF Buyer</ram:Name><ram:PostalTradeAddress><ram:LineOne/><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode/><ram:CityName/><ram:CountryID/></ram:PostalTradeAddress></ram:BuyerTradeParty></ram:ApplicableHeaderTradeAgreement><ram:ApplicableHeaderTradeDelivery/><ram:ApplicableHeaderTradeSettlement><ram:InvoiceCurrencyCode>EUR</ram:InvoiceCurrencyCode><ram:SpecifiedTradePaymentTerms><ram:DueDateDateTime><udt:DateTimeString format="102">20250503</udt:DateTimeString></ram:DueDateDateTime></ram:SpecifiedTradePaymentTerms><ram:SpecifiedTradeSettlementHeaderMonetarySummation><ram:LineTotalAmount>0.00</ram:LineTotalAmount><ram:TaxTotalAmount currencyID="EUR">0.00</ram:TaxTotalAmount><ram:GrandTotalAmount>0.00</ram:GrandTotalAmount><ram:DuePayableAmount>0.00</ram:DuePayableAmount></ram:SpecifiedTradeSettlementHeaderMonetarySummation></ram:ApplicableHeaderTradeSettlement></rsm:SupplyChainTradeTransaction></rsm:CrossIndustryInvoice>
|
||||
<rsm:ExchangedDocumentContext><ram:GuidelineSpecifiedDocumentContextParameter><ram:ID>urn:cen.eu:en16931:2017</ram:ID></ram:GuidelineSpecifiedDocumentContextParameter></rsm:ExchangedDocumentContext><rsm:ExchangedDocument><ram:TypeCode>380</ram:TypeCode><ram:ID>471102</ram:ID><ram:IssueDateTime><udt:DateTimeString format="102">NaNNaNNaN</udt:DateTimeString></ram:IssueDateTime></rsm:ExchangedDocument><rsm:SupplyChainTradeTransaction><ram:ApplicableHeaderTradeAgreement><ram:SellerTradeParty><ram:Name>Lieferant GmbH</ram:Name><ram:PostalTradeAddress><ram:LineOne>Lieferantenstraße 20</ram:LineOne><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode>80333</ram:PostcodeCode><ram:CityName>München</ram:CityName><ram:CountryID>DE</ram:CountryID></ram:PostalTradeAddress><ram:SpecifiedTaxRegistration><ram:ID schemeID="VA">DE123456789</ram:ID></ram:SpecifiedTaxRegistration><ram:SpecifiedTaxRegistration><ram:ID schemeID="FC">201/113/40209</ram:ID></ram:SpecifiedTaxRegistration></ram:SellerTradeParty><ram:BuyerTradeParty><ram:Name>Kunden AG Mitte</ram:Name><ram:PostalTradeAddress><ram:LineOne>Kundenstraße 15</ram:LineOne><ram:LineTwo>0</ram:LineTwo><ram:PostcodeCode>69876</ram:PostcodeCode><ram:CityName>Frankfurt</ram:CityName><ram:CountryID>DE</ram:CountryID></ram:PostalTradeAddress></ram:BuyerTradeParty></ram:ApplicableHeaderTradeAgreement><ram:ApplicableHeaderTradeDelivery/><ram:ApplicableHeaderTradeSettlement><ram:InvoiceCurrencyCode>EUR</ram:InvoiceCurrencyCode><ram:SpecifiedTradePaymentTerms><ram:DueDateDateTime><udt:DateTimeString format="102">NaNNaNNaN</udt:DateTimeString></ram:DueDateDateTime></ram:SpecifiedTradePaymentTerms><ram:SpecifiedTradeSettlementHeaderMonetarySummation><ram:LineTotalAmount>473.00</ram:LineTotalAmount><ram:TaxTotalAmount 
currencyID="EUR">56.87</ram:TaxTotalAmount><ram:GrandTotalAmount>529.87</ram:GrandTotalAmount><ram:DuePayableAmount>529.87</ram:DuePayableAmount></ram:SpecifiedTradeSettlementHeaderMonetarySummation></ram:ApplicableHeaderTradeSettlement><ram:IncludedSupplyChainTradeLineItem><ram:AssociatedDocumentLineDocument><ram:LineID>1</ram:LineID></ram:AssociatedDocumentLineDocument><ram:SpecifiedTradeProduct><ram:Name>Trennblätter A4</ram:Name><ram:SellerAssignedID>TB100A4</ram:SellerAssignedID></ram:SpecifiedTradeProduct><ram:SpecifiedLineTradeAgreement><ram:NetPriceProductTradePrice><ram:ChargeAmount>9.90</ram:ChargeAmount></ram:NetPriceProductTradePrice></ram:SpecifiedLineTradeAgreement><ram:SpecifiedLineTradeDelivery><ram:BilledQuantity unitCode="H87">20</ram:BilledQuantity></ram:SpecifiedLineTradeDelivery><ram:SpecifiedLineTradeSettlement><ram:ApplicableTradeTax><ram:TypeCode>VAT</ram:TypeCode><ram:CategoryCode>S</ram:CategoryCode><ram:RateApplicablePercent>19</ram:RateApplicablePercent></ram:ApplicableTradeTax><ram:SpecifiedLineTradeSettlementMonetarySummation><ram:LineTotalAmount>198.00</ram:LineTotalAmount></ram:SpecifiedLineTradeSettlementMonetarySummation></ram:SpecifiedLineTradeSettlement></ram:IncludedSupplyChainTradeLineItem><ram:IncludedSupplyChainTradeLineItem><ram:AssociatedDocumentLineDocument><ram:LineID>2</ram:LineID></ram:AssociatedDocumentLineDocument><ram:SpecifiedTradeProduct><ram:Name>Joghurt Banane</ram:Name><ram:SellerAssignedID>ARNR2</ram:SellerAssignedID></ram:SpecifiedTradeProduct><ram:SpecifiedLineTradeAgreement><ram:NetPriceProductTradePrice><ram:ChargeAmount>5.50</ram:ChargeAmount></ram:NetPriceProductTradePrice></ram:SpecifiedLineTradeAgreement><ram:SpecifiedLineTradeDelivery><ram:BilledQuantity 
unitCode="H87">50</ram:BilledQuantity></ram:SpecifiedLineTradeDelivery><ram:SpecifiedLineTradeSettlement><ram:ApplicableTradeTax><ram:TypeCode>VAT</ram:TypeCode><ram:CategoryCode>S</ram:CategoryCode><ram:RateApplicablePercent>7</ram:RateApplicablePercent></ram:ApplicableTradeTax><ram:SpecifiedLineTradeSettlementMonetarySummation><ram:LineTotalAmount>275.00</ram:LineTotalAmount></ram:SpecifiedLineTradeSettlementMonetarySummation></ram:SpecifiedLineTradeSettlement></ram:IncludedSupplyChainTradeLineItem></rsm:SupplyChainTradeTransaction></rsm:CrossIndustryInvoice>
|
Binary file not shown.
@ -54,9 +54,9 @@
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_EN16931.pdf",
|
||||
"success": false,
|
||||
"valid": null,
|
||||
"errors": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"valid": true,
|
||||
"errors": [],
|
||||
"error": "Validation result (true) doesn't match expectation (false)"
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type380_MINIMUM.pdf",
|
||||
@ -75,9 +75,9 @@
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/ZUGFeRDv2/fail/FNFE-factur-x-examples/Avoir_FR_type381_EN16931.pdf",
|
||||
"success": false,
|
||||
"valid": null,
|
||||
"errors": null,
|
||||
"error": "Error: No XML found in PDF"
|
||||
"valid": true,
|
||||
"errors": [],
|
||||
"error": "Validation result (true) doesn't match expectation (false)"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -138,25 +138,25 @@
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Betriebskostenabrechnung.cii.xml",
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"format": "cii",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Einfach.cii.xml",
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"format": "cii",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Elektron.cii.xml",
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"format": "cii",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
"file": "/mnt/data/lossless/fin.cx/xinvoice/test/assets/corpus/XML-Rechnung/CII/XRECHNUNG_Reisekostenabrechnung.cii.xml",
|
||||
"success": true,
|
||||
"format": "facturx",
|
||||
"format": "cii",
|
||||
"error": null
|
||||
},
|
||||
{
|
||||
|
8
ts/00_commitinfo_data.ts
Normal file
8
ts/00_commitinfo_data.ts
Normal file
@ -0,0 +1,8 @@
|
||||
/**
|
||||
* autocreated commitinfo by @push.rocks/commitinfo
|
||||
*/
|
||||
/**
 * Package metadata for this build: npm package name, released version and
 * the package description string.
 */
export const commitinfo = {
  name: '@fin.cx/xinvoice',
  version: '4.1.0',
  description:
    'A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for xinvoice packages.',
};
|
@ -186,7 +186,8 @@ export class XInvoice {
|
||||
*/
|
||||
public async loadPdf(pdfBuffer: Uint8Array | Buffer, validate: boolean = false): Promise<XInvoice> {
|
||||
try {
|
||||
// Extract XML from PDF
|
||||
// Extract XML from PDF using the consolidated extractor
|
||||
// which tries multiple extraction methods in sequence
|
||||
const xmlContent = await this.pdfExtractor.extractXml(pdfBuffer);
|
||||
|
||||
// Store the PDF buffer
|
||||
|
@ -2,13 +2,20 @@
|
||||
* CII-specific types and constants
|
||||
*/
|
||||
|
||||
// CII namespaces
|
||||
// CII namespaces (ZUGFeRD v2/Factur-X)
|
||||
/**
 * XML namespace URIs of the UN/CEFACT Cross Industry Invoice ("...:100")
 * schema, keyed by the conventional prefix (rsm / ram / udt).
 * Used for ZUGFeRD v2 and Factur-X documents.
 */
export const CII_NAMESPACES = {
  RSM: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
  RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100',
  UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100',
};

/**
 * XML namespace URIs of the older ZUGFeRD v1 schema
 * (root namespace "urn:ferd:CrossIndustryDocument:invoice:1p0"),
 * keyed by the same prefixes as CII_NAMESPACES.
 */
export const ZUGFERD_V1_NAMESPACES = {
  RSM: 'urn:ferd:CrossIndustryDocument:invoice:1p0',
  RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12',
  UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:15',
};
|
||||
|
||||
// CII profiles
|
||||
export enum CIIProfile {
|
||||
BASIC = 'BASIC',
|
||||
@ -20,10 +27,18 @@ export enum CIIProfile {
|
||||
|
||||
// CII profile IDs for different formats
|
||||
/**
 * Guideline/profile identifiers per invoice flavour, grouped by format family.
 *
 * Each key appears exactly once; the stale pre-change `ZUGFERD_EXTENDED`
 * entry (which lacked a trailing comma) has been dropped.
 */
export const CII_PROFILE_IDS = {
  // Factur-X profiles
  FACTURX_MINIMUM: 'urn:factur-x.eu:1p0:minimum',
  // NOTE(review): the key says BASIC but the value is the "basicwl" identifier — confirm which profile is intended.
  FACTURX_BASIC: 'urn:factur-x.eu:1p0:basicwl',
  FACTURX_EN16931: 'urn:cen.eu:en16931:2017',

  // ZUGFeRD v2 profiles
  // NOTE(review): these short URNs should be checked against the guideline IDs
  // that real ZUGFeRD v2 documents carry — TODO confirm.
  ZUGFERD_BASIC: 'urn:zugferd:basic',
  ZUGFERD_COMFORT: 'urn:zugferd:comfort',
  ZUGFERD_EXTENDED: 'urn:zugferd:extended',

  // ZUGFeRD v1 profiles
  ZUGFERD_V1_BASIC: 'urn:ferd:CrossIndustryDocument:invoice:1p0:basic',
  ZUGFERD_V1_COMFORT: 'urn:ferd:CrossIndustryDocument:invoice:1p0:comfort',
  ZUGFERD_V1_EXTENDED: 'urn:ferd:CrossIndustryDocument:invoice:1p0:extended',
};
|
||||
|
220
ts/formats/cii/zugferd/zugferd.decoder.ts
Normal file
220
ts/formats/cii/zugferd/zugferd.decoder.ts
Normal file
@ -0,0 +1,220 @@
|
||||
import { CIIBaseDecoder } from '../cii.decoder.js';
|
||||
import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
|
||||
import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
|
||||
import { business, finance, general } from '@tsclass/tsclass';
|
||||
|
||||
/**
|
||||
* Decoder for ZUGFeRD invoice format
|
||||
*/
|
||||
/**
 * Decoder for the ZUGFeRD invoice format (CII-based XML).
 *
 * Reads the document through the XPath helpers inherited from
 * CIIBaseDecoder (`getText`, `getNumber`, `exists`, `select`, `doc`) and
 * maps it onto the project's invoice structures.
 */
export class ZUGFeRDDecoder extends CIIBaseDecoder {
  /**
   * Decodes a ZUGFeRD credit note.
   * @returns Promise resolving to the common invoice data tagged as `'creditnote'`
   */
  protected async decodeCreditNote(): Promise<TCreditNote> {
    const commonData = await this.extractCommonData();

    return {
      ...commonData,
      invoiceType: 'creditnote'
    } as TCreditNote;
  }

  /**
   * Decodes a ZUGFeRD debit note (regular invoice).
   * @returns Promise resolving to the common invoice data tagged as `'debitnote'`
   */
  protected async decodeDebitNote(): Promise<TDebitNote> {
    const commonData = await this.extractCommonData();

    return {
      ...commonData,
      invoiceType: 'debitnote'
    } as TDebitNote;
  }

  /**
   * Extracts the fields shared by credit and debit notes.
   *
   * Dates are parsed with {@link parseDateString}; a missing or unparseable
   * date falls back to the current time, so the result never carries NaN.
   *
   * @returns Common invoice data (`invoiceType` defaults to `'debitnote'`)
   */
  private async extractCommonData(): Promise<Partial<TInvoice>> {
    // Invoice number
    const invoiceId = this.getText('//rsm:ExchangedDocument/ram:ID');

    // Issue date — CII writes this as format "102" (YYYYMMDD), which the
    // Date constructor cannot parse, hence the dedicated parser.
    const issueDateStr = this.getText('//ram:IssueDateTime/udt:DateTimeString');
    const issueDate = this.parseDateString(issueDateStr) ?? Date.now();

    // Trade parties
    const seller = this.extractParty('//ram:SellerTradeParty');
    const buyer = this.extractParty('//ram:BuyerTradeParty');

    // Line items
    const items = this.extractItems();

    // Due date and the resulting payment term in whole days.
    // NOTE(review): when the due date is absent the fallback is "now", so
    // dueInDays then depends on the time of decoding — kept as in the original.
    const dueDateStr = this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString');
    const dueDate = this.parseDateString(dueDateStr) ?? Date.now();
    const dueInDays = Math.round((dueDate - issueDate) / (1000 * 60 * 60 * 24));

    // Currency, defaulting to EUR when the element is missing/empty
    const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR';

    // Free-text notes
    const notes = this.extractNotes();

    // Reverse charge is flagged by an allowance/charge reason code of "62"
    const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]');

    return {
      type: 'invoice',
      id: invoiceId,
      date: issueDate,
      status: 'invoice',
      versionInfo: {
        type: 'final',
        version: '1.0.0'
      },
      language: 'en',
      incidenceId: invoiceId,
      from: seller,
      to: buyer,
      subject: `Invoice ${invoiceId}`,
      items: items,
      dueInDays: dueInDays,
      reverseCharge: reverseCharge,
      currency: currencyCode as finance.TCurrency,
      notes: notes,
      deliveryDate: issueDate,
      objectActions: [],
      invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods
    };
  }

  /**
   * Parses a date string from the XML into a millisecond timestamp.
   *
   * An eight-digit value is read as YYYYMMDD and converted to UTC midnight
   * (the same convention `new Date('2000-01-01')` uses in this class).
   * Anything else is handed to the Date constructor as a fallback.
   *
   * @param raw Text content of a `udt:DateTimeString` element
   * @returns Timestamp in ms, or `undefined` when the value is empty or not a valid date
   */
  private parseDateString(raw: string | null | undefined): number | undefined {
    const text = (raw ?? '').trim();
    if (text === '') {
      return undefined;
    }

    const compact = /^(\d{4})(\d{2})(\d{2})$/.exec(text);
    if (compact) {
      const year = Number(compact[1]);
      const month = Number(compact[2]);
      const day = Number(compact[3]);
      const timestamp = Date.UTC(year, month - 1, day);

      // Date.UTC silently rolls over impossible dates (e.g. month 13);
      // reject those instead of returning a shifted date.
      const check = new Date(timestamp);
      const isRealDate =
        check.getUTCFullYear() === year &&
        check.getUTCMonth() === month - 1 &&
        check.getUTCDate() === day;
      return isRealDate ? timestamp : undefined;
    }

    const parsed = new Date(text).getTime();
    return Number.isNaN(parsed) ? undefined : parsed;
  }

  /**
   * Extracts a trade party (seller or buyer).
   * @param partyXPath XPath to the party node
   * @returns Party information as TContact; VAT and registration IDs default to `''`
   */
  private extractParty(partyXPath: string): business.TContact {
    const name = this.getText(`${partyXPath}/ram:Name`);

    // Postal address
    const addressPath = `${partyXPath}/ram:PostalTradeAddress`;
    const address = {
      street: this.getText(`${addressPath}/ram:LineOne`),
      city: this.getText(`${addressPath}/ram:CityName`),
      zip: this.getText(`${addressPath}/ram:PostcodeCode`),
      country: this.getText(`${addressPath}/ram:CountryID`)
    };

    // Tax registrations: schemeID "VA" is read as the VAT ID,
    // schemeID "FC" as the registration ID.
    const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || '';
    const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || '';

    return {
      type: 'company',
      name: name,
      description: '',
      address: address,
      status: 'active',
      foundedDate: this.createDefaultDate(),
      registrationDetails: {
        vatId: vatId,
        registrationId: registrationId,
        registrationName: ''
      }
    } as business.TContact;
  }

  /**
   * Extracts the invoice line items.
   * @returns Array of invoice items, positioned 1..n in document order
   */
  private extractItems(): finance.TInvoiceItem[] {
    const items: finance.TInvoiceItem[] = [];

    const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc);
    if (!Array.isArray(itemNodes)) {
      return items;
    }

    for (let i = 0; i < itemNodes.length; i++) {
      const itemNode = itemNodes[i];

      items.push({
        position: i + 1,
        name: this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode),
        articleNumber: this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode),
        // Unit code defaults to 'EA' when the attribute is absent
        unitType: this.getText('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA',
        unitQuantity: this.getNumber('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity', itemNode),
        unitNetPrice: this.getNumber('ram:SpecifiedLineTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode),
        vatPercentage: this.getNumber('ram:SpecifiedLineTradeSettlement/ram:ApplicableTradeTax/ram:RateApplicablePercent', itemNode)
      });
    }

    return items;
  }

  /**
   * Extracts the free-text notes (`ram:IncludedNote/ram:Content`).
   * @returns Array of non-empty note texts
   */
  private extractNotes(): string[] {
    const notes: string[] = [];

    const noteNodes = this.select('//ram:IncludedNote', this.doc);
    if (!Array.isArray(noteNodes)) {
      return notes;
    }

    for (const noteNode of noteNodes) {
      const noteText = this.getText('ram:Content', noteNode);
      if (noteText) {
        notes.push(noteText);
      }
    }

    return notes;
  }

  /**
   * Creates a placeholder date for fields the XML does not provide.
   * @returns Timestamp of 2000-01-01 (UTC)
   */
  private createDefaultDate(): number {
    return new Date('2000-01-01').getTime();
  }
}
|
21
ts/formats/cii/zugferd/zugferd.encoder.ts
Normal file
21
ts/formats/cii/zugferd/zugferd.encoder.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import { CIIBaseEncoder } from '../cii.encoder.js';
|
||||
import type { TInvoice } from '../../../interfaces/common.js';
|
||||
import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
|
||||
|
||||
/**
|
||||
* Encoder for ZUGFeRD invoice format
|
||||
*/
|
||||
/**
 * Encoder producing ZUGFeRD XML; the actual serialization is inherited
 * from CIIBaseEncoder.
 */
export class ZUGFeRDEncoder extends CIIBaseEncoder {
  /**
   * Serializes an invoice as ZUGFeRD XML.
   *
   * The profile is always set to ZUGFERD_PROFILE_IDS.BASIC before the base
   * encoder is invoked.
   *
   * @param invoice Invoice data
   * @returns ZUGFeRD XML string
   */
  public async createXml(invoice: TInvoice): Promise<string> {
    // Select the ZUGFeRD profile the base encoder should emit
    this.profileId = ZUGFERD_PROFILE_IDS.BASIC;

    // Delegate the XML generation to the shared CII encoder
    const xml = await super.createXml(invoice);
    return xml;
  }
}
|
18
ts/formats/cii/zugferd/zugferd.types.ts
Normal file
18
ts/formats/cii/zugferd/zugferd.types.ts
Normal file
@ -0,0 +1,18 @@
|
||||
import { CIIProfile, CII_PROFILE_IDS } from '../cii.types.js';
|
||||
|
||||
/**
|
||||
* ZUGFeRD specific constants and types
|
||||
*/
|
||||
|
||||
// ZUGFeRD profile IDs
|
||||
// Pull the ZUGFeRD entries out of the shared CII profile table
const { ZUGFERD_BASIC, ZUGFERD_COMFORT, ZUGFERD_EXTENDED } = CII_PROFILE_IDS;

/**
 * ZUGFeRD profile identifiers under short names, taken from the shared
 * CII_PROFILE_IDS table.
 */
export const ZUGFERD_PROFILE_IDS = {
  BASIC: ZUGFERD_BASIC,
  COMFORT: ZUGFERD_COMFORT,
  EXTENDED: ZUGFERD_EXTENDED,
};
|
||||
|
||||
// ZUGFeRD PDF attachment filename
|
||||
/** File name used for the ZUGFeRD XML attachment inside the PDF. */
export const ZUGFERD_ATTACHMENT_FILENAME = 'zugferd-invoice.xml';

/** Description text used for the ZUGFeRD XML attachment inside the PDF. */
export const ZUGFERD_ATTACHMENT_DESCRIPTION = 'ZUGFeRD XML Invoice';
|
234
ts/formats/cii/zugferd/zugferd.v1.decoder.ts
Normal file
234
ts/formats/cii/zugferd/zugferd.v1.decoder.ts
Normal file
@ -0,0 +1,234 @@
|
||||
import { CIIBaseDecoder } from '../cii.decoder.js';
|
||||
import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
|
||||
import { ZUGFERD_V1_NAMESPACES } from '../cii.types.js';
|
||||
import { business, finance, general } from '@tsclass/tsclass';
|
||||
|
||||
/**
|
||||
* Decoder for ZUGFeRD v1 invoice format
|
||||
*/
|
||||
/**
 * Decoder for the ZUGFeRD v1 invoice format
 * (root namespace `urn:ferd:CrossIndustryDocument:invoice:1p0`).
 *
 * Uses the XPath helpers inherited from CIIBaseDecoder (`getText`,
 * `getNumber`, `exists`, `select`, `doc`), with the v1 namespace URIs and
 * the v1 element names, which differ from the CII (v2) names in several
 * places.
 */
export class ZUGFeRDV1Decoder extends CIIBaseDecoder {
  /**
   * Constructor
   * @param xml XML string to decode
   */
  constructor(xml: string) {
    super(xml);
    // Override namespaces for ZUGFeRD v1.
    // NOTE(review): this runs after the base constructor; if the base class
    // binds its XPath selector to the namespaces during construction, the
    // override would come too late — confirm against CIIBaseDecoder.
    this.namespaces = {
      rsm: ZUGFERD_V1_NAMESPACES.RSM,
      ram: ZUGFERD_V1_NAMESPACES.RAM,
      udt: ZUGFERD_V1_NAMESPACES.UDT
    };
  }

  /**
   * Decodes a ZUGFeRD v1 credit note.
   * @returns Promise resolving to the common invoice data tagged as `'creditnote'`
   */
  protected async decodeCreditNote(): Promise<TCreditNote> {
    const commonData = await this.extractCommonData();

    return {
      ...commonData,
      invoiceType: 'creditnote'
    } as TCreditNote;
  }

  /**
   * Decodes a ZUGFeRD v1 debit note (regular invoice).
   * @returns Promise resolving to the common invoice data tagged as `'debitnote'`
   */
  protected async decodeDebitNote(): Promise<TDebitNote> {
    const commonData = await this.extractCommonData();

    return {
      ...commonData,
      invoiceType: 'debitnote'
    } as TDebitNote;
  }

  /**
   * Extracts the fields shared by credit and debit notes.
   *
   * Dates are parsed with {@link parseDateString}; a missing or unparseable
   * date falls back to the current time, so the result never carries NaN.
   *
   * @returns Common invoice data (`invoiceType` defaults to `'debitnote'`)
   */
  private async extractCommonData(): Promise<Partial<TInvoice>> {
    // Invoice number. A bare '//ram:ID' would match the guideline ID in
    // rsm:SpecifiedExchangedDocumentContext first, so anchor the lookup on
    // the v1 document header element.
    const invoiceId = this.getText('//rsm:HeaderExchangedDocument/ram:ID');

    // Issue date — written as format "102" (YYYYMMDD), which the Date
    // constructor cannot parse, hence the dedicated parser.
    const issueDateStr = this.getText('//ram:IssueDateTime/udt:DateTimeString');
    const issueDate = this.parseDateString(issueDateStr) ?? Date.now();

    // Trade parties
    const seller = this.extractParty('//ram:SellerTradeParty');
    const buyer = this.extractParty('//ram:BuyerTradeParty');

    // Line items
    const items = this.extractItems();

    // Due date and the resulting payment term in whole days.
    // NOTE(review): when the due date is absent the fallback is "now", so
    // dueInDays then depends on the time of decoding — kept as in the original.
    const dueDateStr = this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString');
    const dueDate = this.parseDateString(dueDateStr) ?? Date.now();
    const dueInDays = Math.round((dueDate - issueDate) / (1000 * 60 * 60 * 24));

    // Currency, defaulting to EUR when the element is missing/empty
    const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR';

    // Free-text notes
    const notes = this.extractNotes();

    // Reverse charge is flagged by an allowance/charge reason code of "62"
    const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]');

    return {
      type: 'invoice',
      id: invoiceId,
      date: issueDate,
      status: 'invoice',
      versionInfo: {
        type: 'final',
        version: '1.0.0'
      },
      language: 'en',
      incidenceId: invoiceId,
      from: seller,
      to: buyer,
      subject: `Invoice ${invoiceId}`,
      items: items,
      dueInDays: dueInDays,
      reverseCharge: reverseCharge,
      currency: currencyCode as finance.TCurrency,
      notes: notes,
      deliveryDate: issueDate,
      objectActions: [],
      invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods
    };
  }

  /**
   * Parses a date string from the XML into a millisecond timestamp.
   *
   * An eight-digit value is read as YYYYMMDD and converted to UTC midnight
   * (the same convention `new Date('2000-01-01')` uses in this class).
   * Anything else is handed to the Date constructor as a fallback.
   *
   * @param raw Text content of a `udt:DateTimeString` element
   * @returns Timestamp in ms, or `undefined` when the value is empty or not a valid date
   */
  private parseDateString(raw: string | null | undefined): number | undefined {
    const text = (raw ?? '').trim();
    if (text === '') {
      return undefined;
    }

    const compact = /^(\d{4})(\d{2})(\d{2})$/.exec(text);
    if (compact) {
      const year = Number(compact[1]);
      const month = Number(compact[2]);
      const day = Number(compact[3]);
      const timestamp = Date.UTC(year, month - 1, day);

      // Date.UTC silently rolls over impossible dates (e.g. month 13);
      // reject those instead of returning a shifted date.
      const check = new Date(timestamp);
      const isRealDate =
        check.getUTCFullYear() === year &&
        check.getUTCMonth() === month - 1 &&
        check.getUTCDate() === day;
      return isRealDate ? timestamp : undefined;
    }

    const parsed = new Date(text).getTime();
    return Number.isNaN(parsed) ? undefined : parsed;
  }

  /**
   * Extracts a trade party (seller or buyer).
   * @param partyXPath XPath to the party node
   * @returns Party information as TContact; VAT and registration IDs default to `''`
   */
  private extractParty(partyXPath: string): business.TContact {
    const name = this.getText(`${partyXPath}/ram:Name`);

    // Postal address
    const addressPath = `${partyXPath}/ram:PostalTradeAddress`;
    const address = {
      street: this.getText(`${addressPath}/ram:LineOne`),
      city: this.getText(`${addressPath}/ram:CityName`),
      zip: this.getText(`${addressPath}/ram:PostcodeCode`),
      country: this.getText(`${addressPath}/ram:CountryID`)
    };

    // Tax registrations: schemeID "VA" is read as the VAT ID,
    // schemeID "FC" as the registration ID.
    const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || '';
    const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || '';

    return {
      type: 'company',
      name: name,
      description: '',
      address: address,
      status: 'active',
      foundedDate: this.createDefaultDate(),
      registrationDetails: {
        vatId: vatId,
        registrationId: registrationId,
        registrationName: ''
      }
    } as business.TContact;
  }

  /**
   * Extracts the invoice line items.
   *
   * ZUGFeRD v1 names the per-line aggregates `SpecifiedSupplyChainTrade*`
   * and the tax rate `ApplicablePercent`; the CII (v2) names
   * (`SpecifiedLineTrade*`, `RateApplicablePercent`) do not occur in v1
   * documents.
   *
   * @returns Array of invoice items, positioned 1..n in document order
   */
  private extractItems(): finance.TInvoiceItem[] {
    const items: finance.TInvoiceItem[] = [];

    const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc);
    if (!Array.isArray(itemNodes)) {
      return items;
    }

    for (let i = 0; i < itemNodes.length; i++) {
      const itemNode = itemNodes[i];

      items.push({
        position: i + 1,
        name: this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode),
        articleNumber: this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode),
        // Unit code defaults to 'EA' when the attribute is absent
        unitType: this.getText('ram:SpecifiedSupplyChainTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA',
        unitQuantity: this.getNumber('ram:SpecifiedSupplyChainTradeDelivery/ram:BilledQuantity', itemNode),
        unitNetPrice: this.getNumber('ram:SpecifiedSupplyChainTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode),
        vatPercentage: this.getNumber('ram:SpecifiedSupplyChainTradeSettlement/ram:ApplicableTradeTax/ram:ApplicablePercent', itemNode)
      });
    }

    return items;
  }

  /**
   * Extracts the free-text notes (`ram:IncludedNote/ram:Content`).
   * @returns Array of non-empty note texts
   */
  private extractNotes(): string[] {
    const notes: string[] = [];

    const noteNodes = this.select('//ram:IncludedNote', this.doc);
    if (!Array.isArray(noteNodes)) {
      return notes;
    }

    for (const noteNode of noteNodes) {
      const noteText = this.getText('ram:Content', noteNode);
      if (noteText) {
        notes.push(noteText);
      }
    }

    return notes;
  }

  /**
   * Creates a placeholder date for fields the XML does not provide.
   * @returns Timestamp of 2000-01-01 (UTC)
   */
  private createDefaultDate(): number {
    return new Date('2000-01-01').getTime();
  }
}
|
18
ts/formats/cii/zugferd/zugferd.validator.ts
Normal file
18
ts/formats/cii/zugferd/zugferd.validator.ts
Normal file
@ -0,0 +1,18 @@
|
||||
import { CIIBaseValidator } from '../cii.validator.js';
|
||||
import { ValidationLevel } from '../../../interfaces/common.js';
|
||||
import type { ValidationResult } from '../../../interfaces/common.js';
|
||||
|
||||
/**
|
||||
* Validator for ZUGFeRD invoice format
|
||||
*/
|
||||
export class ZUGFeRDValidator extends CIIBaseValidator {
  /**
   * Business-rule stage of ZUGFeRD validation.
   *
   * No ZUGFeRD-specific business rules are implemented yet, so this stage
   * unconditionally reports success.
   * NOTE(review): any real checks are presumably left to CIIBaseValidator;
   * confirm against the base class.
   *
   * @returns Always `true`
   */
  protected validateBusinessRules(): boolean {
    // Placeholder result until ZUGFeRD-specific rules exist.
    const businessRulesPassed = true;
    return businessRulesPassed;
  }
}
|
@ -5,7 +5,8 @@ import { FormatDetector } from '../utils/format.detector.js';
|
||||
// Import specific decoders
|
||||
import { XRechnungDecoder } from '../ubl/xrechnung/xrechnung.decoder.js';
|
||||
import { FacturXDecoder } from '../cii/facturx/facturx.decoder.js';
|
||||
// import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js';
|
||||
import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js';
|
||||
import { ZUGFeRDV1Decoder } from '../cii/zugferd/zugferd.v1.decoder.js';
|
||||
|
||||
/**
|
||||
* Factory to create the appropriate decoder based on the XML format
|
||||
@ -29,8 +30,12 @@ export class DecoderFactory {
|
||||
return new FacturXDecoder(xml);
|
||||
|
||||
case InvoiceFormat.ZUGFERD:
|
||||
// For now, use Factur-X decoder for ZUGFeRD
|
||||
return new FacturXDecoder(xml);
|
||||
// Determine if it's ZUGFeRD v1 or v2 based on root element
|
||||
if (xml.includes('CrossIndustryDocument')) {
|
||||
return new ZUGFeRDV1Decoder(xml);
|
||||
} else {
|
||||
return new ZUGFeRDDecoder(xml);
|
||||
}
|
||||
|
||||
case InvoiceFormat.FACTURX:
|
||||
return new FacturXDecoder(xml);
|
||||
|
@ -5,7 +5,7 @@ import type { ExportFormat } from '../../interfaces/common.js';
|
||||
// Import specific encoders
|
||||
import { XRechnungEncoder } from '../ubl/xrechnung/xrechnung.encoder.js';
|
||||
import { FacturXEncoder } from '../cii/facturx/facturx.encoder.js';
|
||||
// import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js';
|
||||
import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js';
|
||||
|
||||
/**
|
||||
* Factory to create the appropriate encoder based on the target format
|
||||
@ -33,8 +33,8 @@ export class EncoderFactory {
|
||||
|
||||
case InvoiceFormat.ZUGFERD:
|
||||
case 'zugferd':
|
||||
// For now, use Factur-X encoder for ZUGFeRD
|
||||
return new FacturXEncoder();
|
||||
// Use dedicated ZUGFeRD encoder
|
||||
return new ZUGFeRDEncoder();
|
||||
|
||||
case InvoiceFormat.FACTURX:
|
||||
case 'facturx':
|
||||
|
@ -6,7 +6,7 @@ import { FormatDetector } from '../utils/format.detector.js';
|
||||
// import { UBLValidator } from '../ubl/ubl.validator.js';
|
||||
// import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js';
|
||||
import { FacturXValidator } from '../cii/facturx/facturx.validator.js';
|
||||
// import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';
|
||||
import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';
|
||||
|
||||
/**
|
||||
* Factory to create the appropriate validator based on the XML format
|
||||
@ -34,8 +34,8 @@ export class ValidatorFactory {
|
||||
return new FacturXValidator(xml);
|
||||
|
||||
case InvoiceFormat.ZUGFERD:
|
||||
// For now, use Factur-X validator for ZUGFeRD
|
||||
return new FacturXValidator(xml);
|
||||
// Use dedicated ZUGFeRD validator
|
||||
return new ZUGFeRDValidator(xml);
|
||||
|
||||
case InvoiceFormat.FACTURX:
|
||||
return new FacturXValidator(xml);
|
||||
|
78
ts/formats/pdf/extractors/associated.extractor.ts
Normal file
78
ts/formats/pdf/extractors/associated.extractor.ts
Normal file
@ -0,0 +1,78 @@
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
|
||||
import { BaseXMLExtractor } from './base.extractor.js';
|
||||
|
||||
/**
|
||||
* Associated files extractor for PDF/A-3 documents
|
||||
* Extracts XML from associated files (AF entry in the catalog)
|
||||
* Particularly useful for ZUGFeRD v1 and some Factur-X documents
|
||||
*/
|
||||
export class AssociatedFilesExtractor extends BaseXMLExtractor {
  /**
   * Walk the catalog's `AF` array and return the XML of the first associated
   * file whose name looks invoice-related and whose embedded stream yields
   * valid XML.
   *
   * @param pdfBuffer Raw bytes of the PDF
   * @returns The XML text, or `null` when there is no `AF` array, no entry
   *          produces valid XML, or any error is thrown while processing
   */
  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
    try {
      const document = await PDFDocument.load(pdfBuffer);

      const associatedFiles = document.catalog.lookup(PDFName.of('AF'));
      if (!(associatedFiles instanceof PDFArray)) {
        console.warn('No AF (Associated Files) entry found in PDF catalog');
        return null;
      }

      for (let index = 0; index < associatedFiles.size(); index++) {
        const spec = associatedFiles.lookup(index);
        if (!(spec instanceof PDFDict)) {
          continue;
        }

        // Prefer the F entry for the file name, falling back to UF.
        const nameEntry = spec.lookup(PDFName.of('F')) || spec.lookup(PDFName.of('UF'));
        if (!(nameEntry instanceof PDFString)) {
          continue;
        }

        const fileName = nameEntry.decodeText();
        const lowered = fileName.toLowerCase();

        // A candidate is a well-known invoice file name, any *.xml file,
        // or a name containing an invoice-related keyword.
        const matchesKnownName = this.knownFileNames.some(
          known => known.toLowerCase() === lowered
        );
        const hasInvoiceHint = ['zugferd', 'factur-x', 'xrechnung', 'invoice'].some(
          keyword => lowered.includes(keyword)
        );
        if (!(matchesKnownName || lowered.endsWith('.xml') || hasInvoiceHint)) {
          continue;
        }

        // EF holds the embedded-file dictionary; its F entry is the stream.
        const embedded = spec.lookup(PDFName.of('EF'));
        if (!(embedded instanceof PDFDict)) {
          continue;
        }

        const payload = embedded.lookup(PDFName.of('F'));
        if (!(payload instanceof PDFRawStream)) {
          continue;
        }

        const xml = await this.extractXmlFromStream(payload, fileName);
        if (xml) {
          return xml;
        }
      }

      console.warn('No valid XML found in associated files');
      return null;
    } catch (error) {
      console.error('Error in associated files extraction:', error);
      return null;
    }
  }
}
|
177
ts/formats/pdf/extractors/base.extractor.ts
Normal file
177
ts/formats/pdf/extractors/base.extractor.ts
Normal file
@ -0,0 +1,177 @@
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
|
||||
import * as pako from 'pako';
|
||||
|
||||
/**
|
||||
* Base class for PDF XML extractors with common functionality
|
||||
*/
|
||||
export abstract class BaseXMLExtractor {
  /**
   * File names under which invoice XML is commonly embedded.
   * Subclasses compare against these case-insensitively.
   */
  protected readonly knownFileNames = [
    'factur-x.xml',
    'zugferd-invoice.xml',
    'ZUGFeRD-invoice.xml',
    'xrechnung.xml'
  ];

  /**
   * Substrings (root element names, namespace fragments) of which at least
   * one must occur in a candidate string for it to be accepted by
   * {@link isValidXml}.
   */
  protected readonly knownFormats = [
    'CrossIndustryInvoice',
    'CrossIndustryDocument',
    'Invoice',
    'CreditNote',
    'ubl:Invoice',
    'ubl:CreditNote',
    'rsm:CrossIndustryInvoice',
    'rsm:CrossIndustryDocument',
    'ram:CrossIndustryDocument',
    'urn:un:unece:uncefact',
    'urn:ferd:CrossIndustryDocument',
    'urn:zugferd',
    'urn:factur-x',
    'factur-x.eu',
    'ZUGFeRD'
  ];

  /**
   * Closing root tags used by {@link extractXmlFromString} to find where an
   * XML document ends. They are tried in this order.
   */
  protected readonly knownEndTags = [
    '</CrossIndustryInvoice>',
    '</CrossIndustryDocument>',
    '</Invoice>',
    '</CreditNote>',
    '</rsm:CrossIndustryInvoice>',
    '</rsm:CrossIndustryDocument>',
    '</ram:CrossIndustryDocument>',
    '</ubl:Invoice>',
    '</ubl:CreditNote>'
  ];

  /**
   * Extract XML from a PDF buffer
   * @param pdfBuffer PDF buffer
   * @returns XML content or null if not found
   */
  public abstract extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null>;

  /**
   * Heuristic plausibility check for extracted invoice XML. This does not
   * parse the XML; it only inspects the string.
   *
   * @param xmlString Candidate XML text
   * @returns `true` when the string is non-empty, contains an XML
   *          declaration and a known format marker, has no U+0000..U+0005
   *          control characters, and is at least 100 characters long
   */
  protected isValidXml(xmlString: string): boolean {
    try {
      // Reject empty values and non-strings
      if (!xmlString || typeof xmlString !== 'string') {
        return false;
      }

      // An XML declaration must be present somewhere in the string
      if (!xmlString.includes('<?xml')) {
        return false;
      }

      // At least one known invoice format marker must be present
      const hasKnownFormat = this.knownFormats.some(format => xmlString.includes(format));
      if (!hasKnownFormat) {
        return false;
      }

      // Low control characters indicate binary (e.g. still-compressed) data
      const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
      const hasBinaryData = invalidChars.some(char => xmlString.includes(char));
      if (hasBinaryData) {
        return false;
      }

      // Anything shorter than 100 characters is treated as too short
      if (xmlString.length < 100) {
        return false;
      }

      return true;
    } catch (error) {
      console.error('Error validating XML:', error);
      return false;
    }
  }

  /**
   * Cut an XML document out of a larger string.
   *
   * The document starts at the first `<?xml` found at or after `startIndex`
   * and ends after the first entry of {@link knownEndTags} (in list order)
   * that occurs behind that start.
   *
   * @param text Text to extract XML from
   * @param startIndex Index to start searching for `<?xml` from
   * @returns The XML substring, or `null` when no start or end is found
   */
  protected extractXmlFromString(text: string, startIndex: number = 0): string | null {
    try {
      // Find the start of the XML document
      const xmlStartIndex = text.indexOf('<?xml', startIndex);
      if (xmlStartIndex === -1) {
        return null;
      }

      // Find the end of the XML document: first known end tag that is present
      let xmlEndIndex = -1;
      for (const endTag of this.knownEndTags) {
        const endIndex = text.indexOf(endTag, xmlStartIndex);
        if (endIndex !== -1) {
          xmlEndIndex = endIndex + endTag.length;
          break;
        }
      }

      if (xmlEndIndex === -1) {
        return null;
      }

      return text.substring(xmlStartIndex, xmlEndIndex);
    } catch (error) {
      console.error('Error extracting XML from string:', error);
      return null;
    }
  }

  /**
   * Decode the XML held in an embedded-file stream.
   *
   * The stream bytes are first inflated with pako. If inflation throws or the
   * inflated text is not accepted by {@link isValidXml}, the bytes are decoded
   * as-is.
   *
   * @param stream PDF stream containing XML data
   * @param fileName Name of the file (used in log messages only)
   * @returns XML content, or `null` when neither attempt yields valid XML
   */
  protected async extractXmlFromStream(stream: PDFRawStream, fileName: string): Promise<string | null> {
    try {
      // Hand the Uint8Array itself to pako. Its `.buffer` is the whole
      // backing ArrayBuffer, which can be larger than the view (byteOffset
      // > 0 or shorter byteLength); inflating that reads unrelated bytes.
      const streamBytes = stream.getContents();

      try {
        const decompressedBytes = pako.inflate(streamBytes);
        const xmlContent = new TextDecoder('utf-8').decode(decompressedBytes);

        if (this.isValidXml(xmlContent)) {
          console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`);
          return xmlContent;
        }
      } catch (decompressError) {
        // Decompression failed, try without decompression
        console.log(`Decompression failed for ${fileName}, trying without decompression...`);
      }

      // Fall back to interpreting the stream bytes as uncompressed text
      const rawContent = new TextDecoder('utf-8').decode(streamBytes);

      if (this.isValidXml(rawContent)) {
        console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`);
        return rawContent;
      }

      return null;
    } catch (error) {
      console.error('Error extracting XML from stream:', error);
      return null;
    }
  }
}
|
4
ts/formats/pdf/extractors/index.ts
Normal file
4
ts/formats/pdf/extractors/index.ts
Normal file
@ -0,0 +1,4 @@
|
||||
export * from './base.extractor.js';
|
||||
export * from './standard.extractor.js';
|
||||
export * from './associated.extractor.js';
|
||||
export * from './text.extractor.js';
|
86
ts/formats/pdf/extractors/standard.extractor.ts
Normal file
86
ts/formats/pdf/extractors/standard.extractor.ts
Normal file
@ -0,0 +1,86 @@
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
|
||||
import { BaseXMLExtractor } from './base.extractor.js';
|
||||
|
||||
/**
|
||||
* Standard PDF XML extractor that extracts XML from embedded files
|
||||
* Works with PDF/A-3 documents that follow the standard for embedding files
|
||||
*/
|
||||
export class StandardXMLExtractor extends BaseXMLExtractor {
  /**
   * Follow catalog `Names` -> `EmbeddedFiles` -> `Names` and return the XML
   * of the first embedded file whose name looks invoice-related and whose
   * stream yields valid XML.
   *
   * @param pdfBuffer Raw bytes of the PDF
   * @returns The XML text, or `null` when any of the dictionaries is missing,
   *          no entry produces valid XML, or an error is thrown
   */
  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
    try {
      const document = await PDFDocument.load(pdfBuffer);

      const catalogNames = document.catalog.lookup(PDFName.of('Names'));
      if (!(catalogNames instanceof PDFDict)) {
        console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.');
        return null;
      }

      const embeddedFiles = catalogNames.lookup(PDFName.of('EmbeddedFiles'));
      if (!(embeddedFiles instanceof PDFDict)) {
        console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
        return null;
      }

      const nameTreeEntries = embeddedFiles.lookup(PDFName.of('Names'));
      if (!(nameTreeEntries instanceof PDFArray)) {
        console.warn('No files specified in EmbeddedFiles dictionary!');
        return null;
      }

      // The array is read as alternating (file name, file specification) pairs.
      for (let offset = 0; offset < nameTreeEntries.size(); offset += 2) {
        const nameEntry = nameTreeEntries.lookup(offset);
        const specEntry = nameTreeEntries.lookup(offset + 1);

        if (!(nameEntry instanceof PDFString) || !(specEntry instanceof PDFDict)) {
          continue;
        }

        const fileName = nameEntry.decodeText();
        const lowered = fileName.toLowerCase();

        // A candidate is a well-known invoice file name, any *.xml file,
        // or a name containing an invoice-related keyword.
        const matchesKnownName = this.knownFileNames.some(
          known => known.toLowerCase() === lowered
        );
        const hasInvoiceHint = ['zugferd', 'factur-x', 'xrechnung', 'invoice'].some(
          keyword => lowered.includes(keyword)
        );
        if (!(matchesKnownName || lowered.endsWith('.xml') || hasInvoiceHint)) {
          continue;
        }

        // EF holds the embedded-file dictionary; its F entry is the stream.
        const embedded = specEntry.lookup(PDFName.of('EF'));
        if (!(embedded instanceof PDFDict)) {
          continue;
        }

        const payload = embedded.lookup(PDFName.of('F'));
        if (!(payload instanceof PDFRawStream)) {
          continue;
        }

        const xml = await this.extractXmlFromStream(payload, fileName);
        if (xml) {
          return xml;
        }
      }

      console.warn('No valid XML found in embedded files');
      return null;
    } catch (error) {
      console.error('Error in standard extraction:', error);
      return null;
    }
  }
}
|
55
ts/formats/pdf/extractors/text.extractor.ts
Normal file
55
ts/formats/pdf/extractors/text.extractor.ts
Normal file
@ -0,0 +1,55 @@
|
||||
import { BaseXMLExtractor } from './base.extractor.js';
|
||||
|
||||
/**
|
||||
* Text-based XML extractor for PDF documents
|
||||
* Extracts XML by searching for XML patterns in the PDF text
|
||||
* Used as a fallback when other extraction methods fail
|
||||
*/
|
||||
export class TextXMLExtractor extends BaseXMLExtractor {
  /**
   * Scan the start of the PDF (at most the first 50000 bytes, decoded as
   * UTF-8) for an XML declaration or a known invoice root element, then try
   * to cut a complete XML document out of the text from that position.
   *
   * @param pdfBuffer Raw bytes of the PDF
   * @returns The first extracted string that passes validation, or `null`
   *          when nothing matches or an error is thrown
   */
  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
    try {
      // Only a bounded prefix of the file is decoded and searched.
      const scanLimit = Math.min(pdfBuffer.length, 50000);
      const pdfText = Buffer.from(pdfBuffer).toString('utf8', 0, scanLimit);

      // Markers are tried in this order; the first one yielding valid XML wins.
      const markers: RegExp[] = [
        /<\?xml[^>]*\?>/i,
        /<CrossIndustryInvoice[^>]*>/i,
        /<CrossIndustryDocument[^>]*>/i,
        /<Invoice[^>]*>/i,
        /<CreditNote[^>]*>/i,
        /<rsm:CrossIndustryInvoice[^>]*>/i,
        /<rsm:CrossIndustryDocument[^>]*>/i,
        /<ram:CrossIndustryDocument[^>]*>/i,
        /<ubl:Invoice[^>]*>/i,
        /<ubl:CreditNote[^>]*>/i
      ];

      for (const marker of markers) {
        const hit = marker.exec(pdfText);
        if (hit === null || hit.index === undefined) {
          continue;
        }

        console.log(`Found XML pattern in PDF: ${hit[0]}`);

        // Extraction starts searching at the position of the matched marker.
        const candidate = this.extractXmlFromString(pdfText, hit.index);
        if (candidate && this.isValidXml(candidate)) {
          console.log('Successfully extracted XML from PDF text');
          return candidate;
        }
      }

      console.warn('No valid XML found in PDF text');
      return null;
    } catch (error) {
      console.error('Error in text-based extraction:', error);
      return null;
    }
  }
}
|
@ -1,30 +1,54 @@
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
|
||||
import * as pako from 'pako';
|
||||
import {
|
||||
BaseXMLExtractor,
|
||||
StandardXMLExtractor,
|
||||
AssociatedFilesExtractor,
|
||||
TextXMLExtractor
|
||||
} from './extractors/index.js';
|
||||
|
||||
/**
|
||||
* Class for extracting XML from PDF files
|
||||
* Main PDF extractor class that orchestrates the extraction process
|
||||
* Uses multiple specialized extractors in sequence to maximize success rate
|
||||
*/
|
||||
export class PDFExtractor {
|
||||
private extractors: BaseXMLExtractor[] = [];
|
||||
|
||||
/**
|
||||
* Extracts XML from a PDF buffer
|
||||
* Constructor initializes the chain of extractors
|
||||
*/
|
||||
constructor() {
|
||||
// Add extractors in order of preference/likelihood of success
|
||||
this.extractors.push(
|
||||
new StandardXMLExtractor(), // Standard PDF/A-3 embedded files
|
||||
new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X)
|
||||
new TextXMLExtractor() // Text-based extraction (fallback)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract XML from a PDF buffer
|
||||
* Tries multiple extraction methods in sequence
|
||||
* @param pdfBuffer PDF buffer
|
||||
* @returns XML content or null if not found
|
||||
*/
|
||||
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
||||
try {
|
||||
// First try the standard extraction
|
||||
const standardXml = await this.standardExtraction(pdfBuffer);
|
||||
if (standardXml && this.isValidXml(standardXml)) {
|
||||
return standardXml;
|
||||
console.log('Starting XML extraction from PDF...');
|
||||
|
||||
// Try each extractor in sequence
|
||||
for (const extractor of this.extractors) {
|
||||
const extractorName = extractor.constructor.name;
|
||||
console.log(`Trying extraction with ${extractorName}...`);
|
||||
|
||||
const xml = await extractor.extractXml(pdfBuffer);
|
||||
if (xml) {
|
||||
console.log(`Successfully extracted XML using ${extractorName}`);
|
||||
return xml;
|
||||
}
|
||||
|
||||
console.log(`Extraction with ${extractorName} failed, trying next method...`);
|
||||
}
|
||||
|
||||
// If standard extraction fails, try alternative methods
|
||||
const alternativeXml = await this.alternativeExtraction(pdfBuffer);
|
||||
if (alternativeXml && this.isValidXml(alternativeXml)) {
|
||||
return alternativeXml;
|
||||
}
|
||||
|
||||
// If all extraction methods fail, return null
|
||||
// If all extractors fail, return null
|
||||
console.warn('All extraction methods failed, no valid XML found in PDF');
|
||||
return null;
|
||||
} catch (error) {
|
||||
@ -33,255 +57,7 @@ export class PDFExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Standard extraction method using PDF-lib
|
||||
* @param pdfBuffer PDF buffer
|
||||
* @returns XML content or null if not found
|
||||
*/
|
||||
private async standardExtraction(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
||||
try {
|
||||
const pdfDoc = await PDFDocument.load(pdfBuffer);
|
||||
|
||||
// Get the document's metadata dictionary
|
||||
const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names'));
|
||||
if (!(namesDictObj instanceof PDFDict)) {
|
||||
console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.');
|
||||
return null;
|
||||
}
|
||||
|
||||
const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles'));
|
||||
if (!(embeddedFilesDictObj instanceof PDFDict)) {
|
||||
console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
|
||||
return null;
|
||||
}
|
||||
|
||||
const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names'));
|
||||
if (!(filesSpecObj instanceof PDFArray)) {
|
||||
console.warn('No files specified in EmbeddedFiles dictionary!');
|
||||
return null;
|
||||
}
|
||||
|
||||
// Try to find an XML file in the embedded files
|
||||
let xmlFile: PDFRawStream | undefined;
|
||||
let xmlFileName: string | undefined;
|
||||
|
||||
for (let i = 0; i < filesSpecObj.size(); i += 2) {
|
||||
const fileNameObj = filesSpecObj.lookup(i);
|
||||
const fileSpecObj = filesSpecObj.lookup(i + 1);
|
||||
|
||||
if (!(fileNameObj instanceof PDFString)) {
|
||||
continue;
|
||||
}
|
||||
if (!(fileSpecObj instanceof PDFDict)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the filename as string
|
||||
const fileName = fileNameObj.toString();
|
||||
|
||||
// Check if it's an XML file (checking both extension and known standard filenames)
|
||||
if (fileName.toLowerCase().includes('.xml') ||
|
||||
fileName.toLowerCase().includes('factur-x') ||
|
||||
fileName.toLowerCase().includes('zugferd') ||
|
||||
fileName.toLowerCase().includes('xrechnung')) {
|
||||
|
||||
const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
|
||||
if (!(efDictObj instanceof PDFDict)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const maybeStream = efDictObj.lookup(PDFName.of('F'));
|
||||
if (maybeStream instanceof PDFRawStream) {
|
||||
// Found an XML file - save it
|
||||
xmlFile = maybeStream;
|
||||
xmlFileName = fileName;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no XML file was found, return null
|
||||
if (!xmlFile) {
|
||||
console.warn('No embedded XML file found in the PDF!');
|
||||
return null;
|
||||
}
|
||||
|
||||
// Decompress and decode the XML content
|
||||
try {
|
||||
// Try to decompress with pako
|
||||
const xmlCompressedBytes = xmlFile.getContents().buffer;
|
||||
const xmlBytes = pako.inflate(xmlCompressedBytes);
|
||||
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
|
||||
|
||||
// Check if the XML content is valid
|
||||
if (this.isValidXml(xmlContent)) {
|
||||
console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
|
||||
return xmlContent;
|
||||
}
|
||||
|
||||
// If we get here, the XML content is not valid, try without decompression
|
||||
console.log('Decompression succeeded but XML is not valid, trying without decompression...');
|
||||
const rawXmlBytes = xmlFile.getContents();
|
||||
const rawXmlContent = new TextDecoder('utf-8').decode(rawXmlBytes);
|
||||
|
||||
if (this.isValidXml(rawXmlContent)) {
|
||||
console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
|
||||
return rawXmlContent;
|
||||
}
|
||||
|
||||
// If we get here, neither the decompressed nor the raw XML content is valid
|
||||
console.log('Neither decompressed nor raw XML content is valid');
|
||||
return null;
|
||||
} catch (decompressError) {
|
||||
// Decompression failed, try without decompression
|
||||
console.log('Decompression failed, trying without decompression...');
|
||||
try {
|
||||
const xmlBytes = xmlFile.getContents();
|
||||
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
|
||||
|
||||
if (this.isValidXml(xmlContent)) {
|
||||
console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
|
||||
return xmlContent;
|
||||
}
|
||||
|
||||
// If we get here, the XML content is not valid
|
||||
console.log('Uncompressed XML content is not valid');
|
||||
return null;
|
||||
} catch (decodeError) {
|
||||
console.error('Error decoding XML content:', decodeError);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error in standard extraction:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Alternative extraction method using string search
|
||||
* @param pdfBuffer PDF buffer
|
||||
* @returns XML content or null if not found
|
||||
*/
|
||||
private async alternativeExtraction(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
||||
try {
|
||||
// Convert buffer to string and look for XML patterns
|
||||
const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 10000));
|
||||
|
||||
// Look for common XML patterns in the PDF
|
||||
const xmlPatterns = [
|
||||
/<\?xml[^>]*\?>/i,
|
||||
/<CrossIndustryInvoice[^>]*>/i,
|
||||
/<Invoice[^>]*>/i,
|
||||
/<CreditNote[^>]*>/i,
|
||||
/<rsm:CrossIndustryInvoice[^>]*>/i
|
||||
];
|
||||
|
||||
for (const pattern of xmlPatterns) {
|
||||
const match = pdfString.match(pattern);
|
||||
if (match) {
|
||||
console.log(`Found XML pattern in PDF: ${match[0]}`);
|
||||
|
||||
// Try to extract the XML content
|
||||
const xmlContent = this.extractXmlFromString(pdfString);
|
||||
if (xmlContent) {
|
||||
console.log('Successfully extracted XML from PDF string');
|
||||
return xmlContent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
} catch (error) {
|
||||
console.error('Error in alternative extraction:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts XML from a string
|
||||
* @param pdfString PDF string
|
||||
* @returns XML content or null if not found
|
||||
*/
|
||||
private extractXmlFromString(pdfString: string): string | null {
|
||||
try {
|
||||
// Look for XML start and end tags
|
||||
const xmlStartIndex = pdfString.indexOf('<?xml');
|
||||
if (xmlStartIndex === -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Try to find the end of the XML document
|
||||
const possibleEndTags = [
|
||||
'</CrossIndustryInvoice>',
|
||||
'</Invoice>',
|
||||
'</CreditNote>',
|
||||
'</rsm:CrossIndustryInvoice>'
|
||||
];
|
||||
|
||||
let xmlEndIndex = -1;
|
||||
for (const endTag of possibleEndTags) {
|
||||
const endIndex = pdfString.indexOf(endTag);
|
||||
if (endIndex !== -1) {
|
||||
xmlEndIndex = endIndex + endTag.length;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (xmlEndIndex === -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Extract the XML content
|
||||
return pdfString.substring(xmlStartIndex, xmlEndIndex);
|
||||
} catch (error) {
|
||||
console.error('Error extracting XML from string:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if an XML string is valid
|
||||
* @param xmlString XML string to check
|
||||
* @returns True if the XML is valid
|
||||
*/
|
||||
private isValidXml(xmlString: string): boolean {
|
||||
try {
|
||||
// Check if the XML string contains basic XML structure
|
||||
if (!xmlString.includes('<?xml')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the XML string contains known invoice formats
|
||||
const knownFormats = [
|
||||
'CrossIndustryInvoice',
|
||||
'Invoice',
|
||||
'CreditNote',
|
||||
'ubl:Invoice',
|
||||
'ubl:CreditNote'
|
||||
];
|
||||
|
||||
const hasKnownFormat = knownFormats.some(format => xmlString.includes(format));
|
||||
if (!hasKnownFormat) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the XML string contains binary data or invalid characters
|
||||
const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
|
||||
const hasBinaryData = invalidChars.some(char => xmlString.includes(char));
|
||||
if (hasBinaryData) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the XML string is too short
|
||||
if (xmlString.length < 100) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Error validating XML:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
import { InvoiceFormat } from '../../interfaces/common.js';
|
||||
import { DOMParser } from 'xmldom';
|
||||
import * as xpath from 'xpath';
|
||||
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';
|
||||
|
||||
/**
|
||||
* Utility class for detecting invoice formats
|
||||
@ -26,11 +28,91 @@ export class FormatDetector {
|
||||
return InvoiceFormat.XRECHNUNG;
|
||||
}
|
||||
|
||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element)
|
||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element)
|
||||
if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice') {
|
||||
// For simplicity, we'll treat all CII documents as Factur-X for now
|
||||
// In a real implementation, we would check for specific profiles
|
||||
return InvoiceFormat.FACTURX;
|
||||
// Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X)
|
||||
const namespaces = {
|
||||
rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
|
||||
ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100'
|
||||
};
|
||||
|
||||
// Create XPath selector with namespaces
|
||||
const select = xpath.useNamespaces(namespaces);
|
||||
|
||||
// Look for profile identifier
|
||||
const profileNode = select(
|
||||
'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
||||
doc
|
||||
);
|
||||
|
||||
if (profileNode) {
|
||||
const profileText = profileNode.toString();
|
||||
|
||||
// Check for ZUGFeRD profiles
|
||||
if (profileText.includes('zugferd') ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// Check for Factur-X profiles
|
||||
if (profileText.includes('factur-x') ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_EN16931) {
|
||||
return InvoiceFormat.FACTURX;
|
||||
}
|
||||
}
|
||||
|
||||
// If we can't determine the specific CII format, default to generic CII
|
||||
return InvoiceFormat.CII;
|
||||
}
|
||||
|
||||
// ZUGFeRD v1 detection (CrossIndustryDocument root element)
|
||||
if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' ||
|
||||
root.nodeName === 'ram:CrossIndustryDocument') {
|
||||
|
||||
// Check for ZUGFeRD v1 namespace in the document
|
||||
const xmlString = xml.toString();
|
||||
if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
|
||||
xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12')) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// Set up namespaces for XPath queries (ZUGFeRD v1)
|
||||
try {
|
||||
const namespaces = {
|
||||
rsm: ZUGFERD_V1_NAMESPACES.RSM,
|
||||
ram: ZUGFERD_V1_NAMESPACES.RAM
|
||||
};
|
||||
|
||||
// Create XPath selector with namespaces
|
||||
const select = xpath.useNamespaces(namespaces);
|
||||
|
||||
// Look for profile identifier
|
||||
const profileNode = select(
|
||||
'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
||||
doc
|
||||
);
|
||||
|
||||
if (profileNode) {
|
||||
const profileText = profileNode.toString();
|
||||
|
||||
// Check for ZUGFeRD v1 profiles
|
||||
if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('Error in ZUGFeRD v1 XPath detection:', error);
|
||||
}
|
||||
|
||||
// If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// FatturaPA detection would be implemented here
|
||||
|
30
ts/index.ts
30
ts/index.ts
@ -27,6 +27,12 @@ import { CIIBaseValidator } from './formats/cii/cii.validator.js';
|
||||
// Import PDF utilities
|
||||
import { PDFEmbedder } from './formats/pdf/pdf.embedder.js';
|
||||
import { PDFExtractor } from './formats/pdf/pdf.extractor.js';
|
||||
import {
|
||||
BaseXMLExtractor,
|
||||
StandardXMLExtractor,
|
||||
AssociatedFilesExtractor,
|
||||
TextXMLExtractor
|
||||
} from './formats/pdf/extractors/index.js';
|
||||
|
||||
// Import format detector
|
||||
import { FormatDetector } from './formats/utils/format.detector.js';
|
||||
@ -36,6 +42,12 @@ import { FacturXDecoder } from './formats/cii/facturx/facturx.decoder.js';
|
||||
import { FacturXEncoder } from './formats/cii/facturx/facturx.encoder.js';
|
||||
import { FacturXValidator } from './formats/cii/facturx/facturx.validator.js';
|
||||
|
||||
// Import ZUGFeRD implementation
|
||||
import { ZUGFeRDDecoder } from './formats/cii/zugferd/zugferd.decoder.js';
|
||||
import { ZUGFeRDEncoder } from './formats/cii/zugferd/zugferd.encoder.js';
|
||||
import { ZUGFeRDValidator } from './formats/cii/zugferd/zugferd.validator.js';
|
||||
import { ZUGFeRDV1Decoder } from './formats/cii/zugferd/zugferd.v1.decoder.js';
|
||||
|
||||
// Export interfaces
|
||||
export type {
|
||||
// Common interfaces
|
||||
@ -46,12 +58,12 @@ export type {
|
||||
TLetterEnvelope,
|
||||
TDocumentEnvelope,
|
||||
IPdf,
|
||||
|
||||
|
||||
// Validation interfaces
|
||||
ValidationError,
|
||||
ValidationResult,
|
||||
IValidator,
|
||||
|
||||
|
||||
// Format interfaces
|
||||
ExportFormat,
|
||||
XInvoiceOptions
|
||||
@ -80,8 +92,18 @@ export { CIIBaseDecoder, CIIBaseEncoder, CIIBaseValidator };
|
||||
// Export Factur-X implementation
|
||||
export { FacturXDecoder, FacturXEncoder, FacturXValidator };
|
||||
|
||||
// Export ZUGFeRD implementation
|
||||
export { ZUGFeRDDecoder, ZUGFeRDEncoder, ZUGFeRDValidator, ZUGFeRDV1Decoder };
|
||||
|
||||
// Export PDF utilities
|
||||
export { PDFEmbedder, PDFExtractor };
|
||||
export {
|
||||
PDFEmbedder,
|
||||
PDFExtractor,
|
||||
BaseXMLExtractor,
|
||||
StandardXMLExtractor,
|
||||
AssociatedFilesExtractor,
|
||||
TextXMLExtractor
|
||||
};
|
||||
|
||||
// Export format detector
|
||||
export { FormatDetector };
|
||||
@ -93,7 +115,7 @@ export { FormatDetector };
|
||||
* @returns ValidationResult with the result of validation
|
||||
*/
|
||||
export function validateXml(
|
||||
xml: string,
|
||||
xml: string,
|
||||
level: common.ValidationLevel = common.ValidationLevel.SYNTAX
|
||||
): common.ValidationResult {
|
||||
try {
|
||||
|
Loading…
x
Reference in New Issue
Block a user