feat(ZUGFERD): Add dedicated ZUGFERD v1/v2 support and refine invoice format detection logic

2025-04-03 20:08:02 +00:00
parent b4a95de482
commit 46331c2bf6
28 changed files with 1191 additions and 294 deletions
--- a/ts/formats/cii/cii.types.ts
+++ b/ts/formats/cii/cii.types.ts
@@ -2,13 +2,20 @@
 * CII-specific types and constants
 */

-// CII namespaces
+// CII namespaces (ZUGFeRD v2/Factur-X)
+// Keys mirror the rsm/ram/udt prefixes that appear in the decoders' XPath expressions.
 export const CII_NAMESPACES = {
  RSM: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
  RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100',
  UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100'
 };

+// ZUGFeRD v1 namespaces
+// Same RSM/RAM/UDT keys as CII_NAMESPACES; ZUGFeRDV1Decoder assigns these to its
+// rsm/ram/udt prefixes in its constructor.
+export const ZUGFERD_V1_NAMESPACES = {
+  RSM: 'urn:ferd:CrossIndustryDocument:invoice:1p0',
+  RAM: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12',
+  UDT: 'urn:un:unece:uncefact:data:standard:UnqualifiedDataType:15'
+};
+
 // CII profiles
 export enum CIIProfile {
  BASIC = 'BASIC',
@@ -20,10 +27,18 @@ export enum CIIProfile {

 // CII profile IDs for different formats
 export const CII_PROFILE_IDS = {
+  // Factur-X profiles
  FACTURX_MINIMUM: 'urn:factur-x.eu:1p0:minimum',
  FACTURX_BASIC: 'urn:factur-x.eu:1p0:basicwl',
  FACTURX_EN16931: 'urn:cen.eu:en16931:2017',
+
+  // ZUGFeRD v2 profiles (re-exported as ZUGFERD_PROFILE_IDS in zugferd/zugferd.types.ts)
  ZUGFERD_BASIC: 'urn:zugferd:basic',
  ZUGFERD_COMFORT: 'urn:zugferd:comfort',
-  ZUGFERD_EXTENDED: 'urn:zugferd:extended'
+  ZUGFERD_EXTENDED: 'urn:zugferd:extended',
+
+  // ZUGFeRD v1 profiles: the v1 rsm namespace URN followed by ':<profile>'
+  ZUGFERD_V1_BASIC: 'urn:ferd:CrossIndustryDocument:invoice:1p0:basic',
+  ZUGFERD_V1_COMFORT: 'urn:ferd:CrossIndustryDocument:invoice:1p0:comfort',
+  ZUGFERD_V1_EXTENDED: 'urn:ferd:CrossIndustryDocument:invoice:1p0:extended'
 };
--- a/ts/formats/cii/zugferd/zugferd.decoder.ts
+++ b/ts/formats/cii/zugferd/zugferd.decoder.ts
@@ -0,0 +1,220 @@
+import { CIIBaseDecoder } from '../cii.decoder.js';
+import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
+import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
+import { business, finance, general } from '@tsclass/tsclass';
+
+/**
+ * Decoder for ZUGFeRD v2 invoices (CrossIndustryInvoice XML).
+ *
+ * All XML access goes through members that are not defined in this class
+ * (`getText`, `getNumber`, `exists`, `select`, `doc`), i.e. they come from CIIBaseDecoder.
+ */
+export class ZUGFeRDDecoder extends CIIBaseDecoder {
+  /**
+   * Decodes a ZUGFeRD credit note
+   * @returns Promise resolving to a TCreditNote object
+   */
+  protected async decodeCreditNote(): Promise<TCreditNote> {
+    const commonData = await this.extractCommonData();
+
+    // Same payload as a debit note; only the invoiceType discriminator differs
+    return {
+      ...commonData,
+      invoiceType: 'creditnote'
+    } as TCreditNote;
+  }
+
+  /**
+   * Decodes a ZUGFeRD debit note (invoice)
+   * @returns Promise resolving to a TDebitNote object
+   */
+  protected async decodeDebitNote(): Promise<TDebitNote> {
+    const commonData = await this.extractCommonData();
+
+    return {
+      ...commonData,
+      invoiceType: 'debitnote'
+    } as TDebitNote;
+  }
+
+  /**
+   * Extracts the fields shared by credit and debit notes from the ZUGFeRD XML.
+   *
+   * Dates are parsed with parseCiiDateString, so the compact `YYYYMMDD` form is
+   * understood. A missing issue date falls back to the current time; a missing due
+   * date falls back to the issue date, which yields `dueInDays === 0` instead of a
+   * value that depends on when the document happens to be decoded.
+   * @returns Common invoice data
+   */
+  private async extractCommonData(): Promise<Partial<TInvoice>> {
+    const millisecondsPerDay = 1000 * 60 * 60 * 24;
+
+    // Extract invoice ID
+    const invoiceId = this.getText('//rsm:ExchangedDocument/ram:ID');
+
+    // Extract issue date
+    const issueDate = this.parseCiiDateString(
+      this.getText('//ram:IssueDateTime/udt:DateTimeString'),
+      Date.now()
+    );
+
+    // Extract seller information
+    const seller = this.extractParty('//ram:SellerTradeParty');
+
+    // Extract buyer information
+    const buyer = this.extractParty('//ram:BuyerTradeParty');
+
+    // Extract items
+    const items = this.extractItems();
+
+    // Extract due date (relative to the issue date)
+    const dueDate = this.parseCiiDateString(
+      this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString'),
+      issueDate
+    );
+    const dueInDays = Math.round((dueDate - issueDate) / millisecondsPerDay);
+
+    // Extract currency
+    const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR';
+
+    // Extract notes
+    const notes = this.extractNotes();
+
+    // Check for reverse charge
+    const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]');
+
+    // Create the common invoice data
+    return {
+      type: 'invoice',
+      id: invoiceId,
+      date: issueDate,
+      status: 'invoice',
+      versionInfo: {
+        type: 'final',
+        version: '1.0.0'
+      },
+      language: 'en',
+      incidenceId: invoiceId,
+      from: seller,
+      to: buyer,
+      subject: `Invoice ${invoiceId}`,
+      items: items,
+      dueInDays: dueInDays,
+      reverseCharge: reverseCharge,
+      currency: currencyCode as finance.TCurrency,
+      notes: notes,
+      deliveryDate: issueDate,
+      objectActions: [],
+      invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods
+    };
+  }
+
+  /**
+   * Converts the text of a udt:DateTimeString element to a millisecond timestamp.
+   *
+   * CII documents normally carry dates in format 102 (compact `YYYYMMDD`), which the
+   * Date constructor cannot parse. That form is handled explicitly and read as UTC
+   * midnight; any other text is handed to the Date constructor.
+   * @param value Raw element text (may be empty)
+   * @param fallback Timestamp returned when the value is missing or cannot be parsed
+   * @returns Timestamp in milliseconds
+   */
+  private parseCiiDateString(value: string | null | undefined, fallback: number): number {
+    const text = (value || '').trim();
+    if (!text) {
+      return fallback;
+    }
+
+    const compact = /^(\d{4})(\d{2})(\d{2})$/.exec(text);
+    if (compact) {
+      const year = Number(compact[1]);
+      const month = Number(compact[2]);
+      const day = Number(compact[3]);
+      if (month < 1 || month > 12 || day < 1 || day > 31) {
+        return fallback;
+      }
+      return Date.UTC(year, month - 1, day);
+    }
+
+    const timestamp = new Date(text).getTime();
+    return Number.isNaN(timestamp) ? fallback : timestamp;
+  }
+
+  /**
+   * Extracts party information from ZUGFeRD XML
+   * @param partyXPath XPath to the party node
+   * @returns Party information as TContact
+   */
+  private extractParty(partyXPath: string): business.TContact {
+    // Extract name
+    const name = this.getText(`${partyXPath}/ram:Name`);
+
+    // Extract address
+    const street = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:LineOne`);
+    const city = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CityName`);
+    const zip = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:PostcodeCode`);
+    const country = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CountryID`);
+
+    // Create address object
+    const address = {
+      street: street,
+      city: city,
+      zip: zip,
+      country: country
+    };
+
+    // Extract VAT ID (tax registration with schemeID "VA")
+    const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || '';
+
+    // Extract registration ID (tax registration with schemeID "FC")
+    const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || '';
+
+    // Create contact object; fields the XML does not provide get fixed placeholder values
+    return {
+      type: 'company',
+      name: name,
+      description: '',
+      address: address,
+      status: 'active',
+      foundedDate: this.createDefaultDate(),
+      registrationDetails: {
+        vatId: vatId,
+        registrationId: registrationId,
+        registrationName: ''
+      }
+    } as business.TContact;
+  }
+
+  /**
+   * Extracts invoice items from ZUGFeRD XML
+   * @returns Array of invoice items, positions numbered from 1 in document order
+   */
+  private extractItems(): finance.TInvoiceItem[] {
+    const items: finance.TInvoiceItem[] = [];
+
+    // Get all item nodes
+    const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc);
+
+    // Process each item (anything other than an array result is treated as "no items")
+    if (Array.isArray(itemNodes)) {
+      for (let i = 0; i < itemNodes.length; i++) {
+        const itemNode = itemNodes[i];
+
+        // Extract item data; XPaths are relative to the line item node
+        const name = this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode);
+        const articleNumber = this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode);
+        const unitQuantity = this.getNumber('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity', itemNode);
+        const unitType = this.getText('ram:SpecifiedLineTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA';
+        const unitNetPrice = this.getNumber('ram:SpecifiedLineTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode);
+        const vatPercentage = this.getNumber('ram:SpecifiedLineTradeSettlement/ram:ApplicableTradeTax/ram:RateApplicablePercent', itemNode);
+
+        // Create item object
+        items.push({
+          position: i + 1,
+          name: name,
+          articleNumber: articleNumber,
+          unitType: unitType,
+          unitQuantity: unitQuantity,
+          unitNetPrice: unitNetPrice,
+          vatPercentage: vatPercentage
+        });
+      }
+    }
+
+    return items;
+  }
+
+  /**
+   * Extracts notes from ZUGFeRD XML
+   * @returns Array of non-empty note texts
+   */
+  private extractNotes(): string[] {
+    const notes: string[] = [];
+
+    // Get all note nodes
+    const noteNodes = this.select('//ram:IncludedNote', this.doc);
+
+    // Process each note
+    if (Array.isArray(noteNodes)) {
+      for (let i = 0; i < noteNodes.length; i++) {
+        const noteNode = noteNodes[i];
+        const noteText = this.getText('ram:Content', noteNode);
+
+        // Notes without content are dropped
+        if (noteText) {
+          notes.push(noteText);
+        }
+      }
+    }
+
+    return notes;
+  }
+
+  /**
+   * Creates a default date for empty date fields
+   * @returns Timestamp of 2000-01-01 (parsed from the ISO date-only string)
+   */
+  private createDefaultDate(): number {
+    return new Date('2000-01-01').getTime();
+  }
+}
--- a/ts/formats/cii/zugferd/zugferd.encoder.ts
+++ b/ts/formats/cii/zugferd/zugferd.encoder.ts
@@ -0,0 +1,21 @@
+import { CIIBaseEncoder } from '../cii.encoder.js';
+import type { TInvoice } from '../../../interfaces/common.js';
+import { ZUGFERD_PROFILE_IDS } from './zugferd.types.js';
+
+/**
+ * ZUGFeRD flavour of the CII encoder
+ */
+export class ZUGFeRDEncoder extends CIIBaseEncoder {
+  /**
+   * Serialises an invoice as ZUGFeRD XML.
+   *
+   * Selects the ZUGFeRD BASIC profile ID on this encoder and then hands the
+   * invoice to the base class implementation of createXml.
+   * @param invoice Invoice data
+   * @returns ZUGFeRD XML string
+   */
+  public async createXml(invoice: TInvoice): Promise<string> {
+    // The profile must be set before delegating to the base encoder
+    this.profileId = ZUGFERD_PROFILE_IDS.BASIC;
+
+    const xml = await super.createXml(invoice);
+    return xml;
+  }
+}
--- a/ts/formats/cii/zugferd/zugferd.types.ts
+++ b/ts/formats/cii/zugferd/zugferd.types.ts
@@ -0,0 +1,18 @@
+import { CIIProfile, CII_PROFILE_IDS } from '../cii.types.js';
+
+/**
+ * ZUGFeRD specific constants and types
+ */
+
+// ZUGFeRD profile IDs
+export const ZUGFERD_PROFILE_IDS = {
+  BASIC: CII_PROFILE_IDS.ZUGFERD_BASIC,
+  COMFORT: CII_PROFILE_IDS.ZUGFERD_COMFORT,
+  EXTENDED: CII_PROFILE_IDS.ZUGFERD_EXTENDED
+};
+
+// ZUGFeRD PDF attachment filename
+export const ZUGFERD_ATTACHMENT_FILENAME = 'zugferd-invoice.xml';
+
+// ZUGFeRD PDF attachment description
+export const ZUGFERD_ATTACHMENT_DESCRIPTION = 'ZUGFeRD XML Invoice';
--- a/ts/formats/cii/zugferd/zugferd.v1.decoder.ts
+++ b/ts/formats/cii/zugferd/zugferd.v1.decoder.ts
@@ -0,0 +1,234 @@
+import { CIIBaseDecoder } from '../cii.decoder.js';
+import type { TInvoice, TCreditNote, TDebitNote } from '../../../interfaces/common.js';
+import { ZUGFERD_V1_NAMESPACES } from '../cii.types.js';
+import { business, finance, general } from '@tsclass/tsclass';
+
+/**
+ * Decoder for ZUGFeRD v1 invoices (CrossIndustryDocument XML).
+ *
+ * ZUGFeRD v1 uses its own namespaces and, in several places, different element
+ * names than the v2/CrossIndustryInvoice layout (e.g. `rsm:HeaderExchangedDocument`,
+ * `ram:SpecifiedSupplyChainTrade*`, `ram:ApplicablePercent`); the XPaths below
+ * follow the v1 names.
+ */
+export class ZUGFeRDV1Decoder extends CIIBaseDecoder {
+  /**
+   * Constructor
+   * @param xml XML string to decode
+   */
+  constructor(xml: string) {
+    super(xml);
+    // Override namespaces for ZUGFeRD v1
+    // NOTE(review): assumes the inherited XPath helpers read this.namespaces at query
+    // time rather than capturing it during super() — confirm in CIIBaseDecoder.
+    this.namespaces = {
+      rsm: ZUGFERD_V1_NAMESPACES.RSM,
+      ram: ZUGFERD_V1_NAMESPACES.RAM,
+      udt: ZUGFERD_V1_NAMESPACES.UDT
+    };
+  }
+
+  /**
+   * Decodes a ZUGFeRD v1 credit note
+   * @returns Promise resolving to a TCreditNote object
+   */
+  protected async decodeCreditNote(): Promise<TCreditNote> {
+    const commonData = await this.extractCommonData();
+
+    // Same payload as a debit note; only the invoiceType discriminator differs
+    return {
+      ...commonData,
+      invoiceType: 'creditnote'
+    } as TCreditNote;
+  }
+
+  /**
+   * Decodes a ZUGFeRD v1 debit note (invoice)
+   * @returns Promise resolving to a TDebitNote object
+   */
+  protected async decodeDebitNote(): Promise<TDebitNote> {
+    const commonData = await this.extractCommonData();
+
+    return {
+      ...commonData,
+      invoiceType: 'debitnote'
+    } as TDebitNote;
+  }
+
+  /**
+   * Extracts the fields shared by credit and debit notes from the ZUGFeRD v1 XML.
+   *
+   * The invoice number is read from the document header explicitly: a bare
+   * `//ram:ID` would return the first ram:ID in document order, which in a v1
+   * document is the guideline (profile) ID of the document context.
+   * Dates are parsed with parseCiiDateString. A missing issue date falls back to
+   * the current time; a missing due date falls back to the issue date
+   * (`dueInDays === 0`).
+   * @returns Common invoice data
+   */
+  private async extractCommonData(): Promise<Partial<TInvoice>> {
+    const millisecondsPerDay = 1000 * 60 * 60 * 24;
+
+    // Extract invoice ID from the v1 document header
+    const invoiceId = this.getText('//rsm:HeaderExchangedDocument/ram:ID');
+
+    // Extract issue date
+    const issueDate = this.parseCiiDateString(
+      this.getText('//ram:IssueDateTime/udt:DateTimeString'),
+      Date.now()
+    );
+
+    // Extract seller information
+    const seller = this.extractParty('//ram:SellerTradeParty');
+
+    // Extract buyer information
+    const buyer = this.extractParty('//ram:BuyerTradeParty');
+
+    // Extract items
+    const items = this.extractItems();
+
+    // Extract due date (relative to the issue date)
+    const dueDate = this.parseCiiDateString(
+      this.getText('//ram:SpecifiedTradePaymentTerms/ram:DueDateDateTime/udt:DateTimeString'),
+      issueDate
+    );
+    const dueInDays = Math.round((dueDate - issueDate) / millisecondsPerDay);
+
+    // Extract currency
+    const currencyCode = this.getText('//ram:InvoiceCurrencyCode') || 'EUR';
+
+    // Extract notes
+    const notes = this.extractNotes();
+
+    // Check for reverse charge
+    const reverseCharge = this.exists('//ram:SpecifiedTradeAllowanceCharge/ram:ReasonCode[text()="62"]');
+
+    // Create the common invoice data
+    return {
+      type: 'invoice',
+      id: invoiceId,
+      date: issueDate,
+      status: 'invoice',
+      versionInfo: {
+        type: 'final',
+        version: '1.0.0'
+      },
+      language: 'en',
+      incidenceId: invoiceId,
+      from: seller,
+      to: buyer,
+      subject: `Invoice ${invoiceId}`,
+      items: items,
+      dueInDays: dueInDays,
+      reverseCharge: reverseCharge,
+      currency: currencyCode as finance.TCurrency,
+      notes: notes,
+      deliveryDate: issueDate,
+      objectActions: [],
+      invoiceType: 'debitnote' // Default to debit note, will be overridden in decode methods
+    };
+  }
+
+  /**
+   * Converts the text of a udt:DateTimeString element to a millisecond timestamp.
+   *
+   * ZUGFeRD documents normally carry dates in format 102 (compact `YYYYMMDD`), which
+   * the Date constructor cannot parse. That form is handled explicitly and read as
+   * UTC midnight; any other text is handed to the Date constructor.
+   * @param value Raw element text (may be empty)
+   * @param fallback Timestamp returned when the value is missing or cannot be parsed
+   * @returns Timestamp in milliseconds
+   */
+  private parseCiiDateString(value: string | null | undefined, fallback: number): number {
+    const text = (value || '').trim();
+    if (!text) {
+      return fallback;
+    }
+
+    const compact = /^(\d{4})(\d{2})(\d{2})$/.exec(text);
+    if (compact) {
+      const year = Number(compact[1]);
+      const month = Number(compact[2]);
+      const day = Number(compact[3]);
+      if (month < 1 || month > 12 || day < 1 || day > 31) {
+        return fallback;
+      }
+      return Date.UTC(year, month - 1, day);
+    }
+
+    const timestamp = new Date(text).getTime();
+    return Number.isNaN(timestamp) ? fallback : timestamp;
+  }
+
+  /**
+   * Extracts party information from ZUGFeRD v1 XML
+   * @param partyXPath XPath to the party node
+   * @returns Party information as TContact
+   */
+  private extractParty(partyXPath: string): business.TContact {
+    // Extract name
+    const name = this.getText(`${partyXPath}/ram:Name`);
+
+    // Extract address
+    const street = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:LineOne`);
+    const city = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CityName`);
+    const zip = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:PostcodeCode`);
+    const country = this.getText(`${partyXPath}/ram:PostalTradeAddress/ram:CountryID`);
+
+    // Create address object
+    const address = {
+      street: street,
+      city: city,
+      zip: zip,
+      country: country
+    };
+
+    // Extract VAT ID (tax registration with schemeID "VA")
+    const vatId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="VA"]`) || '';
+
+    // Extract registration ID (tax registration with schemeID "FC")
+    const registrationId = this.getText(`${partyXPath}/ram:SpecifiedTaxRegistration/ram:ID[@schemeID="FC"]`) || '';
+
+    // Create contact object; fields the XML does not provide get fixed placeholder values
+    return {
+      type: 'company',
+      name: name,
+      description: '',
+      address: address,
+      status: 'active',
+      foundedDate: this.createDefaultDate(),
+      registrationDetails: {
+        vatId: vatId,
+        registrationId: registrationId,
+        registrationName: ''
+      }
+    } as business.TContact;
+  }
+
+  /**
+   * Extracts invoice items from ZUGFeRD v1 XML.
+   *
+   * Uses the v1 line-level containers (`ram:SpecifiedSupplyChainTradeAgreement`,
+   * `...Delivery`, `...Settlement`) and the v1 tax rate element `ram:ApplicablePercent`.
+   * @returns Array of invoice items, positions numbered from 1 in document order
+   */
+  private extractItems(): finance.TInvoiceItem[] {
+    const items: finance.TInvoiceItem[] = [];
+
+    // Get all item nodes
+    const itemNodes = this.select('//ram:IncludedSupplyChainTradeLineItem', this.doc);
+
+    // Process each item (anything other than an array result is treated as "no items")
+    if (Array.isArray(itemNodes)) {
+      for (let i = 0; i < itemNodes.length; i++) {
+        const itemNode = itemNodes[i];
+
+        // Extract item data; XPaths are relative to the line item node
+        const name = this.getText('ram:SpecifiedTradeProduct/ram:Name', itemNode);
+        const articleNumber = this.getText('ram:SpecifiedTradeProduct/ram:SellerAssignedID', itemNode);
+        const unitQuantity = this.getNumber('ram:SpecifiedSupplyChainTradeDelivery/ram:BilledQuantity', itemNode);
+        const unitType = this.getText('ram:SpecifiedSupplyChainTradeDelivery/ram:BilledQuantity/@unitCode', itemNode) || 'EA';
+        const unitNetPrice = this.getNumber('ram:SpecifiedSupplyChainTradeAgreement/ram:NetPriceProductTradePrice/ram:ChargeAmount', itemNode);
+        const vatPercentage = this.getNumber('ram:SpecifiedSupplyChainTradeSettlement/ram:ApplicableTradeTax/ram:ApplicablePercent', itemNode);
+
+        // Create item object
+        items.push({
+          position: i + 1,
+          name: name,
+          articleNumber: articleNumber,
+          unitType: unitType,
+          unitQuantity: unitQuantity,
+          unitNetPrice: unitNetPrice,
+          vatPercentage: vatPercentage
+        });
+      }
+    }
+
+    return items;
+  }
+
+  /**
+   * Extracts notes from ZUGFeRD v1 XML
+   * @returns Array of non-empty note texts
+   */
+  private extractNotes(): string[] {
+    const notes: string[] = [];
+
+    // Get all note nodes
+    const noteNodes = this.select('//ram:IncludedNote', this.doc);
+
+    // Process each note
+    if (Array.isArray(noteNodes)) {
+      for (let i = 0; i < noteNodes.length; i++) {
+        const noteNode = noteNodes[i];
+        const noteText = this.getText('ram:Content', noteNode);
+
+        // Notes without content are dropped
+        if (noteText) {
+          notes.push(noteText);
+        }
+      }
+    }
+
+    return notes;
+  }
+
+  /**
+   * Creates a default date for empty date fields
+   * @returns Timestamp of 2000-01-01 (parsed from the ISO date-only string)
+   */
+  private createDefaultDate(): number {
+    return new Date('2000-01-01').getTime();
+  }
+}
--- a/ts/formats/cii/zugferd/zugferd.validator.ts
+++ b/ts/formats/cii/zugferd/zugferd.validator.ts
@@ -0,0 +1,18 @@
+import { CIIBaseValidator } from '../cii.validator.js';
+import { ValidationLevel } from '../../../interfaces/common.js';
+import type { ValidationResult } from '../../../interfaces/common.js';
+
+/**
+ * Validator for ZUGFeRD invoice format
+ */
+export class ZUGFeRDValidator extends CIIBaseValidator {
+  /**
+   * Validates ZUGFeRD XML against business rules.
+   * Currently a placeholder: no rule is evaluated and the result is always true.
+   * @returns True if business validation passed (always, for now)
+   */
+  protected validateBusinessRules(): boolean {
+    // TODO: implement ZUGFeRD-specific business rules.
+    // Note that this override does not call into the base class; it reports success unconditionally.
+    return true;
+  }
+}
--- a/ts/formats/factories/decoder.factory.ts
+++ b/ts/formats/factories/decoder.factory.ts
@@ -5,7 +5,8 @@ import { FormatDetector } from '../utils/format.detector.js';
 // Import specific decoders
 import { XRechnungDecoder } from '../ubl/xrechnung/xrechnung.decoder.js';
 import { FacturXDecoder } from '../cii/facturx/facturx.decoder.js';
-// import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js';
+import { ZUGFeRDDecoder } from '../cii/zugferd/zugferd.decoder.js';
+import { ZUGFeRDV1Decoder } from '../cii/zugferd/zugferd.v1.decoder.js';

 /**
 * Factory to create the appropriate decoder based on the XML format
@@ -29,8 +30,12 @@ export class DecoderFactory {
        return new FacturXDecoder(xml);

      case InvoiceFormat.ZUGFERD:
-        // For now, use Factur-X decoder for ZUGFeRD
-        return new FacturXDecoder(xml);
+        // Determine if it's ZUGFeRD v1 or v2 based on root element
+        if (xml.includes('CrossIndustryDocument')) {
+          return new ZUGFeRDV1Decoder(xml);
+        } else {
+          return new ZUGFeRDDecoder(xml);
+        }

      case InvoiceFormat.FACTURX:
        return new FacturXDecoder(xml);
--- a/ts/formats/factories/encoder.factory.ts
+++ b/ts/formats/factories/encoder.factory.ts
@@ -5,7 +5,7 @@ import type { ExportFormat } from '../../interfaces/common.js';
 // Import specific encoders
 import { XRechnungEncoder } from '../ubl/xrechnung/xrechnung.encoder.js';
 import { FacturXEncoder } from '../cii/facturx/facturx.encoder.js';
-// import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js';
+import { ZUGFeRDEncoder } from '../cii/zugferd/zugferd.encoder.js';

 /**
 * Factory to create the appropriate encoder based on the target format
@@ -33,8 +33,8 @@ export class EncoderFactory {

      case InvoiceFormat.ZUGFERD:
      case 'zugferd':
-        // For now, use Factur-X encoder for ZUGFeRD
-        return new FacturXEncoder();
+        // Use dedicated ZUGFeRD encoder
+        return new ZUGFeRDEncoder();

      case InvoiceFormat.FACTURX:
      case 'facturx':
--- a/ts/formats/factories/validator.factory.ts
+++ b/ts/formats/factories/validator.factory.ts
@@ -6,7 +6,7 @@ import { FormatDetector } from '../utils/format.detector.js';
 // import { UBLValidator } from '../ubl/ubl.validator.js';
 // import { XRechnungValidator } from '../ubl/xrechnung/xrechnung.validator.js';
 import { FacturXValidator } from '../cii/facturx/facturx.validator.js';
-// import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';
+import { ZUGFeRDValidator } from '../cii/zugferd/zugferd.validator.js';

 /**
 * Factory to create the appropriate validator based on the XML format
@@ -34,8 +34,8 @@ export class ValidatorFactory {
        return new FacturXValidator(xml);

      case InvoiceFormat.ZUGFERD:
-        // For now, use Factur-X validator for ZUGFeRD
-        return new FacturXValidator(xml);
+        // Use dedicated ZUGFeRD validator
+        return new ZUGFeRDValidator(xml);

      case InvoiceFormat.FACTURX:
        return new FacturXValidator(xml);
--- a/ts/formats/pdf/extractors/associated.extractor.ts
+++ b/ts/formats/pdf/extractors/associated.extractor.ts
@@ -0,0 +1,78 @@
+import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString, PDFHexString } from 'pdf-lib';
+import { BaseXMLExtractor } from './base.extractor.js';
+
+/**
+ * Associated files extractor for PDF/A-3 documents
+ * Extracts XML from associated files (AF entry in the catalog)
+ * Particularly useful for ZUGFeRD v1 and some Factur-X documents
+ */
+export class AssociatedFilesExtractor extends BaseXMLExtractor {
+  /**
+   * Extract XML from a PDF buffer using associated files.
+   *
+   * Walks the catalog's AF array, picks file specifications whose name looks like
+   * an invoice XML, and returns the first embedded stream that passes validation.
+   * All errors are logged and reported as "not found".
+   * @param pdfBuffer PDF buffer
+   * @returns XML content or null if not found
+   */
+  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
+    try {
+      const pdfDoc = await PDFDocument.load(pdfBuffer);
+
+      // Try to find associated files via the AF entry in the catalog
+      const afArray = pdfDoc.catalog.lookup(PDFName.of('AF'));
+      if (!(afArray instanceof PDFArray)) {
+        console.warn('No AF (Associated Files) entry found in PDF catalog');
+        return null;
+      }
+
+      // Process each associated file
+      for (let i = 0; i < afArray.size(); i++) {
+        const fileSpec = afArray.lookup(i);
+        if (!(fileSpec instanceof PDFDict)) {
+          continue;
+        }
+
+        // Get the file name (literal or hex string, from F or UF)
+        const fileName = this.readFileName(fileSpec);
+        if (fileName === null) {
+          continue;
+        }
+        const lowerCaseName = fileName.toLowerCase();
+
+        // Check if it's a known invoice XML file name
+        const isKnownFileName = this.knownFileNames.some(
+          knownName => lowerCaseName === knownName.toLowerCase()
+        );
+
+        // Check if it's any XML file or has invoice-related keywords
+        const isXmlFile = lowerCaseName.endsWith('.xml') ||
+                          lowerCaseName.includes('zugferd') ||
+                          lowerCaseName.includes('factur-x') ||
+                          lowerCaseName.includes('xrechnung') ||
+                          lowerCaseName.includes('invoice');
+
+        if (!isKnownFileName && !isXmlFile) {
+          continue;
+        }
+
+        // Get the embedded file dictionary
+        const efDict = fileSpec.lookup(PDFName.of('EF'));
+        if (!(efDict instanceof PDFDict)) {
+          continue;
+        }
+
+        // Get the file stream; fall back to the UF entry when F is absent
+        const fileStream = efDict.lookup(PDFName.of('F')) || efDict.lookup(PDFName.of('UF'));
+        if (fileStream instanceof PDFRawStream) {
+          const xmlContent = await this.extractXmlFromStream(fileStream, fileName);
+          if (xmlContent) {
+            return xmlContent;
+          }
+        }
+      }
+
+      console.warn('No valid XML found in associated files');
+      return null;
+    } catch (error) {
+      console.error('Error in associated files extraction:', error);
+      return null;
+    }
+  }
+
+  /**
+   * Reads the file name of a file specification dictionary.
+   *
+   * Tries the F entry first, then UF. PDF text strings may be stored either as
+   * literal strings or as hex strings, so both representations are accepted.
+   * @param fileSpec File specification dictionary
+   * @returns Decoded file name, or null when neither entry holds a string
+   */
+  private readFileName(fileSpec: PDFDict): string | null {
+    for (const key of ['F', 'UF']) {
+      const nameObj = fileSpec.lookup(PDFName.of(key));
+      if (nameObj instanceof PDFString || nameObj instanceof PDFHexString) {
+        return nameObj.decodeText();
+      }
+    }
+    return null;
+  }
+}
--- a/ts/formats/pdf/extractors/base.extractor.ts
+++ b/ts/formats/pdf/extractors/base.extractor.ts
@@ -0,0 +1,177 @@
+import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
+import * as pako from 'pako';
+
+/**
+ * Base class for PDF XML extractors with common functionality:
+ * name/format heuristics, a plausibility check for extracted XML, substring
+ * extraction from raw text, and decoding of embedded file streams.
+ */
+export abstract class BaseXMLExtractor {
+  /**
+   * Known XML file names for different invoice formats
+   */
+  protected readonly knownFileNames = [
+    'factur-x.xml',
+    'zugferd-invoice.xml',
+    'ZUGFeRD-invoice.xml',
+    'xrechnung.xml'
+  ];
+
+  /**
+   * Known XML formats to validate extracted content.
+   * isValidXml accepts content that contains at least one of these substrings.
+   */
+  protected readonly knownFormats = [
+    'CrossIndustryInvoice',
+    'CrossIndustryDocument',
+    'Invoice',
+    'CreditNote',
+    'ubl:Invoice',
+    'ubl:CreditNote',
+    'rsm:CrossIndustryInvoice',
+    'rsm:CrossIndustryDocument',
+    'ram:CrossIndustryDocument',
+    'urn:un:unece:uncefact',
+    'urn:ferd:CrossIndustryDocument',
+    'urn:zugferd',
+    'urn:factur-x',
+    'factur-x.eu',
+    'ZUGFeRD'
+  ];
+
+  /**
+   * Known XML end tags for extracting content from strings.
+   * extractXmlFromString tries them in this order and uses the first one it finds.
+   */
+  protected readonly knownEndTags = [
+    '</CrossIndustryInvoice>',
+    '</CrossIndustryDocument>',
+    '</Invoice>',
+    '</CreditNote>',
+    '</rsm:CrossIndustryInvoice>',
+    '</rsm:CrossIndustryDocument>',
+    '</ram:CrossIndustryDocument>',
+    '</ubl:Invoice>',
+    '</ubl:CreditNote>'
+  ];
+
+  /**
+   * Extract XML from a PDF buffer
+   * @param pdfBuffer PDF buffer
+   * @returns XML content or null if not found
+   */
+  public abstract extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null>;
+
+  /**
+   * Heuristic plausibility check for extracted invoice XML.
+   * This does not parse the document; it only looks for tell-tale substrings.
+   * @param xmlString XML string to check
+   * @returns True if the string looks like a supported invoice XML
+   */
+  protected isValidXml(xmlString: string): boolean {
+    try {
+      // Basic checks for XML validity
+      if (!xmlString || typeof xmlString !== 'string') {
+        return false;
+      }
+
+      // An XML declaration must be present somewhere (not necessarily at offset 0)
+      if (!xmlString.includes('<?xml')) {
+        return false;
+      }
+
+      // Check if the XML string contains known invoice formats
+      const hasKnownFormat = this.knownFormats.some(format => xmlString.includes(format));
+      if (!hasKnownFormat) {
+        return false;
+      }
+
+      // Low control characters indicate binary data, e.g. a stream that is still compressed
+      const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
+      const hasBinaryData = invalidChars.some(char => xmlString.includes(char));
+      if (hasBinaryData) {
+        return false;
+      }
+
+      // Check if the XML string is too short
+      if (xmlString.length < 100) {
+        return false;
+      }
+
+      return true;
+    } catch (error) {
+      console.error('Error validating XML:', error);
+      return false;
+    }
+  }
+
+  /**
+   * Extract XML from a string.
+   * The result spans from the first `<?xml` at or after startIndex up to and
+   * including the first known end tag found after it.
+   * @param text Text to extract XML from
+   * @param startIndex Index to start extraction from
+   * @returns XML content or null if not found
+   */
+  protected extractXmlFromString(text: string, startIndex: number = 0): string | null {
+    try {
+      // Find the start of the XML document
+      const xmlStartIndex = text.indexOf('<?xml', startIndex);
+      if (xmlStartIndex === -1) {
+        return null;
+      }
+
+      // Try to find the end of the XML document
+      let xmlEndIndex = -1;
+      for (const endTag of this.knownEndTags) {
+        const endIndex = text.indexOf(endTag, xmlStartIndex);
+        if (endIndex !== -1) {
+          xmlEndIndex = endIndex + endTag.length;
+          break;
+        }
+      }
+
+      if (xmlEndIndex === -1) {
+        return null;
+      }
+
+      // Extract the XML content
+      return text.substring(xmlStartIndex, xmlEndIndex);
+    } catch (error) {
+      console.error('Error extracting XML from string:', error);
+      return null;
+    }
+  }
+
+  /**
+   * Decompress and decode XML content from a PDF stream.
+   * Tries to inflate the stream first and falls back to the raw bytes when
+   * inflation fails or the inflated text does not look like invoice XML.
+   * @param stream PDF stream containing XML data
+   * @param fileName Name of the file (for logging)
+   * @returns XML content or null if not valid
+   */
+  protected async extractXmlFromStream(stream: PDFRawStream, fileName: string): Promise<string | null> {
+    try {
+      // Use the Uint8Array itself, not its `.buffer`: the contents may be a view
+      // into a larger ArrayBuffer, in which case `.buffer` would also contain
+      // unrelated bytes and inflation would fail.
+      const streamBytes = stream.getContents();
+
+      // Try to decompress with pako
+      try {
+        const decompressedBytes = pako.inflate(streamBytes);
+        const xmlContent = new TextDecoder('utf-8').decode(decompressedBytes);
+
+        if (this.isValidXml(xmlContent)) {
+          console.log(`Successfully extracted decompressed XML from PDF file. File name: ${fileName}`);
+          return xmlContent;
+        }
+      } catch (decompressError) {
+        // Decompression failed, try without decompression
+        console.log(`Decompression failed for ${fileName}, trying without decompression...`);
+      }
+
+      // Try without decompression
+      const rawContent = new TextDecoder('utf-8').decode(streamBytes);
+
+      if (this.isValidXml(rawContent)) {
+        console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${fileName}`);
+        return rawContent;
+      }
+
+      return null;
+    } catch (error) {
+      console.error('Error extracting XML from stream:', error);
+      return null;
+    }
+  }
+}
--- a/ts/formats/pdf/extractors/index.ts
+++ b/ts/formats/pdf/extractors/index.ts
@@ -0,0 +1,4 @@
+export * from './base.extractor.js';
+export * from './standard.extractor.js';
+export * from './associated.extractor.js';
+export * from './text.extractor.js';
--- a/ts/formats/pdf/extractors/standard.extractor.ts
+++ b/ts/formats/pdf/extractors/standard.extractor.ts
@@ -0,0 +1,86 @@
+import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString, PDFHexString } from 'pdf-lib';
+import { BaseXMLExtractor } from './base.extractor.js';
+
+/**
+ * Standard PDF XML extractor that extracts XML from embedded files
+ * Works with PDF/A-3 documents that follow the standard for embedding files
+ *
+ * Walks catalog /Names -> /EmbeddedFiles -> /Names. Only a flat /Names array
+ * is read; nested name-tree nodes are not traversed.
+ */
+export class StandardXMLExtractor extends BaseXMLExtractor {
+  /**
+   * Extract XML from a PDF buffer using standard PDF/A-3 embedded files
+   *
+   * Each (name, file specification) pair is checked; the first entry whose
+   * name looks like an invoice/XML file and whose stream yields valid XML wins.
+   *
+   * @param pdfBuffer PDF buffer
+   * @returns XML content or null if not found (errors are logged, not thrown)
+   */
+  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
+    try {
+      const pdfDoc = await PDFDocument.load(pdfBuffer);
+
+      // Get the catalog's /Names dictionary (the container for name trees)
+      const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names'));
+      if (!(namesDictObj instanceof PDFDict)) {
+        console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.');
+        return null;
+      }
+
+      // Get the embedded files dictionary
+      const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles'));
+      if (!(embeddedFilesDictObj instanceof PDFDict)) {
+        console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
+        return null;
+      }
+
+      // Get the names array: alternating [name, fileSpec, name, fileSpec, ...]
+      const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names'));
+      if (!(filesSpecObj instanceof PDFArray)) {
+        console.warn('No files specified in EmbeddedFiles dictionary!');
+        return null;
+      }
+
+      // Try to find an XML file in the embedded files
+      for (let i = 0; i < filesSpecObj.size(); i += 2) {
+        const fileNameObj = filesSpecObj.lookup(i);
+        const fileSpecObj = filesSpecObj.lookup(i + 1);
+
+        // Names may be stored as literal strings or as hex strings; accept both
+        // so that hex-encoded file names are not silently skipped.
+        if (!(fileNameObj instanceof PDFString || fileNameObj instanceof PDFHexString) ||
+            !(fileSpecObj instanceof PDFDict)) {
+          continue;
+        }
+
+        // Get the filename as string, lower-cased once for all comparisons
+        const fileName = fileNameObj.decodeText();
+        const lowerName = fileName.toLowerCase();
+
+        // Check if it's a known invoice XML file name
+        const isKnownFileName = this.knownFileNames.some(
+          knownName => lowerName === knownName.toLowerCase()
+        );
+
+        // Check if it's any XML file or has invoice-related keywords
+        const isXmlFile = lowerName.endsWith('.xml') ||
+                          lowerName.includes('zugferd') ||
+                          lowerName.includes('factur-x') ||
+                          lowerName.includes('xrechnung') ||
+                          lowerName.includes('invoice');
+
+        if (!isKnownFileName && !isXmlFile) {
+          continue;
+        }
+
+        // /EF maps to the embedded file streams; /F is the stream that is read here
+        const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
+        if (!(efDictObj instanceof PDFDict)) {
+          continue;
+        }
+
+        const fileStream = efDictObj.lookup(PDFName.of('F'));
+        if (fileStream instanceof PDFRawStream) {
+          const xmlContent = await this.extractXmlFromStream(fileStream, fileName);
+          if (xmlContent) {
+            return xmlContent;
+          }
+        }
+      }
+
+      console.warn('No valid XML found in embedded files');
+      return null;
+    } catch (error) {
+      console.error('Error in standard extraction:', error);
+      return null;
+    }
+  }
+}
--- a/ts/formats/pdf/extractors/text.extractor.ts
+++ b/ts/formats/pdf/extractors/text.extractor.ts
@@ -0,0 +1,55 @@
+import { BaseXMLExtractor } from './base.extractor.js';
+
+/**
+ * Last-resort XML extractor for PDF documents.
+ * Decodes the leading bytes of the PDF as text and scans them for an XML
+ * declaration or a known invoice root tag. Meant to be used as a fallback
+ * when other extraction methods fail.
+ */
+export class TextXMLExtractor extends BaseXMLExtractor {
+  /**
+   * Scan the decoded start of a PDF buffer for an XML invoice document.
+   * Only the first 50000 bytes are decoded (as UTF-8) and searched.
+   * @param pdfBuffer PDF buffer
+   * @returns XML content or null if not found
+   */
+  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
+    try {
+      const scanLength = Math.min(pdfBuffer.length, 50000);
+      const pdfText = Buffer.from(pdfBuffer).toString('utf8', 0, scanLength);
+
+      // Markers are tried in this order; each is matched case-insensitively
+      const markers: RegExp[] = [
+        /<\?xml[^>]*\?>/i,
+        /<CrossIndustryInvoice[^>]*>/i,
+        /<CrossIndustryDocument[^>]*>/i,
+        /<Invoice[^>]*>/i,
+        /<CreditNote[^>]*>/i,
+        /<rsm:CrossIndustryInvoice[^>]*>/i,
+        /<rsm:CrossIndustryDocument[^>]*>/i,
+        /<ram:CrossIndustryDocument[^>]*>/i,
+        /<ubl:Invoice[^>]*>/i,
+        /<ubl:CreditNote[^>]*>/i
+      ];
+
+      for (const marker of markers) {
+        const hit = marker.exec(pdfText);
+        if (hit === null) {
+          continue;
+        }
+
+        console.log(`Found XML pattern in PDF: ${hit[0]}`);
+
+        // Pass the match offset on; a candidate only counts if it also validates
+        const candidate = this.extractXmlFromString(pdfText, hit.index);
+        if (candidate && this.isValidXml(candidate)) {
+          console.log('Successfully extracted XML from PDF text');
+          return candidate;
+        }
+      }
+
+      console.warn('No valid XML found in PDF text');
+      return null;
+    } catch (error) {
+      console.error('Error in text-based extraction:', error);
+      return null;
+    }
+  }
+}
--- a/ts/formats/pdf/pdf.extractor.ts
+++ b/ts/formats/pdf/pdf.extractor.ts
@@ -1,30 +1,54 @@
-import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
-import * as pako from 'pako';
+import {
+  BaseXMLExtractor,
+  StandardXMLExtractor,
+  AssociatedFilesExtractor,
+  TextXMLExtractor
+} from './extractors/index.js';

 /**
- * Class for extracting XML from PDF files
+ * Main PDF extractor class that orchestrates the extraction process
+ * Uses multiple specialized extractors in sequence to maximize success rate
 */
 export class PDFExtractor {
+  private extractors: BaseXMLExtractor[] = [];
+
  /**
-   * Extracts XML from a PDF buffer
+   * Constructor initializes the chain of extractors
+   */
+  constructor() {
+    // Add extractors in order of preference/likelihood of success.
+    // Order matters: extractXml() walks this list front to back and returns
+    // the first truthy result, so later entries only run when earlier ones
+    // produce nothing.
+    this.extractors.push(
+      new StandardXMLExtractor(),    // Standard PDF/A-3 embedded files
+      new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X)
+      new TextXMLExtractor()          // Text-based extraction (fallback)
+    );
+  }
+
+  /**
+   * Extract XML from a PDF buffer
+   * Tries multiple extraction methods in sequence
   * @param pdfBuffer PDF buffer
   * @returns XML content or null if not found
   */
  public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
    try {
-      // First try the standard extraction
-      const standardXml = await this.standardExtraction(pdfBuffer);
-      if (standardXml && this.isValidXml(standardXml)) {
-        return standardXml;
+      console.log('Starting XML extraction from PDF...');
+
+      // Try each extractor in sequence
+      for (const extractor of this.extractors) {
+        const extractorName = extractor.constructor.name;
+        console.log(`Trying extraction with ${extractorName}...`);
+
+        const xml = await extractor.extractXml(pdfBuffer);
+        if (xml) {
+          console.log(`Successfully extracted XML using ${extractorName}`);
+          return xml;
+        }
+
+        console.log(`Extraction with ${extractorName} failed, trying next method...`);
      }

-      // If standard extraction fails, try alternative methods
-      const alternativeXml = await this.alternativeExtraction(pdfBuffer);
-      if (alternativeXml && this.isValidXml(alternativeXml)) {
-        return alternativeXml;
-      }
-
-      // If all extraction methods fail, return null
+      // If all extractors fail, return null
      console.warn('All extraction methods failed, no valid XML found in PDF');
      return null;
    } catch (error) {
@@ -33,255 +57,7 @@ export class PDFExtractor {
    }
  }

-  /**
-   * Standard extraction method using PDF-lib
-   * @param pdfBuffer PDF buffer
-   * @returns XML content or null if not found
-   */
-  private async standardExtraction(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
-    try {
-      const pdfDoc = await PDFDocument.load(pdfBuffer);

-      // Get the document's metadata dictionary
-      const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names'));
-      if (!(namesDictObj instanceof PDFDict)) {
-        console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.');
-        return null;
-      }

-      const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles'));
-      if (!(embeddedFilesDictObj instanceof PDFDict)) {
-        console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
-        return null;
-      }

-      const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names'));
-      if (!(filesSpecObj instanceof PDFArray)) {
-        console.warn('No files specified in EmbeddedFiles dictionary!');
-        return null;
-      }
-
-      // Try to find an XML file in the embedded files
-      let xmlFile: PDFRawStream | undefined;
-      let xmlFileName: string | undefined;
-
-      for (let i = 0; i < filesSpecObj.size(); i += 2) {
-        const fileNameObj = filesSpecObj.lookup(i);
-        const fileSpecObj = filesSpecObj.lookup(i + 1);
-
-        if (!(fileNameObj instanceof PDFString)) {
-          continue;
-        }
-        if (!(fileSpecObj instanceof PDFDict)) {
-          continue;
-        }
-
-        // Get the filename as string
-        const fileName = fileNameObj.toString();
-
-        // Check if it's an XML file (checking both extension and known standard filenames)
-        if (fileName.toLowerCase().includes('.xml') ||
-            fileName.toLowerCase().includes('factur-x') ||
-            fileName.toLowerCase().includes('zugferd') ||
-            fileName.toLowerCase().includes('xrechnung')) {
-
-          const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
-          if (!(efDictObj instanceof PDFDict)) {
-            continue;
-          }
-
-          const maybeStream = efDictObj.lookup(PDFName.of('F'));
-          if (maybeStream instanceof PDFRawStream) {
-            // Found an XML file - save it
-            xmlFile = maybeStream;
-            xmlFileName = fileName;
-            break;
-          }
-        }
-      }
-
-      // If no XML file was found, return null
-      if (!xmlFile) {
-        console.warn('No embedded XML file found in the PDF!');
-        return null;
-      }
-
-      // Decompress and decode the XML content
-      try {
-        // Try to decompress with pako
-        const xmlCompressedBytes = xmlFile.getContents().buffer;
-        const xmlBytes = pako.inflate(xmlCompressedBytes);
-        const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
-
-        // Check if the XML content is valid
-        if (this.isValidXml(xmlContent)) {
-          console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
-          return xmlContent;
-        }
-
-        // If we get here, the XML content is not valid, try without decompression
-        console.log('Decompression succeeded but XML is not valid, trying without decompression...');
-        const rawXmlBytes = xmlFile.getContents();
-        const rawXmlContent = new TextDecoder('utf-8').decode(rawXmlBytes);
-
-        if (this.isValidXml(rawXmlContent)) {
-          console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
-          return rawXmlContent;
-        }
-
-        // If we get here, neither the decompressed nor the raw XML content is valid
-        console.log('Neither decompressed nor raw XML content is valid');
-        return null;
-      } catch (decompressError) {
-        // Decompression failed, try without decompression
-        console.log('Decompression failed, trying without decompression...');
-        try {
-          const xmlBytes = xmlFile.getContents();
-          const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
-
-          if (this.isValidXml(xmlContent)) {
-            console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
-            return xmlContent;
-          }
-
-          // If we get here, the XML content is not valid
-          console.log('Uncompressed XML content is not valid');
-          return null;
-        } catch (decodeError) {
-          console.error('Error decoding XML content:', decodeError);
-          return null;
-        }
-      }
-    } catch (error) {
-      console.error('Error in standard extraction:', error);
-      return null;
-    }
-  }
-
-  /**
-   * Alternative extraction method using string search
-   * @param pdfBuffer PDF buffer
-   * @returns XML content or null if not found
-   */
-  private async alternativeExtraction(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
-    try {
-      // Convert buffer to string and look for XML patterns
-      const pdfString = Buffer.from(pdfBuffer).toString('utf8', 0, Math.min(pdfBuffer.length, 10000));
-
-      // Look for common XML patterns in the PDF
-      const xmlPatterns = [
-        /<\?xml[^>]*\?>/i,
-        /<CrossIndustryInvoice[^>]*>/i,
-        /<Invoice[^>]*>/i,
-        /<CreditNote[^>]*>/i,
-        /<rsm:CrossIndustryInvoice[^>]*>/i
-      ];
-
-      for (const pattern of xmlPatterns) {
-        const match = pdfString.match(pattern);
-        if (match) {
-          console.log(`Found XML pattern in PDF: ${match[0]}`);
-
-          // Try to extract the XML content
-          const xmlContent = this.extractXmlFromString(pdfString);
-          if (xmlContent) {
-            console.log('Successfully extracted XML from PDF string');
-            return xmlContent;
-          }
-        }
-      }
-
-      return null;
-    } catch (error) {
-      console.error('Error in alternative extraction:', error);
-      return null;
-    }
-  }
-
-  /**
-   * Extracts XML from a string
-   * @param pdfString PDF string
-   * @returns XML content or null if not found
-   */
-  private extractXmlFromString(pdfString: string): string | null {
-    try {
-      // Look for XML start and end tags
-      const xmlStartIndex = pdfString.indexOf('<?xml');
-      if (xmlStartIndex === -1) {
-        return null;
-      }
-
-      // Try to find the end of the XML document
-      const possibleEndTags = [
-        '</CrossIndustryInvoice>',
-        '</Invoice>',
-        '</CreditNote>',
-        '</rsm:CrossIndustryInvoice>'
-      ];
-
-      let xmlEndIndex = -1;
-      for (const endTag of possibleEndTags) {
-        const endIndex = pdfString.indexOf(endTag);
-        if (endIndex !== -1) {
-          xmlEndIndex = endIndex + endTag.length;
-          break;
-        }
-      }
-
-      if (xmlEndIndex === -1) {
-        return null;
-      }
-
-      // Extract the XML content
-      return pdfString.substring(xmlStartIndex, xmlEndIndex);
-    } catch (error) {
-      console.error('Error extracting XML from string:', error);
-      return null;
-    }
-  }
-
-  /**
-   * Checks if an XML string is valid
-   * @param xmlString XML string to check
-   * @returns True if the XML is valid
-   */
-  private isValidXml(xmlString: string): boolean {
-    try {
-      // Check if the XML string contains basic XML structure
-      if (!xmlString.includes('<?xml')) {
-        return false;
-      }
-
-      // Check if the XML string contains known invoice formats
-      const knownFormats = [
-        'CrossIndustryInvoice',
-        'Invoice',
-        'CreditNote',
-        'ubl:Invoice',
-        'ubl:CreditNote'
-      ];
-
-      const hasKnownFormat = knownFormats.some(format => xmlString.includes(format));
-      if (!hasKnownFormat) {
-        return false;
-      }
-
-      // Check if the XML string contains binary data or invalid characters
-      const invalidChars = ['\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005'];
-      const hasBinaryData = invalidChars.some(char => xmlString.includes(char));
-      if (hasBinaryData) {
-        return false;
-      }
-
-      // Check if the XML string is too short
-      if (xmlString.length < 100) {
-        return false;
-      }
-
-      return true;
-    } catch (error) {
-      console.error('Error validating XML:', error);
-      return false;
-    }
-  }
 }
--- a/ts/formats/utils/format.detector.ts
+++ b/ts/formats/utils/format.detector.ts
@@ -1,5 +1,7 @@
 import { InvoiceFormat } from '../../interfaces/common.js';
 import { DOMParser } from 'xmldom';
+import * as xpath from 'xpath';
+import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';

 /**
 * Utility class for detecting invoice formats
@@ -26,11 +28,91 @@ export class FormatDetector {
        return InvoiceFormat.XRECHNUNG;
      }

-      // Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element)
+      // Factur-X/ZUGFeRD v2 detection (CrossIndustryInvoice root element; the ZUGFeRD v1 CrossIndustryDocument root is handled in a separate branch below)
      if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice') {
-        // For simplicity, we'll treat all CII documents as Factur-X for now
-        // In a real implementation, we would check for specific profiles
-        return InvoiceFormat.FACTURX;
+        // Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X)
+        const namespaces = {
+          rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
+          ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100'
+        };
+
+        // Create XPath selector with namespaces
+        const select = xpath.useNamespaces(namespaces);
+
+        // Look for profile identifier
+        const profileNode = select(
+          'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
+          doc
+        );
+
+        if (profileNode) {
+          const profileText = profileNode.toString();
+
+          // Check for ZUGFeRD profiles
+          if (profileText.includes('zugferd') ||
+              profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
+              profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
+              profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) {
+            return InvoiceFormat.ZUGFERD;
+          }
+
+          // Check for Factur-X profiles
+          if (profileText.includes('factur-x') ||
+              profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
+              profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
+              profileText === CII_PROFILE_IDS.FACTURX_EN16931) {
+            return InvoiceFormat.FACTURX;
+          }
+        }
+
+        // If we can't determine the specific CII format, default to generic CII
+        return InvoiceFormat.CII;
+      }
+
+      // ZUGFeRD v1 detection (CrossIndustryDocument root element)
+      if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' ||
+          root.nodeName === 'ram:CrossIndustryDocument') {
+
+        // Check for ZUGFeRD v1 namespace in the document
+        const xmlString = xml.toString();
+        if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
+            xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12')) {
+          return InvoiceFormat.ZUGFERD;
+        }
+
+        // Set up namespaces for XPath queries (ZUGFeRD v1)
+        try {
+          const namespaces = {
+            rsm: ZUGFERD_V1_NAMESPACES.RSM,
+            ram: ZUGFERD_V1_NAMESPACES.RAM
+          };
+
+          // Create XPath selector with namespaces
+          const select = xpath.useNamespaces(namespaces);
+
+          // Look for profile identifier
+          const profileNode = select(
+            'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
+            doc
+          );
+
+          if (profileNode) {
+            const profileText = profileNode.toString();
+
+            // Check for ZUGFeRD v1 profiles
+            if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') ||
+                profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC ||
+                profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT ||
+                profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) {
+              return InvoiceFormat.ZUGFERD;
+            }
+          }
+        } catch (error) {
+          console.log('Error in ZUGFeRD v1 XPath detection:', error);
+        }
+
+        // If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1
+        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA detection would be implemented here