import { InvoiceFormat } from '../../interfaces/common.js';
import { DOMParser, xpath } from '../../plugins.js';
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';

/**
 * Utility class for detecting invoice formats
 */
export class FormatDetector {
  /**
   * Determines which invoice format an XML string represents.
   *
   * Detection runs in two stages: a string-based pre-check
   * (`quickFormatCheck`) is tried first, and only when it is inconclusive
   * is the document parsed and its root element inspected. Any exception
   * raised along the way is logged via `console.error` and reported as
   * `InvoiceFormat.UNKNOWN` rather than propagated to the caller.
   *
   * @param xml XML content to analyze
   * @returns The detected invoice format, or `InvoiceFormat.UNKNOWN` when the
   *   input is empty, not a string, blank, unparseable, or unrecognized
   */
  public static detectFormat(xml: string): InvoiceFormat {
    try {
      // Bail out early unless we were handed a non-blank string.
      const hasContent = Boolean(xml) && typeof xml === 'string' && xml.trim().length !== 0;
      if (!hasContent) {
        return InvoiceFormat.UNKNOWN;
      }

      // Stage 1: string-level detection; a definite answer skips DOM parsing.
      const fastResult = FormatDetector.quickFormatCheck(xml);
      if (fastResult !== InvoiceFormat.UNKNOWN) {
        return fastResult;
      }

      // Stage 2: parse the document and classify it by its root element.
      const parsedDoc = new DOMParser().parseFromString(xml, 'application/xml');
      const rootElement = parsedDoc.documentElement;
      if (!rootElement) {
        return InvoiceFormat.UNKNOWN;
      }

      // UBL family: an XRechnung customization refines plain UBL.
      if (FormatDetector.isUBLFormat(rootElement)) {
        return FormatDetector.isXRechnungFormat(parsedDoc)
          ? InvoiceFormat.XRECHNUNG
          : InvoiceFormat.UBL;
      }

      // CII family (Factur-X/ZUGFeRD): the concrete flavor is resolved by detectCIIFormat.
      if (FormatDetector.isCIIFormat(rootElement)) {
        return FormatDetector.detectCIIFormat(parsedDoc, xml);
      }

      // ZUGFeRD v1 documents.
      if (FormatDetector.isZUGFeRDV1Format(rootElement)) {
        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA is the last candidate; anything else is unrecognized.
      return FormatDetector.isFatturaPAFormat(rootElement)
        ? InvoiceFormat.FATTURAPA
        : InvoiceFormat.UNKNOWN;
    } catch (err) {
      // Detection is best-effort: log the failure and report UNKNOWN.
      console.error('Error detecting format:', err);
      return InvoiceFormat.UNKNOWN;
    }
  }

  /**
   * Performs a quick format check based on string content
   * This is faster than full XML parsing for obvious cases
   * @param xml XML string
   * @returns Detected format or UNKNOWN if more analysis is needed
   */
  private static quickFormatCheck(xml: string): InvoiceFormat {
    // Only 
scan a small prefix so large payloads do not create another full-size string copy. const sample = xml.slice(0, 65536); // Root-element checks avoid a DOM parse for the common invoice formats. if (/<(?:[A-Za-z_][\w.-]*:)?(?:Invoice|CreditNote)\b/.test(sample)) { const customizationIdMatch = sample.match( /<[^>]*CustomizationID[^>]*>\s*([^<]+?)\s*<\/[^>]*CustomizationID>/i, ); const customizationId = customizationIdMatch?.[1] ?? ''; if (/xrechnung/i.test(customizationId) || /urn:xoev-de:kosit:standard:xrechnung/i.test(customizationId)) { return InvoiceFormat.XRECHNUNG; } return InvoiceFormat.UBL; } if (/<(?:[A-Za-z_][\w.-]*:)?CrossIndustryInvoice\b/.test(sample)) { const guidelineIdMatch = sample.match( /<[^>]*GuidelineSpecifiedDocumentContextParameter[^>]*>[\s\S]*?<[^>]*ID[^>]*>\s*([^<]+?)\s*<\/[^>]*ID>/i, ); const guidelineId = guidelineIdMatch?.[1] ?? ''; if (/xrechnung/i.test(guidelineId)) { return InvoiceFormat.XRECHNUNG; } if (/factur-x/i.test(guidelineId) || /urn:cen\.eu:en16931:2017/i.test(guidelineId)) { return InvoiceFormat.FACTURX; } if (/zugferd/i.test(guidelineId) || /urn:ferd:/i.test(guidelineId) || /urn:zugferd/i.test(guidelineId)) { return InvoiceFormat.ZUGFERD; } return InvoiceFormat.CII; } if (/<(?:[A-Za-z_][\w.-]*:)?CrossIndustryDocument\b/.test(sample)) { return InvoiceFormat.ZUGFERD; } if (/