import { InvoiceFormat } from '../../interfaces/common.js';
import { DOMParser, xpath } from '../../plugins.js';
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';

/**
 * Utility class for detecting invoice formats.
 *
 * Detection runs in two stages: a cheap substring scan over the raw XML
 * (`quickFormatCheck`), and, only if that is inconclusive, a DOM parse followed
 * by root-element and profile inspection.
 */
export class FormatDetector {
  /** Namespace URI used to look up `ExchangedDocumentContext` elements. */
  private static readonly CII_NAMESPACE =
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';

  /** Namespace URI used to look up `CustomizationID` elements. */
  private static readonly CBC_NAMESPACE =
    'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';

  /** Lower-case substrings that on their own identify a Factur-X document. */
  private static readonly FACTURX_MARKERS: readonly string[] = [
    'factur-x.eu',
    'factur-x.xml',
    'factur-x:',
  ];

  /** Lower-case substrings that identify a ZUGFeRD document. */
  private static readonly ZUGFERD_MARKERS: readonly string[] = [
    'zugferd:',
    'zugferd-invoice.xml',
    'urn:ferd:',
    'urn:zugferd',
  ];

  /**
   * Lower-case substrings that identify an XRechnung document.
   * The longer marker `urn:xoev-de:kosit:standard:xrechnung` is not listed
   * separately because it already contains `xrechnung`.
   */
  private static readonly XRECHNUNG_MARKERS: readonly string[] = ['xrechnung'];

  /** Lower-case substrings that identify a FatturaPA document. */
  private static readonly FATTURAPA_MARKERS: readonly string[] = [
    'fatturapa',
    'fattura elettronica',
    'fatturaelettronica',
  ];

  /**
   * Detects the format of an XML document.
   *
   * Never throws: any error raised while parsing or inspecting the document is
   * logged with `console.error` and reported as `InvoiceFormat.UNKNOWN`.
   *
   * @param xml XML content to analyze
   * @returns Detected invoice format, or `InvoiceFormat.UNKNOWN` for empty,
   *          non-string or unrecognised input
   */
  public static detectFormat(xml: string): InvoiceFormat {
    try {
      // Reject empty / non-string input before doing any work.
      if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
        return InvoiceFormat.UNKNOWN;
      }

      // Cheap substring pre-check; avoids a DOM parse for obvious cases.
      const quickCheck = FormatDetector.quickFormatCheck(xml);
      if (quickCheck !== InvoiceFormat.UNKNOWN) {
        return quickCheck;
      }

      // More thorough parsing-based checks.
      const doc = new DOMParser().parseFromString(xml, 'application/xml');
      const root = doc.documentElement;
      if (!root) {
        return InvoiceFormat.UNKNOWN;
      }

      // UBL (Invoice or CreditNote root), possibly with XRechnung customization.
      if (FormatDetector.isUBLFormat(root)) {
        return FormatDetector.isXRechnungFormat(doc, xml)
          ? InvoiceFormat.XRECHNUNG
          : InvoiceFormat.UBL;
      }

      // Factur-X / ZUGFeRD v2+ (CrossIndustryInvoice root).
      if (FormatDetector.isCIIFormat(root)) {
        return FormatDetector.detectCIIFormat(doc, xml);
      }

      // ZUGFeRD v1 (CrossIndustryDocument root).
      if (FormatDetector.isZUGFeRDV1Format(root)) {
        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA.
      if (FormatDetector.isFatturaPAFormat(root)) {
        return InvoiceFormat.FATTURAPA;
      }

      return InvoiceFormat.UNKNOWN;
    } catch (error) {
      console.error('Error detecting format:', error);
      return InvoiceFormat.UNKNOWN;
    }
  }

  /**
   * Performs a quick format check based on string content.
   * This is faster than full XML parsing for obvious cases.
   *
   * The checks run in a fixed order (Factur-X, ZUGFeRD, XRechnung, FatturaPA)
   * and the first match wins.
   *
   * @param xml XML string
   * @returns Detected format or UNKNOWN if more analysis is needed
   */
  private static quickFormatCheck(xml: string): InvoiceFormat {
    const lowerXml = xml.toLowerCase();

    // Factur-X: a dedicated marker, or the EN 16931 URN together with any
    // mention of "factur-x" (the URN alone is not specific enough).
    if (
      FormatDetector.containsAny(lowerXml, FormatDetector.FACTURX_MARKERS) ||
      (lowerXml.includes('urn:cen.eu:en16931:2017') && lowerXml.includes('factur-x'))
    ) {
      return InvoiceFormat.FACTURX;
    }

    if (FormatDetector.containsAny(lowerXml, FormatDetector.ZUGFERD_MARKERS)) {
      return InvoiceFormat.ZUGFERD;
    }

    if (FormatDetector.containsAny(lowerXml, FormatDetector.XRECHNUNG_MARKERS)) {
      return InvoiceFormat.XRECHNUNG;
    }

    if (FormatDetector.containsAny(lowerXml, FormatDetector.FATTURAPA_MARKERS)) {
      return InvoiceFormat.FATTURAPA;
    }

    // Need more analysis.
    return InvoiceFormat.UNKNOWN;
  }

  /**
   * Tests whether a string contains at least one of the given substrings.
   * @param haystack String to search in
   * @param needles Substrings to look for
   * @returns True if any needle occurs in the haystack
   */
  private static containsAny(haystack: string, needles: readonly string[]): boolean {
    return needles.some((needle) => haystack.includes(needle));
  }

  /**
   * Returns the part of an element's `nodeName` after the last colon, i.e. the
   * name without its namespace prefix (the whole name if there is no prefix).
   * @param element Element to inspect
   * @returns Prefix-less element name
   */
  private static localNameOf(element: Element): string {
    const name = element.nodeName;
    return name.slice(name.lastIndexOf(':') + 1);
  }

  /**
   * Checks if the document is a UBL format.
   * @param root Root element
   * @returns True if the root is `Invoice` or `CreditNote`, with or without a
   *          namespace prefix
   */
  private static isUBLFormat(root: Element): boolean {
    const localName = FormatDetector.localNameOf(root);
    return localName === 'Invoice' || localName === 'CreditNote';
  }

  /**
   * Checks if the document is an XRechnung format by looking for a
   * `CustomizationID` element whose text mentions "xrechnung"
   * (case-insensitive).
   *
   * Elements are located both by namespace (any prefix) and by the literal
   * `cbc:CustomizationID` tag name.
   *
   * @param doc XML document
   * @param xml Original XML string, used only as a fallback if DOM access
   *            throws. Optional; defaults to an empty string.
   * @returns True if it's an XRechnung format
   */
  private static isXRechnungFormat(doc: Document, xml: string = ''): boolean {
    try {
      const customizationNodes = [
        ...Array.from(
          doc.getElementsByTagNameNS(FormatDetector.CBC_NAMESPACE, 'CustomizationID')
        ),
        ...Array.from(doc.getElementsByTagName('cbc:CustomizationID')),
      ];

      return customizationNodes.some((node) =>
        (node.textContent || '').toLowerCase().includes('xrechnung')
      );
    } catch (error) {
      console.warn('Error checking for XRechnung format:', error);
      // Fall back to the raw XML text. The previous implementation serialized
      // the DOM with `XMLSerializer`, which this file does not import and which
      // is not a Node.js global, so the fallback itself could throw.
      return xml.toLowerCase().includes('xrechnung');
    }
  }

  /**
   * Checks if the document is a CII format (Factur-X/ZUGFeRD v2+).
   * @param root Root element
   * @returns True if the root is `CrossIndustryInvoice`, with or without a
   *          namespace prefix
   */
  private static isCIIFormat(root: Element): boolean {
    return FormatDetector.localNameOf(root) === 'CrossIndustryInvoice';
  }

  /**
   * Checks if the document is a ZUGFeRD v1 format.
   * @param root Root element
   * @returns True if the root is `CrossIndustryDocument`, with or without a
   *          namespace prefix
   */
  private static isZUGFeRDV1Format(root: Element): boolean {
    return FormatDetector.localNameOf(root) === 'CrossIndustryDocument';
  }

  /**
   * Checks if the document is a FatturaPA format.
   * @param root Root element
   * @returns True if the root is `FatturaElettronica` (with or without a
   *          namespace prefix) or its default namespace mentions
   *          `fatturapa.gov.it`
   */
  private static isFatturaPAFormat(root: Element): boolean {
    if (FormatDetector.localNameOf(root) === 'FatturaElettronica') {
      return true;
    }

    // Explicit null check so the method always returns a real boolean rather
    // than leaking `null` or `''` from a short-circuited `&&`.
    const defaultNamespace = root.getAttribute('xmlns');
    return defaultNamespace !== null && defaultNamespace.includes('fatturapa.gov.it');
  }

  /**
   * Detects the specific CII format (Factur-X vs ZUGFeRD) from the guideline
   * profile ID found under `ExchangedDocumentContext`.
   *
   * Falls back to `detectCIIFormatFromString` when no context element or no
   * recognised profile is found, or when DOM traversal throws.
   *
   * @param doc XML document
   * @param xml Original XML string for fallback checks
   * @returns Detected format
   */
  private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
    try {
      // Collect context elements by namespace and by bare tag name. A node that
      // matches both lookups is de-duplicated so it is inspected only once.
      const contextNodes = Array.from(
        new Set<Element>([
          ...Array.from(
            doc.getElementsByTagNameNS(
              FormatDetector.CII_NAMESPACE,
              'ExchangedDocumentContext'
            )
          ),
          ...Array.from(doc.getElementsByTagName('ExchangedDocumentContext')),
        ])
      );

      for (const contextNode of contextNodes) {
        const guidelineNodes = Array.from(
          contextNode.getElementsByTagName('ram:GuidelineSpecifiedDocumentContextParameter')
        );

        for (const guidelineNode of guidelineNodes) {
          for (const idNode of Array.from(guidelineNode.getElementsByTagName('ram:ID'))) {
            // Trim so that pretty-printed XML (whitespace around the ID) still
            // matches the exact-equality profile comparisons below.
            const profileText = (idNode.textContent || '').trim();

            // ZUGFeRD is checked before Factur-X.
            if (
              profileText.includes('zugferd') ||
              profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
              profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
              profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED
            ) {
              return InvoiceFormat.ZUGFERD;
            }

            if (
              profileText.includes('factur-x') ||
              profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
              profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
              profileText === CII_PROFILE_IDS.FACTURX_EN16931
            ) {
              return InvoiceFormat.FACTURX;
            }
          }
        }
      }

      // No context element or no recognised profile: use string checking.
      return FormatDetector.detectCIIFormatFromString(xml);
    } catch (error) {
      console.warn('Error detecting CII format, falling back to generic CII:', error);
      return FormatDetector.detectCIIFormatFromString(xml);
    }
  }

  /**
   * Fallback method to detect CII format from string content.
   * Factur-X indicators are checked before ZUGFeRD indicators.
   * @param xml XML string
   * @returns `FACTURX` or `ZUGFERD` if an indicator is present, otherwise the
   *          generic `CII` format
   */
  private static detectCIIFormatFromString(xml: string): InvoiceFormat {
    if (xml.includes('factur-x') || xml.includes('Factur-X')) {
      return InvoiceFormat.FACTURX;
    }

    if (xml.includes('zugferd') || xml.includes('ZUGFeRD')) {
      return InvoiceFormat.ZUGFERD;
    }

    // Generic CII if we can't determine more specifically.
    return InvoiceFormat.CII;
  }
}