2025-04-03 15:53:08 +00:00
|
|
|
import { InvoiceFormat } from '../../interfaces/common.js';
|
2025-04-03 21:07:21 +00:00
|
|
|
import { DOMParser, xpath } from '../../plugins.js';
|
2025-04-03 20:08:02 +00:00
|
|
|
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';
|
2025-03-17 16:49:49 +00:00
|
|
|
|
|
|
|
|
/**
 * Utility class for detecting invoice formats.
 *
 * Detection runs in two stages: a cheap string/regex inspection of the start of the
 * document, followed (only when that is inconclusive) by a DOM parse and element checks.
 */
export class FormatDetector {
  /** Number of leading characters inspected by the string-based quick check. */
  private static readonly QUICK_CHECK_SAMPLE_SIZE = 65536;

  /**
   * Detects the format of an XML document.
   *
   * Never throws: any error raised while detecting is logged via `console.error`
   * and reported as `InvoiceFormat.UNKNOWN`.
   *
   * @param xml XML content to analyze
   * @returns Detected invoice format, or `InvoiceFormat.UNKNOWN` for empty,
   *          non-string or unrecognised input
   */
  public static detectFormat(xml: string): InvoiceFormat {
    try {
      // Reject empty / whitespace-only / non-string input before doing any work.
      if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
        return InvoiceFormat.UNKNOWN;
      }

      // Stage 1: string-based pre-check, avoids a DOM parse for the common cases.
      const quickResult = FormatDetector.quickFormatCheck(xml);
      if (quickResult !== InvoiceFormat.UNKNOWN) {
        return quickResult;
      }

      // Stage 2: parse the document and inspect its root element.
      const doc = new DOMParser().parseFromString(xml, 'application/xml');
      const root = doc.documentElement;
      if (!root) {
        return InvoiceFormat.UNKNOWN;
      }

      // UBL (Invoice or CreditNote root), optionally carrying an XRechnung customization.
      if (FormatDetector.isUBLFormat(root)) {
        return FormatDetector.isXRechnungFormat(doc) ? InvoiceFormat.XRECHNUNG : InvoiceFormat.UBL;
      }

      // Factur-X / ZUGFeRD v2+ (CrossIndustryInvoice root).
      if (FormatDetector.isCIIFormat(root)) {
        return FormatDetector.detectCIIFormat(doc, xml);
      }

      // ZUGFeRD v1 (CrossIndustryDocument root).
      if (FormatDetector.isZUGFeRDV1Format(root)) {
        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA.
      if (FormatDetector.isFatturaPAFormat(root)) {
        return InvoiceFormat.FATTURAPA;
      }

      return InvoiceFormat.UNKNOWN;
    } catch (error) {
      console.error('Error detecting format:', error);
      return InvoiceFormat.UNKNOWN;
    }
  }

  /**
   * Performs a quick format check based on string content.
   * This is faster than full XML parsing for obvious cases.
   *
   * Only the first `QUICK_CHECK_SAMPLE_SIZE` characters are inspected. Element-name
   * checks run first (in the order UBL, CII, ZUGFeRD v1, FatturaPA); looser keyword
   * heuristics run afterwards.
   *
   * @param xml XML string
   * @returns Detected format or UNKNOWN if more analysis is needed
   */
  private static quickFormatCheck(xml: string): InvoiceFormat {
    // Only scan a small prefix so large payloads do not create another full-size string copy.
    const sample = xml.slice(0, FormatDetector.QUICK_CHECK_SAMPLE_SIZE);

    // UBL: optionally prefixed <Invoice> / <CreditNote> element.
    if (/<(?:[A-Za-z_][\w.-]*:)?(?:Invoice|CreditNote)\b/.test(sample)) {
      const customizationId =
        sample.match(/<[^>]*CustomizationID[^>]*>\s*([^<]+?)\s*<\/[^>]*CustomizationID>/i)?.[1] ?? '';

      // Any customization mentioning "xrechnung" (this includes the
      // urn:xoev-de:kosit:standard:xrechnung URNs) marks the document as XRechnung.
      return /xrechnung/i.test(customizationId) ? InvoiceFormat.XRECHNUNG : InvoiceFormat.UBL;
    }

    // CII: optionally prefixed <CrossIndustryInvoice> element; the guideline ID picks the flavour.
    if (/<(?:[A-Za-z_][\w.-]*:)?CrossIndustryInvoice\b/.test(sample)) {
      const guidelineId =
        sample.match(
          /<[^>]*GuidelineSpecifiedDocumentContextParameter[^>]*>[\s\S]*?<[^>]*ID[^>]*>\s*([^<]+?)\s*<\/[^>]*ID>/i,
        )?.[1] ?? '';

      if (/xrechnung/i.test(guidelineId)) {
        return InvoiceFormat.XRECHNUNG;
      }
      if (/factur-x/i.test(guidelineId) || /urn:cen\.eu:en16931:2017/i.test(guidelineId)) {
        return InvoiceFormat.FACTURX;
      }
      // /zugferd/i already covers "urn:zugferd..." identifiers.
      if (/zugferd/i.test(guidelineId) || /urn:ferd:/i.test(guidelineId)) {
        return InvoiceFormat.ZUGFERD;
      }
      return InvoiceFormat.CII;
    }

    // ZUGFeRD v1: optionally prefixed <CrossIndustryDocument> element.
    if (/<(?:[A-Za-z_][\w.-]*:)?CrossIndustryDocument\b/.test(sample)) {
      return InvoiceFormat.ZUGFERD;
    }

    // FatturaPA: optionally prefixed <FatturaElettronica> element (e.g. <p:FatturaElettronica>),
    // matched the same prefix-tolerant way as the other element checks above.
    if (/<(?:[A-Za-z_][\w.-]*:)?FatturaElettronica\b/.test(sample)) {
      return InvoiceFormat.FATTURAPA;
    }

    // From here on: keyword heuristics for content without a recognised element name.

    // Obvious Factur-X indicators.
    if (
      /factur-x\.eu/i.test(sample) ||
      /factur-x\.xml/i.test(sample) ||
      /factur-x:/i.test(sample) ||
      (/urn:cen\.eu:en16931:2017/i.test(sample) && /factur-x/i.test(sample))
    ) {
      return InvoiceFormat.FACTURX;
    }

    // Obvious ZUGFeRD indicators.
    if (
      /zugferd:/i.test(sample) ||
      /zugferd-invoice\.xml/i.test(sample) ||
      /urn:ferd:/i.test(sample) ||
      /urn:zugferd/i.test(sample)
    ) {
      return InvoiceFormat.ZUGFERD;
    }

    // Obvious XRechnung indicators.
    if (/xrechnung/i.test(sample)) {
      return InvoiceFormat.XRECHNUNG;
    }

    // Obvious FatturaPA indicators.
    if (
      /fatturapa/i.test(sample) ||
      /fattura elettronica/i.test(sample) ||
      /fatturaelettronica/i.test(sample)
    ) {
      return InvoiceFormat.FATTURAPA;
    }

    // Need more analysis
    return InvoiceFormat.UNKNOWN;
  }

  /**
   * Returns the part of an element's `nodeName` after the last `:`,
   * i.e. the element name without any namespace prefix.
   * @param element Element to inspect
   * @returns Unprefixed element name
   */
  private static unprefixedName(element: Element): string {
    const qualified = element.nodeName;
    return qualified.slice(qualified.lastIndexOf(':') + 1);
  }

  /**
   * Collects descendant elements by local name regardless of namespace prefix.
   *
   * The wildcard-namespace lookup is combined with a lookup by the given qualified
   * name so the result always contains at least what a plain
   * `getElementsByTagName(qualifiedName)` call would return. Duplicates are removed.
   *
   * @param scope Document or element to search below
   * @param localName Element name without prefix
   * @param qualifiedName Element name as conventionally written (with prefix, if any)
   * @returns Matching elements, each at most once
   */
  private static findElements(
    scope: Document | Element,
    localName: string,
    qualifiedName: string,
  ): Element[] {
    const unique = new Set<Element>();
    for (const element of Array.from(scope.getElementsByTagNameNS('*', localName))) {
      unique.add(element);
    }
    for (const element of Array.from(scope.getElementsByTagName(qualifiedName))) {
      unique.add(element);
    }
    return Array.from(unique);
  }

  /**
   * Checks if the document is a UBL format
   * @param root Root element
   * @returns True if the root element is `Invoice` or `CreditNote`, with or without a prefix
   */
  private static isUBLFormat(root: Element): boolean {
    const name = FormatDetector.unprefixedName(root);
    return name === 'Invoice' || name === 'CreditNote';
  }

  /**
   * Checks if the document is an XRechnung format
   *
   * Looks at every `CustomizationID` element (any prefix) and compares
   * case-insensitively, matching what the string-based quick check does.
   *
   * @param doc XML document
   * @returns True if any CustomizationID mentions "xrechnung"
   */
  private static isXRechnungFormat(doc: Document): boolean {
    try {
      return FormatDetector.findElements(doc, 'CustomizationID', 'cbc:CustomizationID').some(
        (node) => (node.textContent ?? '').toLowerCase().includes('xrechnung'),
      );
    } catch (error) {
      console.warn('Error checking for XRechnung format:', error);

      // If direct DOM access fails, try a string-based approach. XMLSerializer is not
      // imported by this file and is not a global in every runtime, so guard its use
      // instead of letting the fallback itself throw a ReferenceError.
      if (typeof XMLSerializer === 'undefined') {
        return false;
      }
      const xmlStr = new XMLSerializer().serializeToString(doc);
      return xmlStr.includes('xrechnung') || xmlStr.includes('XRechnung');
    }
  }

  /**
   * Checks if the document is a CII format (Factur-X/ZUGFeRD v2+)
   * @param root Root element
   * @returns True if the root element is `CrossIndustryInvoice`, with or without a prefix
   */
  private static isCIIFormat(root: Element): boolean {
    return FormatDetector.unprefixedName(root) === 'CrossIndustryInvoice';
  }

  /**
   * Checks if the document is a ZUGFeRD v1 format
   * @param root Root element
   * @returns True if the root element is `CrossIndustryDocument`, with or without a prefix
   */
  private static isZUGFeRDV1Format(root: Element): boolean {
    return FormatDetector.unprefixedName(root) === 'CrossIndustryDocument';
  }

  /**
   * Checks if the document is a FatturaPA format
   *
   * Accepts a `FatturaElettronica` root with or without a prefix, or a root whose
   * default-namespace attribute or resolved namespace URI contains `fatturapa.gov.it`.
   *
   * @param root Root element
   * @returns True if it's a FatturaPA format
   */
  private static isFatturaPAFormat(root: Element): boolean {
    const marker = 'fatturapa.gov.it';
    const defaultNamespace = root.getAttribute('xmlns') ?? '';
    const rootNamespace = root.namespaceURI ?? '';

    return (
      FormatDetector.unprefixedName(root) === 'FatturaElettronica' ||
      defaultNamespace.includes(marker) ||
      rootNamespace.includes(marker)
    );
  }

  /**
   * Maps a CII guideline/profile identifier to an invoice format.
   *
   * Substring checks are case-insensitive; comparisons against the known profile
   * constants are exact. XRechnung is tested first, consistent with the quick check.
   *
   * @param profileText Guideline ID text, already trimmed
   * @returns The matching format, or `undefined` when the ID is not recognised
   */
  private static classifyCIIProfile(profileText: string): InvoiceFormat | undefined {
    const lowered = profileText.toLowerCase();

    if (lowered.includes('xrechnung')) {
      return InvoiceFormat.XRECHNUNG;
    }

    // ZUGFeRD profiles (v1 and v2)
    const zugferdIds: readonly string[] = [
      CII_PROFILE_IDS.ZUGFERD_BASIC,
      CII_PROFILE_IDS.ZUGFERD_COMFORT,
      CII_PROFILE_IDS.ZUGFERD_EXTENDED,
      CII_PROFILE_IDS.ZUGFERD_V1_BASIC,
      CII_PROFILE_IDS.ZUGFERD_V1_COMFORT,
      CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED,
    ];
    if (
      lowered.includes('zugferd') ||
      lowered.includes('urn:ferd:') ||
      zugferdIds.includes(profileText)
    ) {
      return InvoiceFormat.ZUGFERD;
    }

    // Factur-X profiles
    const facturXIds: readonly string[] = [
      CII_PROFILE_IDS.FACTURX_MINIMUM,
      CII_PROFILE_IDS.FACTURX_BASIC,
      CII_PROFILE_IDS.FACTURX_EN16931,
    ];
    if (lowered.includes('factur-x') || facturXIds.includes(profileText)) {
      return InvoiceFormat.FACTURX;
    }

    return undefined;
  }

  /**
   * Detects the specific CII format (Factur-X vs ZUGFeRD)
   *
   * Walks ExchangedDocumentContext -> GuidelineSpecifiedDocumentContextParameter -> ID
   * (any prefix) and classifies the first recognised ID. Falls back to string-based
   * detection when no ID is recognised or when DOM traversal throws.
   *
   * @param doc XML document
   * @param xml Original XML string for fallback checks
   * @returns Detected format
   */
  private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
    try {
      const contexts = FormatDetector.findElements(
        doc,
        'ExchangedDocumentContext',
        'ExchangedDocumentContext',
      );

      for (const context of contexts) {
        const guidelines = FormatDetector.findElements(
          context,
          'GuidelineSpecifiedDocumentContextParameter',
          'ram:GuidelineSpecifiedDocumentContextParameter',
        );

        for (const guideline of guidelines) {
          for (const idNode of FormatDetector.findElements(guideline, 'ID', 'ram:ID')) {
            // Trim so pretty-printed XML still matches the exact profile constants.
            const detected = FormatDetector.classifyCIIProfile((idNode.textContent ?? '').trim());
            if (detected !== undefined) {
              return detected;
            }
          }
        }
      }

      // If we reach here, fall back to string checking
      return FormatDetector.detectCIIFormatFromString(xml);
    } catch (error) {
      console.warn('Error detecting CII format, falling back to generic CII:', error);
      return FormatDetector.detectCIIFormatFromString(xml);
    }
  }

  /**
   * Fallback method to detect CII format from string content
   *
   * Checks the whole string for the literal spellings `factur-x` / `Factur-X`
   * first, then `zugferd` / `ZUGFeRD`.
   *
   * @param xml XML string
   * @returns FACTURX, ZUGFERD, or generic CII when neither marker is present
   */
  private static detectCIIFormatFromString(xml: string): InvoiceFormat {
    // Check for Factur-X indicators
    if (xml.includes('factur-x') || xml.includes('Factur-X')) {
      return InvoiceFormat.FACTURX;
    }

    // Check for ZUGFeRD indicators
    if (xml.includes('zugferd') || xml.includes('ZUGFeRD')) {
      return InvoiceFormat.ZUGFERD;
    }

    // Generic CII if we can't determine more specifically
    return InvoiceFormat.CII;
  }
}
|