2025-04-03 15:53:08 +00:00
|
|
|
import { InvoiceFormat } from '../../interfaces/common.js';
|
2025-04-03 21:07:21 +00:00
|
|
|
import { DOMParser, xpath } from '../../plugins.js';
|
2025-04-03 20:08:02 +00:00
|
|
|
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';
|
2025-03-17 16:49:49 +00:00
|
|
|
|
|
|
|
/**
 * Utility class for detecting invoice formats.
 *
 * Detection runs in two stages: a substring scan of the raw XML text
 * (`quickFormatCheck`) and, only when that scan is inconclusive, a DOM parse
 * followed by inspection of the root element and of profile identifiers.
 */
export class FormatDetector {
  /** Namespace URI of UBL CommonBasicComponents-2, which declares `CustomizationID`. */
  private static readonly UBL_CBC_NAMESPACE =
    'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';

  /** Namespace URI used to look up `ExchangedDocumentContext` in CII documents. */
  private static readonly CII_NAMESPACE =
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';

  /**
   * Detects the format of an XML document.
   *
   * This method does not throw: any error raised while parsing or inspecting
   * the document is logged with `console.error` and reported as
   * `InvoiceFormat.UNKNOWN`.
   *
   * @param xml XML content to analyze
   * @returns Detected invoice format, or `InvoiceFormat.UNKNOWN` when the
   *          input is empty, not a string, or matches none of the known formats
   */
  public static detectFormat(xml: string): InvoiceFormat {
    try {
      // Quick check for empty or invalid XML
      if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
        return InvoiceFormat.UNKNOWN;
      }

      // Quick string-based pre-checks; a positive hit skips XML parsing entirely
      const quickCheck = FormatDetector.quickFormatCheck(xml);
      if (quickCheck !== InvoiceFormat.UNKNOWN) {
        return quickCheck;
      }

      // More thorough parsing-based checks
      const doc = new DOMParser().parseFromString(xml, 'application/xml');
      const root = doc.documentElement;

      if (!root) {
        return InvoiceFormat.UNKNOWN;
      }

      // UBL detection (Invoice or CreditNote root element)
      if (FormatDetector.isUBLFormat(root)) {
        // XRechnung is reported separately when a CustomizationID says so
        if (FormatDetector.isXRechnungFormat(doc)) {
          return InvoiceFormat.XRECHNUNG;
        }
        return InvoiceFormat.UBL;
      }

      // Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element)
      if (FormatDetector.isCIIFormat(root)) {
        return FormatDetector.detectCIIFormat(doc, xml);
      }

      // ZUGFeRD v1 detection (CrossIndustryDocument root element)
      if (FormatDetector.isZUGFeRDV1Format(root)) {
        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA detection
      if (FormatDetector.isFatturaPAFormat(root)) {
        return InvoiceFormat.FATTURAPA;
      }

      return InvoiceFormat.UNKNOWN;
    } catch (error) {
      console.error('Error detecting format:', error);
      return InvoiceFormat.UNKNOWN;
    }
  }

  /**
   * Performs a quick format check based on string content.
   *
   * The whole input is lower-cased once, so every marker below is matched
   * case-insensitively. The families are tested in a fixed order
   * (Factur-X, ZUGFeRD, XRechnung, FatturaPA) and the first match wins.
   *
   * @param xml XML string
   * @returns Detected format or UNKNOWN if more analysis is needed
   */
  private static quickFormatCheck(xml: string): InvoiceFormat {
    const lowerXml = xml.toLowerCase();

    // Check for obvious Factur-X indicators
    if (
      lowerXml.includes('factur-x.eu') ||
      lowerXml.includes('factur-x.xml') ||
      lowerXml.includes('factur-x:') ||
      // The EN 16931 URN alone is not specific enough; it must co-occur with "factur-x"
      (lowerXml.includes('urn:cen.eu:en16931:2017') && lowerXml.includes('factur-x'))
    ) {
      return InvoiceFormat.FACTURX;
    }

    // Check for obvious ZUGFeRD indicators
    if (
      lowerXml.includes('zugferd:') ||
      lowerXml.includes('zugferd-invoice.xml') ||
      lowerXml.includes('urn:ferd:') ||
      lowerXml.includes('urn:zugferd')
    ) {
      return InvoiceFormat.ZUGFERD;
    }

    // Check for obvious XRechnung indicators. The plain word also covers the
    // 'urn:xoev-de:kosit:standard:xrechnung' URN, which contains it.
    if (lowerXml.includes('xrechnung')) {
      return InvoiceFormat.XRECHNUNG;
    }

    // Check for obvious FatturaPA indicators
    if (
      lowerXml.includes('fatturapa') ||
      lowerXml.includes('fattura elettronica') ||
      lowerXml.includes('fatturaelettronica')
    ) {
      return InvoiceFormat.FATTURAPA;
    }

    // Need more analysis
    return InvoiceFormat.UNKNOWN;
  }

  /**
   * Tests whether an element is named `localName`, with or without a
   * namespace prefix (for example `Invoice` or `ubl:Invoice`).
   *
   * @param root Element to inspect
   * @param localName Expected element name without prefix
   * @returns True if `nodeName` equals `localName` or ends with `:localName`
   */
  private static hasElementName(root: Element, localName: string): boolean {
    const nodeName = root.nodeName;
    return nodeName === localName || nodeName.endsWith(`:${localName}`);
  }

  /**
   * Checks if the document is a UBL format.
   *
   * @param root Root element
   * @returns True if the root is an `Invoice` or `CreditNote` element, prefixed or not
   */
  private static isUBLFormat(root: Element): boolean {
    return (
      FormatDetector.hasElementName(root, 'Invoice') ||
      FormatDetector.hasElementName(root, 'CreditNote')
    );
  }

  /**
   * Checks if the document is an XRechnung format.
   *
   * A document qualifies when any `CustomizationID` element contains the
   * lower-case text "xrechnung". Elements are looked up by the
   * CommonBasicComponents-2 namespace, so the prefix chosen by the document
   * does not matter, and additionally by the literal `cbc:CustomizationID`
   * qualified name.
   *
   * @param doc XML document
   * @returns True if it's an XRechnung format
   */
  private static isXRechnungFormat(doc: Document): boolean {
    try {
      const candidateLists = [
        doc.getElementsByTagNameNS(FormatDetector.UBL_CBC_NAMESPACE, 'CustomizationID'),
        doc.getElementsByTagName('cbc:CustomizationID'),
      ];

      // Check if any CustomizationID node contains "xrechnung"
      for (const candidates of candidateLists) {
        for (const node of Array.from(candidates)) {
          if (node.textContent && node.textContent.includes('xrechnung')) {
            return true;
          }
        }
      }

      return false;
    } catch (error) {
      console.warn('Error checking for XRechnung format:', error);

      // If direct DOM access fails, try a string-based approach.
      // XMLSerializer is referenced as a global and is not imported by this
      // file; guard it so the fallback cannot itself raise a ReferenceError
      // in a runtime that does not provide it.
      if (typeof XMLSerializer === 'undefined') {
        return false;
      }
      const xmlStr = new XMLSerializer().serializeToString(doc);
      return xmlStr.includes('xrechnung') || xmlStr.includes('XRechnung');
    }
  }

  /**
   * Checks if the document is a CII format (Factur-X/ZUGFeRD v2+).
   *
   * @param root Root element
   * @returns True if the root is a `CrossIndustryInvoice` element, prefixed or not
   */
  private static isCIIFormat(root: Element): boolean {
    return FormatDetector.hasElementName(root, 'CrossIndustryInvoice');
  }

  /**
   * Checks if the document is a ZUGFeRD v1 format.
   *
   * @param root Root element
   * @returns True if the root is a `CrossIndustryDocument` element, prefixed or not
   */
  private static isZUGFeRDV1Format(root: Element): boolean {
    return FormatDetector.hasElementName(root, 'CrossIndustryDocument');
  }

  /**
   * Checks if the document is a FatturaPA format.
   *
   * @param root Root element
   * @returns True if the root is a `FatturaElettronica` element (prefixed or
   *          not) or its `xmlns` attribute mentions `fatturapa.gov.it`;
   *          always a real boolean
   */
  private static isFatturaPAFormat(root: Element): boolean {
    if (FormatDetector.hasElementName(root, 'FatturaElettronica')) {
      return true;
    }

    // Read the attribute once; it may be absent, in which case there is nothing to match
    const defaultNamespace = root.getAttribute('xmlns');
    return typeof defaultNamespace === 'string' && defaultNamespace.includes('fatturapa.gov.it');
  }

  /**
   * Maps the text of a guideline ID element to a concrete CII flavour.
   *
   * ZUGFeRD is tested before Factur-X, so a text that satisfies both is
   * reported as ZUGFeRD.
   *
   * @param profileText Text content of a `ram:ID` element
   * @returns The matching format, or `undefined` when the text identifies neither flavour
   */
  private static classifyCIIProfile(profileText: string): InvoiceFormat | undefined {
    // Check for ZUGFeRD profiles
    if (
      profileText.includes('zugferd') ||
      profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
      profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
      profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED
    ) {
      return InvoiceFormat.ZUGFERD;
    }

    // Check for Factur-X profiles
    if (
      profileText.includes('factur-x') ||
      profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
      profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
      profileText === CII_PROFILE_IDS.FACTURX_EN16931
    ) {
      return InvoiceFormat.FACTURX;
    }

    return undefined;
  }

  /**
   * Detects the specific CII format (Factur-X vs ZUGFeRD).
   *
   * Walks every `ExchangedDocumentContext` element, then its
   * `ram:GuidelineSpecifiedDocumentContextParameter` children, then their
   * `ram:ID` children, and returns the format of the first ID that can be
   * classified. When no ID is conclusive, or DOM access throws, the decision
   * is delegated to `detectCIIFormatFromString`.
   *
   * @param doc XML document
   * @param xml Original XML string for fallback checks
   * @returns Detected format
   */
  private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
    try {
      // Use direct DOM traversal instead of XPath. Context elements are
      // collected both by namespace and by bare tag name; the Set drops an
      // element found by both lookups so it is inspected only once.
      const contextNodes = Array.from(
        new Set<Element>([
          ...Array.from(
            doc.getElementsByTagNameNS(FormatDetector.CII_NAMESPACE, 'ExchangedDocumentContext')
          ),
          ...Array.from(doc.getElementsByTagName('ExchangedDocumentContext')),
        ])
      );

      for (const contextNode of contextNodes) {
        const guidelineNodes = Array.from(
          contextNode.getElementsByTagName('ram:GuidelineSpecifiedDocumentContextParameter')
        );

        for (const guidelineNode of guidelineNodes) {
          for (const idNode of Array.from(guidelineNode.getElementsByTagName('ram:ID'))) {
            const detected = FormatDetector.classifyCIIProfile(idNode.textContent || '');
            if (detected !== undefined) {
              return detected;
            }
          }
        }
      }

      // No context element or no conclusive ID: fall back to string checking
      return FormatDetector.detectCIIFormatFromString(xml);
    } catch (error) {
      console.warn('Error detecting CII format, falling back to generic CII:', error);
      return FormatDetector.detectCIIFormatFromString(xml);
    }
  }

  /**
   * Fallback method to detect CII format from string content.
   *
   * Only the exact spellings listed below are recognised; Factur-X is tested
   * before ZUGFeRD.
   *
   * @param xml XML string
   * @returns `FACTURX`, `ZUGFERD`, or the generic `CII` when neither marker is present
   */
  private static detectCIIFormatFromString(xml: string): InvoiceFormat {
    // Check for Factur-X indicators
    if (xml.includes('factur-x') || xml.includes('Factur-X')) {
      return InvoiceFormat.FACTURX;
    }

    // Check for ZUGFeRD indicators
    if (xml.includes('zugferd') || xml.includes('ZUGFeRD')) {
      return InvoiceFormat.ZUGFERD;
    }

    // Generic CII if we can't determine more specifically
    return InvoiceFormat.CII;
  }
}
|