xinvoice/ts/formats/utils/format.detector.ts

302 lines
9.6 KiB
TypeScript
Raw Normal View History

2025-04-03 15:53:08 +00:00
import { InvoiceFormat } from '../../interfaces/common.js';
import { DOMParser, xpath } from '../../plugins.js';
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';
2025-03-17 16:49:49 +00:00
/**
 * Utility class for detecting invoice formats.
 *
 * Detection runs in two stages: a cheap substring scan over the raw XML and,
 * only when that scan is inconclusive, a DOM parse followed by checks on the
 * root element and selected descendant elements.
 */
export class FormatDetector {
  /** Namespace of UBL CommonBasicComponents (conventionally bound to `cbc`). */
  private static readonly UBL_CBC_NAMESPACE =
    'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';

  /** Namespace of the CII root/message elements (conventionally bound to `rsm`). */
  private static readonly CII_RSM_NAMESPACE =
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';

  /** Namespace of the CII reusable aggregates (conventionally bound to `ram`). */
  private static readonly CII_RAM_NAMESPACE =
    'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100';

  /**
   * Detects the format of an XML document.
   *
   * Never throws: any error raised while parsing or inspecting the document is
   * logged via `console.error` and reported as `InvoiceFormat.UNKNOWN`.
   *
   * @param xml XML content to analyze
   * @returns Detected invoice format, or `InvoiceFormat.UNKNOWN` when the input
   *          is empty, not a string, or matches none of the known formats
   */
  public static detectFormat(xml: string): InvoiceFormat {
    try {
      // Reject empty / non-string input before doing any real work.
      if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
        return InvoiceFormat.UNKNOWN;
      }

      // Cheap substring scan first; it short-circuits the DOM parse entirely.
      const quickCheck = FormatDetector.quickFormatCheck(xml);
      if (quickCheck !== InvoiceFormat.UNKNOWN) {
        return quickCheck;
      }

      // Inconclusive: parse the document and inspect its structure.
      const doc = new DOMParser().parseFromString(xml, 'application/xml');
      const root = doc.documentElement;
      if (!root) {
        return InvoiceFormat.UNKNOWN;
      }

      // UBL (Invoice or CreditNote root); XRechnung is a UBL customization.
      if (FormatDetector.isUBLFormat(root)) {
        return FormatDetector.isXRechnungFormat(doc)
          ? InvoiceFormat.XRECHNUNG
          : InvoiceFormat.UBL;
      }

      // Factur-X / ZUGFeRD v2+ (CrossIndustryInvoice root).
      if (FormatDetector.isCIIFormat(root)) {
        return FormatDetector.detectCIIFormat(doc, xml);
      }

      // ZUGFeRD v1 (CrossIndustryDocument root).
      if (FormatDetector.isZUGFeRDV1Format(root)) {
        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA.
      if (FormatDetector.isFatturaPAFormat(root)) {
        return InvoiceFormat.FATTURAPA;
      }

      return InvoiceFormat.UNKNOWN;
    } catch (error) {
      console.error('Error detecting format:', error);
      return InvoiceFormat.UNKNOWN;
    }
  }

  /**
   * Performs a quick format check based on string content.
   * This avoids a full XML parse for documents carrying an obvious marker.
   *
   * The scan is case-insensitive and looks at the whole document text, so a
   * marker appearing anywhere (including free-text fields) decides the result.
   * Checks run in the order Factur-X, ZUGFeRD, XRechnung, FatturaPA.
   *
   * @param xml XML string
   * @returns Detected format, or `InvoiceFormat.UNKNOWN` if more analysis is needed
   */
  private static quickFormatCheck(xml: string): InvoiceFormat {
    const lowerXml = xml.toLowerCase();
    const containsAny = (markers: readonly string[]): boolean =>
      markers.some((marker) => lowerXml.includes(marker));

    // Factur-X: a dedicated marker, or the EN 16931 URN together with any
    // "factur-x" mention (parenthesized: `&&` binds tighter than `||`).
    if (
      containsAny(['factur-x.eu', 'factur-x.xml', 'factur-x:']) ||
      (lowerXml.includes('urn:cen.eu:en16931:2017') && lowerXml.includes('factur-x'))
    ) {
      return InvoiceFormat.FACTURX;
    }

    // ZUGFeRD
    if (containsAny(['zugferd:', 'zugferd-invoice.xml', 'urn:ferd:', 'urn:zugferd'])) {
      return InvoiceFormat.ZUGFERD;
    }

    // XRechnung: the bare word also covers 'urn:xoev-de:kosit:standard:xrechnung'.
    if (lowerXml.includes('xrechnung')) {
      return InvoiceFormat.XRECHNUNG;
    }

    // FatturaPA
    if (containsAny(['fatturapa', 'fattura elettronica', 'fatturaelettronica'])) {
      return InvoiceFormat.FATTURAPA;
    }

    // Need more analysis
    return InvoiceFormat.UNKNOWN;
  }

  /**
   * Tests whether an element's node name equals `localName`, either bare or
   * behind any namespace prefix (`prefix:localName`).
   *
   * @param root Element to test
   * @param localName Expected name without prefix
   * @returns True if the node name matches
   */
  private static hasRootName(root: Element, localName: string): boolean {
    const nodeName = root.nodeName;
    return nodeName === localName || nodeName.endsWith(`:${localName}`);
  }

  /**
   * Collects descendant elements matching either a namespace + local name
   * (independent of the prefix the document chose) or a literal qualified
   * name (kept so lookups that worked by prefix continue to work).
   * Elements found by both lookups are returned once, in first-seen order.
   *
   * @param scope Document or element to search below
   * @param namespaceUri Namespace URI for the namespace-aware lookup
   * @param localName Local name for the namespace-aware lookup
   * @param qualifiedName Literal tag name for the prefix-based lookup
   * @returns Matching elements without duplicates
   */
  private static findElements(
    scope: Document | Element,
    namespaceUri: string,
    localName: string,
    qualifiedName: string
  ): Element[] {
    const unique = new Set<Element>();
    for (const element of Array.from(scope.getElementsByTagNameNS(namespaceUri, localName))) {
      unique.add(element);
    }
    for (const element of Array.from(scope.getElementsByTagName(qualifiedName))) {
      unique.add(element);
    }
    return Array.from(unique);
  }

  /**
   * Checks if the document is a UBL format
   * @param root Root element
   * @returns True if the root is `Invoice` or `CreditNote`, with or without a prefix
   */
  private static isUBLFormat(root: Element): boolean {
    return (
      FormatDetector.hasRootName(root, 'Invoice') ||
      FormatDetector.hasRootName(root, 'CreditNote')
    );
  }

  /**
   * Checks if the document is an XRechnung format, i.e. whether any
   * `CustomizationID` element mentions "xrechnung" (case-insensitive).
   *
   * @param doc XML document
   * @returns True if it's an XRechnung format
   */
  private static isXRechnungFormat(doc: Document): boolean {
    try {
      const customizationNodes = FormatDetector.findElements(
        doc,
        FormatDetector.UBL_CBC_NAMESPACE,
        'CustomizationID',
        'cbc:CustomizationID'
      );
      return customizationNodes.some((node) =>
        (node.textContent || '').toLowerCase().includes('xrechnung')
      );
    } catch (error) {
      console.warn('Error checking for XRechnung format:', error);
      // String-based fallback. XMLSerializer is not imported by this file, so
      // it is only usable where the runtime provides it as a global; without
      // the guard the handler itself would raise a ReferenceError.
      if (typeof XMLSerializer === 'undefined') {
        return false;
      }
      const xmlStr = new XMLSerializer().serializeToString(doc);
      return xmlStr.toLowerCase().includes('xrechnung');
    }
  }

  /**
   * Checks if the document is a CII format (Factur-X/ZUGFeRD v2+)
   * @param root Root element
   * @returns True if the root is `CrossIndustryInvoice`, with or without a prefix
   */
  private static isCIIFormat(root: Element): boolean {
    return FormatDetector.hasRootName(root, 'CrossIndustryInvoice');
  }

  /**
   * Checks if the document is a ZUGFeRD v1 format
   * @param root Root element
   * @returns True if the root is `CrossIndustryDocument`, with or without a prefix
   */
  private static isZUGFeRDV1Format(root: Element): boolean {
    return FormatDetector.hasRootName(root, 'CrossIndustryDocument');
  }

  /**
   * Checks if the document is a FatturaPA format.
   *
   * Matches a `FatturaElettronica` root (bare or prefixed), or a root whose
   * default-namespace declaration or resolved namespace URI contains
   * `fatturapa.gov.it`.
   *
   * @param root Root element
   * @returns True if it's a FatturaPA format (always a strict boolean)
   */
  private static isFatturaPAFormat(root: Element): boolean {
    if (FormatDetector.hasRootName(root, 'FatturaElettronica')) {
      return true;
    }
    // `xmlns` covers a default-namespace declaration; `namespaceURI` also
    // covers a namespace bound through a prefix on the root element.
    const namespaceCandidates = [root.getAttribute('xmlns'), root.namespaceURI];
    return namespaceCandidates.some(
      (candidate) => typeof candidate === 'string' && candidate.includes('fatturapa.gov.it')
    );
  }

  /**
   * Maps the text of a guideline `ID` element to a concrete CII flavour.
   * Surrounding whitespace is ignored; substring markers are case-insensitive.
   * ZUGFeRD is tested before Factur-X.
   *
   * @param rawProfile Text content of the ID element
   * @returns ZUGFERD or FACTURX when recognised, otherwise `undefined`
   */
  private static classifyCIIProfile(rawProfile: string): InvoiceFormat | undefined {
    const profileText = rawProfile.trim();
    const loweredProfile = profileText.toLowerCase();

    if (
      loweredProfile.includes('zugferd') ||
      profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
      profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
      profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED
    ) {
      return InvoiceFormat.ZUGFERD;
    }

    if (
      loweredProfile.includes('factur-x') ||
      profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
      profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
      profileText === CII_PROFILE_IDS.FACTURX_EN16931
    ) {
      return InvoiceFormat.FACTURX;
    }

    return undefined;
  }

  /**
   * Detects the specific CII format (Factur-X vs ZUGFeRD) from the guideline
   * ID found under `ExchangedDocumentContext`. When no ID is found or none is
   * recognised, or when DOM traversal throws, the decision falls back to a
   * string scan of the original XML.
   *
   * @param doc XML document
   * @param xml Original XML string for fallback checks
   * @returns Detected format
   */
  private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
    try {
      // Direct DOM traversal (no XPath); each level is looked up by namespace
      // first and by its conventional literal tag name second.
      const contextNodes = FormatDetector.findElements(
        doc,
        FormatDetector.CII_RSM_NAMESPACE,
        'ExchangedDocumentContext',
        'ExchangedDocumentContext'
      );

      for (const contextNode of contextNodes) {
        const guidelineNodes = FormatDetector.findElements(
          contextNode,
          FormatDetector.CII_RAM_NAMESPACE,
          'GuidelineSpecifiedDocumentContextParameter',
          'ram:GuidelineSpecifiedDocumentContextParameter'
        );

        for (const guidelineNode of guidelineNodes) {
          const idNodes = FormatDetector.findElements(
            guidelineNode,
            FormatDetector.CII_RAM_NAMESPACE,
            'ID',
            'ram:ID'
          );

          for (const idNode of idNodes) {
            const detected = FormatDetector.classifyCIIProfile(idNode.textContent || '');
            if (detected !== undefined) {
              return detected;
            }
          }
        }
      }

      // Nothing conclusive in the DOM: fall back to string checking.
      return FormatDetector.detectCIIFormatFromString(xml);
    } catch (error) {
      console.warn('Error detecting CII format, falling back to generic CII:', error);
      return FormatDetector.detectCIIFormatFromString(xml);
    }
  }

  /**
   * Fallback method to detect CII format from string content.
   * The scan is case-insensitive; Factur-X is tested before ZUGFeRD.
   *
   * @param xml XML string
   * @returns FACTURX or ZUGFERD when a marker is present, otherwise generic CII
   */
  private static detectCIIFormatFromString(xml: string): InvoiceFormat {
    const lowerXml = xml.toLowerCase();

    if (lowerXml.includes('factur-x')) {
      return InvoiceFormat.FACTURX;
    }

    if (lowerXml.includes('zugferd')) {
      return InvoiceFormat.ZUGFERD;
    }

    // Generic CII if we can't determine more specifically
    return InvoiceFormat.CII;
  }
}