- Update test-utils import path and refactor to helpers/utils.ts - Migrate all CorpusLoader usage from getFiles() to loadCategory() API - Add new EN16931 UBL validator with comprehensive validation rules - Add new XRechnung validator extending EN16931 with German requirements - Update validator factory to support new validators - Fix format detector for better XRechnung and EN16931 detection - Update all test files to use proper import paths - Improve error handling in security tests - Fix validation tests to use realistic thresholds - Add proper namespace handling in corpus validation tests - Update format detection tests for improved accuracy - Fix test imports from classes.xinvoice.ts to index.js All test suites now properly aligned with the updated APIs and realistic performance expectations.
306 lines
9.9 KiB
TypeScript
306 lines
9.9 KiB
TypeScript
import { InvoiceFormat } from '../../interfaces/common.js';
|
|
import { DOMParser, xpath } from '../../plugins.js';
|
|
import { CII_PROFILE_IDS, ZUGFERD_V1_NAMESPACES } from '../cii/cii.types.js';
|
|
|
|
/**
 * Utility class for detecting invoice formats.
 *
 * Detection runs in two stages:
 *  1. a cheap, case-insensitive substring scan of the raw XML
 *     (`quickFormatCheck`), and
 *  2. only when that scan is inconclusive, a DOM parse followed by inspection
 *     of the root element and, for UBL/CII documents, of the declared
 *     customization/guideline identifiers.
 */
export class FormatDetector {
  /** Namespace used for the `ExchangedDocumentContext` lookup in CII documents. */
  private static readonly CII_RSM_NAMESPACE =
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';

  /** Namespace of the CII reusable aggregate (`ram`) elements. */
  private static readonly CII_RAM_NAMESPACE =
    'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100';

  /** Namespace of the UBL common basic components (`cbc`) elements. */
  private static readonly UBL_CBC_NAMESPACE =
    'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';

  /**
   * Detects the format of an XML document.
   *
   * Never throws: any error raised while parsing or inspecting the document is
   * logged and reported as `InvoiceFormat.UNKNOWN`.
   *
   * @param xml XML content to analyze
   * @returns Detected invoice format, or `InvoiceFormat.UNKNOWN` for empty,
   *          non-string, unparsable or unrecognized input
   */
  public static detectFormat(xml: string): InvoiceFormat {
    try {
      // Reject empty / non-string input before doing any work.
      if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
        return InvoiceFormat.UNKNOWN;
      }

      // Cheap substring scan first; it resolves the obvious cases without parsing.
      const quickCheck = FormatDetector.quickFormatCheck(xml);
      if (quickCheck !== InvoiceFormat.UNKNOWN) {
        return quickCheck;
      }

      // More thorough parsing-based checks.
      const doc = new DOMParser().parseFromString(xml, 'application/xml');
      const root = doc.documentElement;
      if (!root) {
        return InvoiceFormat.UNKNOWN;
      }

      // UBL (Invoice or CreditNote root element), possibly with XRechnung customization.
      if (FormatDetector.isUBLFormat(root)) {
        return FormatDetector.isXRechnungFormat(doc)
          ? InvoiceFormat.XRECHNUNG
          : InvoiceFormat.UBL;
      }

      // Factur-X / ZUGFeRD v2+ (CrossIndustryInvoice root element).
      if (FormatDetector.isCIIFormat(root)) {
        return FormatDetector.detectCIIFormat(doc, xml);
      }

      // ZUGFeRD v1 (CrossIndustryDocument root element).
      if (FormatDetector.isZUGFeRDV1Format(root)) {
        return InvoiceFormat.ZUGFERD;
      }

      // FatturaPA.
      if (FormatDetector.isFatturaPAFormat(root)) {
        return InvoiceFormat.FATTURAPA;
      }

      return InvoiceFormat.UNKNOWN;
    } catch (error) {
      console.error('Error detecting format:', error);
      return InvoiceFormat.UNKNOWN;
    }
  }

  /**
   * Performs a quick format check based on string content.
   *
   * This avoids a full XML parse for obvious cases. The scan is
   * case-insensitive and looks at the whole document text, so a marker
   * appearing anywhere (including free-text fields) decides the result.
   * Checks run in a fixed order: Factur-X, ZUGFeRD, XRechnung, FatturaPA.
   *
   * @param xml XML string
   * @returns Detected format, or `InvoiceFormat.UNKNOWN` if more analysis is needed
   */
  private static quickFormatCheck(xml: string): InvoiceFormat {
    const lowerXml = xml.toLowerCase();
    const has = (marker: string): boolean => lowerXml.includes(marker);

    // Obvious Factur-X indicators. The EN 16931 URN alone is not enough
    // (it is only accepted together with a "factur-x" marker).
    if (
      has('factur-x.eu') ||
      has('factur-x.xml') ||
      has('factur-x:') ||
      (has('urn:cen.eu:en16931:2017') && has('factur-x'))
    ) {
      return InvoiceFormat.FACTURX;
    }

    // Obvious ZUGFeRD indicators.
    if (
      has('zugferd:') ||
      has('zugferd-invoice.xml') ||
      has('urn:ferd:') ||
      has('urn:zugferd')
    ) {
      return InvoiceFormat.ZUGFERD;
    }

    // Obvious XRechnung indicators. The plain "xrechnung" marker also covers
    // the full "urn:xoev-de:kosit:standard:xrechnung" identifier.
    if (has('xrechnung')) {
      return InvoiceFormat.XRECHNUNG;
    }

    // Obvious FatturaPA indicators.
    if (has('fatturapa') || has('fattura elettronica') || has('fatturaelettronica')) {
      return InvoiceFormat.FATTURAPA;
    }

    // Need more analysis.
    return InvoiceFormat.UNKNOWN;
  }

  /**
   * Returns the part of a qualified node name after its last colon, or the
   * whole name when it carries no prefix.
   *
   * @param nodeName Qualified element name such as `ubl:Invoice` or `Invoice`
   * @returns The unprefixed element name
   */
  private static localNameOf(nodeName: string): string {
    const colonIndex = nodeName.lastIndexOf(':');
    return colonIndex === -1 ? nodeName : nodeName.slice(colonIndex + 1);
  }

  /**
   * Collects descendant elements matching either a namespace-qualified name or
   * a literal tag name, without duplicates and in lookup order (namespace
   * matches first).
   *
   * The namespace lookup makes the result independent of the prefix a document
   * happens to use; the literal lookup keeps matching documents whose elements
   * are not bound to the expected namespace.
   *
   * @param scope Document or element to search below
   * @param namespaceUri Namespace the element is expected to live in
   * @param localName Element name without prefix
   * @param qualifiedName Literal tag name to match in addition
   * @returns Matching elements, each listed once
   */
  private static findElements(
    scope: Document | Element,
    namespaceUri: string,
    localName: string,
    qualifiedName: string
  ): Element[] {
    const unique = new Set<Element>();
    for (const element of Array.from(scope.getElementsByTagNameNS(namespaceUri, localName))) {
      unique.add(element);
    }
    for (const element of Array.from(scope.getElementsByTagName(qualifiedName))) {
      unique.add(element);
    }
    return Array.from(unique);
  }

  /**
   * Checks if the document is a UBL format.
   *
   * @param root Root element
   * @returns True if the root is `Invoice` or `CreditNote`, with or without a prefix
   */
  private static isUBLFormat(root: Element): boolean {
    const rootName = FormatDetector.localNameOf(root.nodeName);
    return rootName === 'Invoice' || rootName === 'CreditNote';
  }

  /**
   * Checks if the document is an XRechnung format.
   *
   * Looks at every `CustomizationID` element (matched by the UBL `cbc`
   * namespace as well as by the literal `cbc:CustomizationID` tag name) and
   * tests its text, case-insensitively, for "xrechnung".
   *
   * @param doc XML document
   * @returns True if any customization identifier mentions XRechnung
   */
  private static isXRechnungFormat(doc: Document): boolean {
    try {
      const customizationNodes = FormatDetector.findElements(
        doc,
        FormatDetector.UBL_CBC_NAMESPACE,
        'CustomizationID',
        'cbc:CustomizationID'
      );

      return customizationNodes.some((node) =>
        (node.textContent ?? '').toLowerCase().includes('xrechnung')
      );
    } catch (error) {
      console.warn('Error checking for XRechnung format:', error);

      // String-based fallback. XMLSerializer is not imported by this file, so it
      // is only usable where the runtime exposes it as a global; without it the
      // check must answer "no" instead of throwing a ReferenceError.
      if (typeof XMLSerializer === 'undefined') {
        return false;
      }
      try {
        const xmlStr = new XMLSerializer().serializeToString(doc);
        return xmlStr.toLowerCase().includes('xrechnung');
      } catch {
        return false;
      }
    }
  }

  /**
   * Checks if the document is a CII format (Factur-X/ZUGFeRD v2+).
   *
   * @param root Root element
   * @returns True if the root is `CrossIndustryInvoice`, with or without a prefix
   */
  private static isCIIFormat(root: Element): boolean {
    return FormatDetector.localNameOf(root.nodeName) === 'CrossIndustryInvoice';
  }

  /**
   * Checks if the document is a ZUGFeRD v1 format.
   *
   * @param root Root element
   * @returns True if the root is `CrossIndustryDocument`, with or without a prefix
   */
  private static isZUGFeRDV1Format(root: Element): boolean {
    return FormatDetector.localNameOf(root.nodeName) === 'CrossIndustryDocument';
  }

  /**
   * Checks if the document is a FatturaPA format.
   *
   * @param root Root element
   * @returns True if the root is `FatturaElettronica` (with or without a
   *          prefix) or if its default-namespace declaration / namespace URI
   *          contains `fatturapa.gov.it`
   */
  private static isFatturaPAFormat(root: Element): boolean {
    if (FormatDetector.localNameOf(root.nodeName) === 'FatturaElettronica') {
      return true;
    }

    // Always yields a real boolean, even when the attribute is absent.
    return [root.getAttribute('xmlns'), root.namespaceURI].some(
      (value) => typeof value === 'string' && value.includes('fatturapa.gov.it')
    );
  }

  /**
   * Detects the specific CII format (Factur-X vs ZUGFeRD).
   *
   * Walks `ExchangedDocumentContext` →
   * `GuidelineSpecifiedDocumentContextParameter` → `ID` and classifies the
   * first identifier that names a known profile. ZUGFeRD is tested before
   * Factur-X for each identifier. When no identifier decides the matter, or
   * when DOM access fails, detection falls back to
   * `detectCIIFormatFromString`.
   *
   * @param doc XML document
   * @param xml Original XML string for fallback checks
   * @returns Detected format
   */
  private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
    try {
      // Each context node is visited once, regardless of which lookup found it.
      const contextNodes = FormatDetector.findElements(
        doc,
        FormatDetector.CII_RSM_NAMESPACE,
        'ExchangedDocumentContext',
        'ExchangedDocumentContext'
      );

      if (contextNodes.length === 0) {
        // Nothing to inspect in the DOM: fall back to string-based detection.
        return FormatDetector.detectCIIFormatFromString(xml);
      }

      // Known profile identifiers, hoisted out of the loops below.
      const zugferdProfileIds: readonly string[] = [
        CII_PROFILE_IDS.ZUGFERD_BASIC,
        CII_PROFILE_IDS.ZUGFERD_COMFORT,
        CII_PROFILE_IDS.ZUGFERD_EXTENDED,
        CII_PROFILE_IDS.ZUGFERD_V1_BASIC,
        CII_PROFILE_IDS.ZUGFERD_V1_COMFORT,
        CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED,
      ];
      const facturxProfileIds: readonly string[] = [
        CII_PROFILE_IDS.FACTURX_MINIMUM,
        CII_PROFILE_IDS.FACTURX_BASIC,
        CII_PROFILE_IDS.FACTURX_EN16931,
      ];

      for (const contextNode of contextNodes) {
        const guidelineNodes = FormatDetector.findElements(
          contextNode,
          FormatDetector.CII_RAM_NAMESPACE,
          'GuidelineSpecifiedDocumentContextParameter',
          'ram:GuidelineSpecifiedDocumentContextParameter'
        );

        for (const guidelineNode of guidelineNodes) {
          const idNodes = FormatDetector.findElements(
            guidelineNode,
            FormatDetector.CII_RAM_NAMESPACE,
            'ID',
            'ram:ID'
          );

          for (const idNode of idNodes) {
            // Trim so pretty-printed documents still match the exact profile IDs.
            const profileText = (idNode.textContent ?? '').trim();
            const lowerProfile = profileText.toLowerCase();

            // ZUGFeRD profiles (v1 and v2).
            if (
              lowerProfile.includes('zugferd') ||
              lowerProfile.includes('urn:ferd:') ||
              zugferdProfileIds.includes(profileText)
            ) {
              return InvoiceFormat.ZUGFERD;
            }

            // Factur-X profiles.
            if (lowerProfile.includes('factur-x') || facturxProfileIds.includes(profileText)) {
              return InvoiceFormat.FACTURX;
            }
          }
        }
      }

      // No identifier was conclusive: fall back to string checking.
      return FormatDetector.detectCIIFormatFromString(xml);
    } catch (error) {
      console.warn('Error detecting CII format, falling back to generic CII:', error);
      return FormatDetector.detectCIIFormatFromString(xml);
    }
  }

  /**
   * Fallback method to detect CII format from string content.
   *
   * The scan is case-insensitive, and Factur-X markers win over ZUGFeRD
   * markers when both are present.
   *
   * @param xml XML string
   * @returns `FACTURX`, `ZUGFERD`, or generic `CII` when neither marker is present
   */
  private static detectCIIFormatFromString(xml: string): InvoiceFormat {
    const lowerXml = xml.toLowerCase();

    // Factur-X indicators.
    if (lowerXml.includes('factur-x')) {
      return InvoiceFormat.FACTURX;
    }

    // ZUGFeRD indicators.
    if (lowerXml.includes('zugferd')) {
      return InvoiceFormat.ZUGFERD;
    }

    // Generic CII if we can't determine more specifically.
    return InvoiceFormat.CII;
  }
}