fix(core): Improve PDF XML extraction, embedding, and format detection; update loadPdf/exportPdf error handling; add new validator implementations and enhance IPdf metadata.
This commit is contained in:
@ -13,6 +13,18 @@ export class FormatDetector {
|
||||
*/
|
||||
public static detectFormat(xml: string): InvoiceFormat {
|
||||
try {
|
||||
// Quick check for empty or invalid XML
|
||||
if (!xml || typeof xml !== 'string' || xml.trim().length === 0) {
|
||||
return InvoiceFormat.UNKNOWN;
|
||||
}
|
||||
|
||||
// Quick string-based pre-checks for performance
|
||||
const quickCheck = FormatDetector.quickFormatCheck(xml);
|
||||
if (quickCheck !== InvoiceFormat.UNKNOWN) {
|
||||
return quickCheck;
|
||||
}
|
||||
|
||||
// More thorough parsing-based checks
|
||||
const doc = new DOMParser().parseFromString(xml, 'application/xml');
|
||||
const root = doc.documentElement;
|
||||
|
||||
@ -21,106 +33,26 @@ export class FormatDetector {
|
||||
}
|
||||
|
||||
// UBL detection (Invoice or CreditNote root element)
|
||||
if (root.nodeName === 'Invoice' || root.nodeName === 'CreditNote') {
|
||||
// For simplicity, we'll treat all UBL documents as XRechnung for now
|
||||
// In a real implementation, we would check for specific customization IDs
|
||||
return InvoiceFormat.XRECHNUNG;
|
||||
if (FormatDetector.isUBLFormat(root)) {
|
||||
// Check for XRechnung customization
|
||||
if (FormatDetector.isXRechnungFormat(doc)) {
|
||||
return InvoiceFormat.XRECHNUNG;
|
||||
}
|
||||
return InvoiceFormat.UBL;
|
||||
}
|
||||
|
||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice or CrossIndustryDocument root element)
|
||||
if (root.nodeName === 'rsm:CrossIndustryInvoice' || root.nodeName === 'CrossIndustryInvoice' ||
|
||||
root.nodeName.endsWith(':CrossIndustryInvoice')) {
|
||||
// Set up namespaces for XPath queries (ZUGFeRD v2/Factur-X)
|
||||
const namespaces = {
|
||||
rsm: 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
|
||||
ram: 'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100'
|
||||
};
|
||||
|
||||
// Create XPath selector with namespaces
|
||||
const select = xpath.useNamespaces(namespaces);
|
||||
|
||||
// Look for profile identifier
|
||||
const profileNode = select(
|
||||
'string(//rsm:ExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
||||
doc
|
||||
);
|
||||
|
||||
if (profileNode) {
|
||||
const profileText = profileNode.toString();
|
||||
|
||||
// Check for ZUGFeRD profiles
|
||||
if (profileText.includes('zugferd') ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// Check for Factur-X profiles
|
||||
if (profileText.includes('factur-x') ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.FACTURX_EN16931) {
|
||||
return InvoiceFormat.FACTURX;
|
||||
}
|
||||
}
|
||||
|
||||
// If we can't determine the specific CII format, default to generic CII
|
||||
return InvoiceFormat.CII;
|
||||
// Factur-X/ZUGFeRD detection (CrossIndustryInvoice root element)
|
||||
if (FormatDetector.isCIIFormat(root)) {
|
||||
return FormatDetector.detectCIIFormat(doc, xml);
|
||||
}
|
||||
|
||||
// ZUGFeRD v1 detection (CrossIndustryDocument root element)
|
||||
if (root.nodeName === 'rsm:CrossIndustryDocument' || root.nodeName === 'CrossIndustryDocument' ||
|
||||
root.nodeName === 'ram:CrossIndustryDocument' || root.nodeName.endsWith(':CrossIndustryDocument')) {
|
||||
|
||||
// Check for ZUGFeRD v1 namespace in the document
|
||||
const xmlString = xml.toString();
|
||||
if (xmlString.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
|
||||
xmlString.includes('urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:12') ||
|
||||
xmlString.includes('urn:ferd:CrossIndustryDocument') ||
|
||||
xmlString.includes('zugferd') ||
|
||||
xmlString.includes('ZUGFeRD')) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// Set up namespaces for XPath queries (ZUGFeRD v1)
|
||||
try {
|
||||
const namespaces = {
|
||||
rsm: ZUGFERD_V1_NAMESPACES.RSM,
|
||||
ram: ZUGFERD_V1_NAMESPACES.RAM
|
||||
};
|
||||
|
||||
// Create XPath selector with namespaces
|
||||
const select = xpath.useNamespaces(namespaces);
|
||||
|
||||
// Look for profile identifier
|
||||
const profileNode = select(
|
||||
'string(//rsm:SpecifiedExchangedDocumentContext/ram:GuidelineSpecifiedDocumentContextParameter/ram:ID)',
|
||||
doc
|
||||
);
|
||||
|
||||
if (profileNode) {
|
||||
const profileText = profileNode.toString();
|
||||
|
||||
// Check for ZUGFeRD v1 profiles
|
||||
if (profileText.includes('ferd:CrossIndustryDocument:invoice:1p0') ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_BASIC ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_COMFORT ||
|
||||
profileText === CII_PROFILE_IDS.ZUGFERD_V1_EXTENDED) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('Error in ZUGFeRD v1 XPath detection:', error);
|
||||
}
|
||||
|
||||
// If we can't determine the specific profile but it's a CrossIndustryDocument, it's likely ZUGFeRD v1
|
||||
if (FormatDetector.isZUGFeRDV1Format(root)) {
|
||||
return InvoiceFormat.ZUGFERD;
|
||||
}
|
||||
|
||||
// FatturaPA detection would be implemented here
|
||||
if (root.nodeName === 'FatturaElettronica' ||
|
||||
(root.getAttribute('xmlns') && root.getAttribute('xmlns')!.includes('fatturapa.gov.it'))) {
|
||||
// FatturaPA detection
|
||||
if (FormatDetector.isFatturaPAFormat(root)) {
|
||||
return InvoiceFormat.FATTURAPA;
|
||||
}
|
||||
|
||||
@ -130,4 +62,241 @@ export class FormatDetector {
|
||||
return InvoiceFormat.UNKNOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Cheap, parse-free format sniffing based on substring matches.
 *
 * The input is lower-cased once, so every marker below is matched
 * case-insensitively. Marker groups are evaluated in a fixed order
 * (Factur-X, ZUGFeRD, XRechnung, FatturaPA) and the first group that
 * matches decides the result.
 *
 * @param xml XML string
 * @returns The format whose markers were found, or UNKNOWN when no marker matched
 */
private static quickFormatCheck(xml: string): InvoiceFormat {
  const haystack = xml.toLowerCase();

  // True when at least one of the given markers occurs in the lower-cased input.
  const containsAny = (markers: string[]): boolean =>
    markers.some((marker) => haystack.includes(marker));

  // Factur-X: a dedicated marker, or the EN 16931 URN together with a "factur-x" mention.
  const looksLikeFacturX =
    containsAny(['factur-x.eu', 'factur-x.xml', 'factur-x:']) ||
    (haystack.includes('urn:cen.eu:en16931:2017') && haystack.includes('factur-x'));
  if (looksLikeFacturX) {
    return InvoiceFormat.FACTURX;
  }

  // ZUGFeRD
  if (containsAny(['zugferd:', 'zugferd-invoice.xml', 'urn:ferd:', 'urn:zugferd'])) {
    return InvoiceFormat.ZUGFERD;
  }

  // XRechnung
  if (containsAny(['xrechnung', 'urn:xoev-de:kosit:standard:xrechnung'])) {
    return InvoiceFormat.XRECHNUNG;
  }

  // FatturaPA
  if (containsAny(['fatturapa', 'fattura elettronica', 'fatturaelettronica'])) {
    return InvoiceFormat.FATTURAPA;
  }

  // Nothing obvious found; the caller has to inspect the document further.
  return InvoiceFormat.UNKNOWN;
}
|
||||
|
||||
/**
 * Tells whether the root element names a UBL document.
 *
 * A root qualifies when its node name is `Invoice` or `CreditNote`,
 * either bare or behind any namespace prefix (which also covers the
 * explicit `ubl:` prefix).
 *
 * @param root Root element
 * @returns True if the root element name matches a UBL invoice or credit note
 */
private static isUBLFormat(root: Element): boolean {
  const qualifiedName = root.nodeName;

  for (const localName of ['Invoice', 'CreditNote']) {
    // Bare name, or "<prefix>:<name>" for an arbitrary prefix.
    if (qualifiedName === localName || qualifiedName.endsWith(':' + localName)) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Checks whether a UBL document declares an XRechnung customization.
 *
 * Every `CustomizationID` element is inspected and the document counts as
 * XRechnung when any of them contains "xrechnung" (case-insensitive).
 * Elements are located by the UBL CommonBasicComponents namespace, so any
 * prefix or a default namespace works; the literal `cbc:CustomizationID`
 * tag name is still queried as well to keep matching documents the
 * previous prefix-based lookup accepted.
 *
 * If DOM access throws, the document is serialized and searched as text.
 *
 * @param doc XML document
 * @returns True if an XRechnung customization identifier was found
 */
private static isXRechnungFormat(doc: Document): boolean {
  const cbcNamespace = 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2';

  try {
    const candidateLists = [
      // Namespace-aware lookup: independent of the prefix chosen by the document
      doc.getElementsByTagNameNS(cbcNamespace, 'CustomizationID'),
      // Prefix-based lookup kept for backward compatibility
      doc.getElementsByTagName('cbc:CustomizationID')
    ];

    for (const customizationNodes of candidateLists) {
      for (let i = 0; i < customizationNodes.length; i++) {
        const node = customizationNodes[i];
        const text = node ? node.textContent : null;
        if (text && text.toLowerCase().includes('xrechnung')) {
          return true;
        }
      }
    }

    return false;
  } catch (error) {
    console.warn('Error checking for XRechnung format:', error);
    // If direct DOM access fails, try a string-based approach
    const xmlStr = new XMLSerializer().serializeToString(doc);
    return xmlStr.toLowerCase().includes('xrechnung');
  }
}
|
||||
|
||||
/**
 * Tells whether the root element is a `CrossIndustryInvoice`
 * (the root used by Factur-X / ZUGFeRD v2+ documents).
 *
 * The name is accepted bare or behind any namespace prefix, which
 * includes the common `rsm:` prefix.
 *
 * @param root Root element
 * @returns True if the root element name is a CrossIndustryInvoice
 */
private static isCIIFormat(root: Element): boolean {
  const qualifiedName = root.nodeName;

  if (qualifiedName === 'CrossIndustryInvoice') {
    return true;
  }

  // Covers 'rsm:CrossIndustryInvoice' as well as any other prefix.
  return qualifiedName.endsWith(':CrossIndustryInvoice');
}
|
||||
|
||||
/**
 * Tells whether the root element is a `CrossIndustryDocument`
 * (treated here as the ZUGFeRD v1 root).
 *
 * Only the part of the node name after the last colon is compared, so the
 * bare name and any prefixed form (`rsm:`, `ram:`, ...) are accepted.
 *
 * @param root Root element
 * @returns True if the root element name is a CrossIndustryDocument
 */
private static isZUGFeRDV1Format(root: Element): boolean {
  const qualifiedName = root.nodeName;

  // lastIndexOf yields -1 for an unprefixed name, so slice(0) keeps it whole.
  const unprefixedName = qualifiedName.slice(qualifiedName.lastIndexOf(':') + 1);

  return unprefixedName === 'CrossIndustryDocument';
}
|
||||
|
||||
/**
 * Checks if the document is a FatturaPA format.
 *
 * A root qualifies when its node name is `FatturaElettronica` (bare or
 * behind any namespace prefix, mirroring the other root-name checks in
 * this class) or when its default `xmlns` attribute contains
 * `fatturapa.gov.it`.
 *
 * The result is always a real boolean: the attribute value is read once
 * and checked explicitly, so a missing or empty `xmlns` yields `false`
 * rather than `null` or an empty string.
 *
 * @param root Root element
 * @returns True if it's a FatturaPA format
 */
private static isFatturaPAFormat(root: Element): boolean {
  const rootName = root.nodeName;
  if (rootName === 'FatturaElettronica' || rootName.endsWith(':FatturaElettronica')) {
    return true;
  }

  // getAttribute may report a missing attribute as null or as an empty string
  // depending on the DOM implementation; both must map to false.
  const defaultNamespace = root.getAttribute('xmlns');
  return typeof defaultNamespace === 'string' && defaultNamespace.includes('fatturapa.gov.it');
}
|
||||
|
||||
/**
 * Detects the specific CII flavour (Factur-X vs ZUGFeRD) of a
 * CrossIndustryInvoice document.
 *
 * The guideline identifier is read from
 * `ExchangedDocumentContext / GuidelineSpecifiedDocumentContextParameter / ID`.
 * Each level is looked up by namespace (so the prefixes used by the document
 * do not matter) and additionally by the literal tag names the previous
 * implementation queried, to keep accepting the same documents. Every ID found
 * is trimmed and compared against the known ZUGFeRD and Factur-X profile
 * identifiers; ZUGFeRD is tested first.
 *
 * When no identifier decides the format, or when DOM access throws, the
 * decision is delegated to `detectCIIFormatFromString`.
 *
 * @param doc XML document
 * @param xml Original XML string for fallback checks
 * @returns Detected format
 */
private static detectCIIFormat(doc: Document, xml: string): InvoiceFormat {
  const rsmNamespace = 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100';
  const ramNamespace =
    'urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100';

  // Collects descendants of `scope` matching either the namespace-qualified
  // name or one of the literal tag names, without returning an element twice.
  const findElements = (
    scope: Document | Element,
    namespaceUri: string,
    localName: string,
    literalTagNames: string[]
  ): Element[] => {
    const found = new Set<Element>(
      Array.from(scope.getElementsByTagNameNS(namespaceUri, localName))
    );
    for (const tagName of literalTagNames) {
      for (const element of Array.from(scope.getElementsByTagName(tagName))) {
        found.add(element);
      }
    }
    return Array.from(found);
  };

  try {
    const contextNodes = findElements(doc, rsmNamespace, 'ExchangedDocumentContext', [
      'ExchangedDocumentContext'
    ]);

    for (const contextNode of contextNodes) {
      const guidelineNodes = findElements(
        contextNode,
        ramNamespace,
        'GuidelineSpecifiedDocumentContextParameter',
        ['ram:GuidelineSpecifiedDocumentContextParameter']
      );

      for (const guidelineNode of guidelineNodes) {
        for (const idNode of findElements(guidelineNode, ramNamespace, 'ID', ['ram:ID'])) {
          // Surrounding whitespace from pretty-printed XML would defeat the
          // strict-equality comparisons below.
          const profileText = (idNode.textContent || '').trim();

          // Check for ZUGFeRD profiles
          if (
            profileText.includes('zugferd') ||
            profileText === CII_PROFILE_IDS.ZUGFERD_BASIC ||
            profileText === CII_PROFILE_IDS.ZUGFERD_COMFORT ||
            profileText === CII_PROFILE_IDS.ZUGFERD_EXTENDED
          ) {
            return InvoiceFormat.ZUGFERD;
          }

          // Check for Factur-X profiles
          if (
            profileText.includes('factur-x') ||
            profileText === CII_PROFILE_IDS.FACTURX_MINIMUM ||
            profileText === CII_PROFILE_IDS.FACTURX_BASIC ||
            profileText === CII_PROFILE_IDS.FACTURX_EN16931
          ) {
            return InvoiceFormat.FACTURX;
          }
        }
      }
    }

    // No context node or no decisive profile identifier: fall back to string checking
    return FormatDetector.detectCIIFormatFromString(xml);
  } catch (error) {
    console.warn('Error detecting CII format, falling back to string-based detection:', error);
    return FormatDetector.detectCIIFormatFromString(xml);
  }
}
|
||||
|
||||
/**
 * Last-resort CII flavour detection working purely on the raw XML text.
 *
 * Looks for the spellings `factur-x` / `Factur-X` first, then
 * `zugferd` / `ZUGFeRD`. The search is case-sensitive and limited to
 * exactly these spellings.
 *
 * @param xml XML string
 * @returns FACTURX or ZUGFERD when a matching spelling occurs, otherwise generic CII
 */
private static detectCIIFormatFromString(xml: string): InvoiceFormat {
  // True when the raw text contains at least one of the given spellings.
  const mentions = (...spellings: string[]): boolean =>
    spellings.some((spelling) => xml.includes(spelling));

  if (mentions('factur-x', 'Factur-X')) {
    return InvoiceFormat.FACTURX;
  }

  // Without a ZUGFeRD mention either, only the generic CII family is known.
  return mentions('zugferd', 'ZUGFeRD') ? InvoiceFormat.ZUGFERD : InvoiceFormat.CII;
}
|
||||
}
|
Reference in New Issue
Block a user