// einvoice/test/suite/einvoice_pdf-operations/test.pdf-03.facturx-extraction.ts
//
// 486 lines
// 18 KiB
// TypeScript

import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
// Upper bound for long-running PDF processing tests: 5 minutes, expressed in milliseconds.
const testTimeout = 5 * 60 * 1000;
// PDF-03: ZUGFeRD v2/Factur-X Extraction
// Tests XML extraction from ZUGFeRD v2 and Factur-X PDFs with enhanced format support
// and cross-border compatibility (German ZUGFeRD v2 and French Factur-X)
/**
 * PDF-03 / basic extraction.
 *
 * Loads the first file of the 'ZUGFERD_V2' corpus category, runs it through
 * `invoice.fromFile()` and inspects the XML returned by `invoice.toXmlString()`.
 *
 * Failure policy:
 * - An empty corpus skips the test (early return, no metric recorded).
 * - Extraction that throws or returns a falsy result is logged as a warning and tolerated.
 * - Validation problems are logged only.
 * - `expect` failures (file accessibility, extracted XML content) fail the test; they are
 *   deliberately kept outside the tolerant try/catch so they cannot be swallowed.
 */
tap.test('PDF-03: Factur-X Extraction - Basic ZUGFeRD v2 Extraction', async (tools) => {
  const startTime = Date.now();

  // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');
    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2 files found in corpus, skipping basic extraction test');
      return;
    }

    const testFile = zugferdV2Files[0];
    tools.log(`Testing ZUGFeRD v2 extraction with: ${plugins.path.basename(testFile)}`);
    const invoice = new EInvoice();

    // Check file accessibility
    const fileExists = await plugins.fs.pathExists(testFile);
    expect(fileExists).toBeTrue();
    const fileStats = await plugins.fs.stat(testFile);
    tools.log(`File size: ${(fileStats.size / 1024).toFixed(1)}KB`);

    // Attempt PDF extraction. Only the library calls live inside this try block, so a
    // failing extraction is tolerated while the assertions below remain effective.
    let extracted: { xml: string } | null = null;
    try {
      const extractionResult = await invoice.fromFile(testFile);
      if (extractionResult) {
        tools.log('✓ ZUGFeRD v2 XML extraction successful');
        extracted = { xml: await invoice.toXmlString() };
      } else {
        tools.log('⚠ ZUGFeRD v2 extraction returned no result');
      }
    } catch (extractionError) {
      tools.log(`⚠ ZUGFeRD v2 extraction failed: ${describeError(extractionError)}`);
    }

    if (extracted !== null) {
      // Verify extracted content
      const extractedXml = extracted.xml;
      expect(extractedXml).toBeTruthy();
      expect(extractedXml.length).toBeGreaterThan(100);

      // Check for ZUGFeRD v2/Factur-X characteristics (any single marker is sufficient)
      const formatMarkers = [
        'urn:cen.eu:en16931:2017',
        'CrossIndustryInvoice',
        'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
        'zugferd',
        'factur-x',
      ];
      const hasZugferdV2Markers = formatMarkers.some((marker) => extractedXml.includes(marker));
      if (hasZugferdV2Markers) {
        tools.log('✓ ZUGFeRD v2/Factur-X format markers detected');
      } else {
        tools.log('⚠ ZUGFeRD v2/Factur-X format markers not clearly detected');
      }

      // Test validation of extracted content (informational only: never fails the test)
      try {
        const validationResult = await invoice.validate();
        if (validationResult.valid) {
          tools.log('✓ Extracted ZUGFeRD v2 content passes validation');
        } else {
          tools.log(`⚠ Validation issues: ${validationResult.errors?.length ?? 0} errors`);
          if (validationResult.errors && validationResult.errors.length > 0) {
            tools.log(` First error: ${validationResult.errors[0].message}`);
          }
        }
      } catch (validationError) {
        tools.log(`⚠ Validation failed: ${describeError(validationError)}`);
      }
    }
  } catch (error) {
    // Log for context, then rethrow so assertion failures are reported by the runner.
    tools.log(`ZUGFeRD v2 basic extraction test failed: ${describeError(error)}`);
    throw error;
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-basic-extraction', duration);
});
/**
 * PDF-03 / Factur-X specific checks.
 *
 * Selects corpus files from the 'ZUGFERD_V2' category whose base name hints at Factur-X
 * (falling back to the first two files of the category), extracts each one and logs
 * which Factur-X/CII markers appear in the resulting XML.
 *
 * Per-file extraction errors are logged and tolerated. If at least one file was
 * processed, at least one extraction must succeed; that assertion failure is rethrown
 * so it actually fails the test.
 */
tap.test('PDF-03: Factur-X Extraction - Factur-X Specific Testing', async (tools) => {
  const startTime = Date.now();

  // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  try {
    // Look for Factur-X specific files in corpus
    const facturxFiles = await CorpusLoader.getFiles('ZUGFERD_V2');

    // Filter for files that might be Factur-X specific. The match is a plain
    // case-insensitive substring test on the base name, so the short 'fr' hint is
    // intentionally broad (it also covers every name matched by 'france').
    const nameHints = ['factur', 'france', 'fr'];
    const potentialFacturxFiles = facturxFiles.filter((file) => {
      const lowerName = plugins.path.basename(file).toLowerCase();
      return nameHints.some((hint) => lowerName.includes(hint));
    });

    if (potentialFacturxFiles.length === 0) {
      tools.log('⚠ No specific Factur-X files identified, testing with ZUGFeRD v2 files');
      // Use first few ZUGFeRD v2 files as they should be compatible
      potentialFacturxFiles.push(...facturxFiles.slice(0, 2));
    }
    tools.log(`Testing Factur-X specific features with ${potentialFacturxFiles.length} files`);

    let facturxProcessed = 0;
    let facturxSuccessful = 0;

    for (const filePath of potentialFacturxFiles) {
      const fileName = plugins.path.basename(filePath);
      try {
        facturxProcessed++;
        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        if (!extractionResult) {
          tools.log(`${fileName}: No XML content extracted`);
          continue;
        }

        facturxSuccessful++;
        const xmlContent = await invoice.toXmlString();

        // Look for Factur-X specific characteristics
        const facturxChecks = {
          hasEN16931Context: xmlContent.includes('urn:cen.eu:en16931:2017'),
          hasCIINamespace: xmlContent.includes('urn:un:unece:uncefact:data:standard:CrossIndustryInvoice'),
          hasFacturxGuideline: xmlContent.includes('factur-x') || xmlContent.includes('FACTUR-X'),
          hasExchangedDocument: xmlContent.includes('ExchangedDocument'),
          hasSupplyChainTrade: xmlContent.includes('SupplyChainTradeTransaction'),
        };

        tools.log(`${fileName} Factur-X characteristics:`);
        tools.log(` EN16931 Context: ${facturxChecks.hasEN16931Context}`);
        tools.log(` CII Namespace: ${facturxChecks.hasCIINamespace}`);
        tools.log(` Factur-X Guideline: ${facturxChecks.hasFacturxGuideline}`);
        tools.log(` ExchangedDocument: ${facturxChecks.hasExchangedDocument}`);
        tools.log(` SupplyChainTrade: ${facturxChecks.hasSupplyChainTrade}`);

        // Basic Factur-X structure validation
        if (facturxChecks.hasEN16931Context && facturxChecks.hasCIINamespace) {
          tools.log(` ✓ Valid Factur-X/ZUGFeRD v2 structure detected`);
        }
      } catch (error) {
        // A single unreadable file must not abort the whole run.
        tools.log(`${fileName}: Extraction failed - ${describeError(error)}`);
      }
    }

    const facturxSuccessRate = facturxProcessed > 0 ? (facturxSuccessful / facturxProcessed) * 100 : 0;
    tools.log(`\nFactur-X Processing Summary:`);
    tools.log(`- Files processed: ${facturxProcessed}`);
    tools.log(`- Successful extractions: ${facturxSuccessful} (${facturxSuccessRate.toFixed(1)}%)`);

    if (facturxProcessed > 0) {
      expect(facturxSuccessRate).toBeGreaterThan(0);
    }
  } catch (error) {
    // Log for context, then rethrow so the success-rate assertion is not swallowed.
    tools.log(`Factur-X specific testing failed: ${describeError(error)}`);
    throw error;
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-specific-testing', duration);
});
/**
 * PDF-03 / corpus performance analysis.
 *
 * Extracts up to ten files of the 'ZUGFERD_V2' corpus category, timing each extraction
 * (the timer includes the `stat` call and `fromFile`) and correlating it with file size.
 *
 * Assertions:
 * - average time per KB over successful extractions is below 50 ms,
 * - average extraction time over all processed files is below 3000 ms,
 * - at least one processed file extracts successfully.
 *
 * Per-file errors are logged and counted towards the total time; any other error
 * (including assertion failures) is logged and rethrown.
 */
tap.test('PDF-03: Factur-X Extraction - Corpus Performance Analysis', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();

  // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  let totalProcessed = 0;
  let totalSuccessful = 0;
  let totalExtractionTime = 0;

  // One entry per successfully extracted file.
  const fileSizePerformance: Array<{
    fileName: string;
    sizeKB: number;
    extractionTimeMs: number;
    timePerKB: number;
  }> = [];

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');
    tools.log(`Processing ${zugferdV2Files.length} ZUGFeRD v2/Factur-X files for performance analysis`);
    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2/Factur-X files found in corpus');
      return;
    }

    // Process subset for performance analysis (slice clamps to the array length itself)
    const filesToProcess = zugferdV2Files.slice(0, 10);

    for (const filePath of filesToProcess) {
      const fileName = plugins.path.basename(filePath);
      const fileExtractionStart = Date.now();
      try {
        totalProcessed++;

        // Get file size for performance correlation
        const fileStats = await plugins.fs.stat(filePath);
        const fileSizeKB = fileStats.size / 1024;

        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);
        const fileExtractionTime = Date.now() - fileExtractionStart;
        totalExtractionTime += fileExtractionTime;

        if (!extractionResult) {
          tools.log(`${fileName}: Extraction failed (${fileSizeKB.toFixed(1)}KB, ${fileExtractionTime}ms)`);
          continue;
        }

        totalSuccessful++;

        // Record size vs performance data
        const timePerKB = fileExtractionTime / fileSizeKB;
        fileSizePerformance.push({
          fileName,
          sizeKB: fileSizeKB,
          extractionTimeMs: fileExtractionTime,
          timePerKB,
        });
        tools.log(`${fileName}: ${fileSizeKB.toFixed(1)}KB → ${fileExtractionTime}ms (${timePerKB.toFixed(2)}ms/KB)`);

        // Quick content verification
        const xmlContent = await invoice.toXmlString();
        if (xmlContent.length < 100) {
          tools.log(` ⚠ Suspiciously short XML content: ${xmlContent.length} chars`);
        }
      } catch (error) {
        // Time spent before the failure still counts towards the average.
        const fileExtractionTime = Date.now() - fileExtractionStart;
        totalExtractionTime += fileExtractionTime;
        tools.log(`${fileName}: Error after ${fileExtractionTime}ms - ${describeError(error)}`);
      }
    }

    // Performance analysis
    const successRate = totalProcessed > 0 ? (totalSuccessful / totalProcessed) * 100 : 0;
    const averageExtractionTime = totalProcessed > 0 ? totalExtractionTime / totalProcessed : 0;
    tools.log(`\nZUGFeRD v2/Factur-X Performance Analysis:`);
    tools.log(`- Files processed: ${totalProcessed}`);
    tools.log(`- Success rate: ${successRate.toFixed(1)}%`);
    tools.log(`- Average extraction time: ${averageExtractionTime.toFixed(1)}ms`);

    if (fileSizePerformance.length > 0) {
      const sampleCount = fileSizePerformance.length;
      const avgTimePerKB = fileSizePerformance.reduce((sum, item) => sum + item.timePerKB, 0) / sampleCount;
      const avgFileSize = fileSizePerformance.reduce((sum, item) => sum + item.sizeKB, 0) / sampleCount;
      tools.log(`- Average file size: ${avgFileSize.toFixed(1)}KB`);
      tools.log(`- Average time per KB: ${avgTimePerKB.toFixed(2)}ms/KB`);

      // Find performance outliers. Sorted slowest-first on a copy; the list is known to
      // be non-empty here, so the first and last entries always exist.
      const sortedByTime = [...fileSizePerformance].sort((a, b) => b.extractionTimeMs - a.extractionTimeMs);
      const slowest = sortedByTime[0];
      const fastest = sortedByTime[sortedByTime.length - 1];
      tools.log(`- Slowest file: ${slowest.fileName} (${slowest.extractionTimeMs}ms)`);
      tools.log(`- Fastest file: ${fastest.fileName} (${fastest.extractionTimeMs}ms)`);

      // Performance expectations
      expect(avgTimePerKB).toBeLessThan(50); // 50ms per KB max
      expect(averageExtractionTime).toBeLessThan(3000); // 3 seconds max average
    }

    // Success rate expectations
    if (totalProcessed > 0) {
      expect(successRate).toBeGreaterThan(0); // At least one should work
    }
  } catch (error) {
    tools.log(`Corpus performance analysis failed: ${describeError(error)}`);
    throw error;
  }

  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-corpus-performance', totalDuration);
  tools.log(`Performance analysis completed in ${totalDuration}ms`);
});
/**
 * PDF-03 / profile detection.
 *
 * Extracts the first three files of the 'ZUGFERD_V2' corpus category and classifies each
 * by searching the extracted XML for known guideline identifier substrings. The result
 * is tallied per profile and logged; this test makes no assertions and tolerates
 * per-file as well as overall errors (they are logged only).
 */
tap.test('PDF-03: Factur-X Extraction - Profile Detection', async (tools) => {
  const startTime = Date.now();

  // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Ordered [marker substring, profile] pairs; the first marker found in the XML wins,
  // so the generic EN16931 identifier must stay last (it is a prefix of the others).
  // NOTE(review): the identifier strings are kept exactly as originally written —
  // confirm them against the ZUGFeRD / Factur-X specification.
  const profileMarkers: ReadonlyArray<readonly [string, string]> = [
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:minimum', 'MINIMUM'],
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:basic', 'BASIC'],
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:comfort', 'COMFORT'],
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:extended', 'EXTENDED'],
    ['urn:cen.eu:en16931:2017#conformant#urn:factur-x.eu:1p0:', 'FACTUR-X'],
    ['urn:cen.eu:en16931:2017', 'EN16931'], // Generic EN16931 compliance
  ];

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');
    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2/Factur-X files found for profile detection');
      return;
    }

    // Test profile detection with a sample of files
    const sampleFiles = zugferdV2Files.slice(0, 3);

    // Every profile the detector can report has a counter, including 'EN16931'.
    const profileStats: Record<string, number> = {
      'MINIMUM': 0,
      'BASIC': 0,
      'COMFORT': 0,
      'EXTENDED': 0,
      'FACTUR-X': 0,
      'EN16931': 0,
      'UNKNOWN': 0,
    };

    tools.log(`Testing profile detection with ${sampleFiles.length} files`);

    for (const filePath of sampleFiles) {
      const fileName = plugins.path.basename(filePath);
      try {
        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        if (!extractionResult) {
          tools.log(`${fileName}: No content for profile detection`);
          continue;
        }

        const xmlContent = await invoice.toXmlString();

        // Detect ZUGFeRD/Factur-X profile from XML content
        const matchedMarker = profileMarkers.find(([marker]) => xmlContent.includes(marker));
        const detectedProfile = matchedMarker ? matchedMarker[1] : 'UNKNOWN';

        profileStats[detectedProfile] = (profileStats[detectedProfile] ?? 0) + 1;
        tools.log(`${fileName}: Profile detected - ${detectedProfile}`);

        // Additional profile-specific checks (presence of element names, logged only)
        if (detectedProfile !== 'UNKNOWN') {
          const hasMinimumFields =
            xmlContent.includes('ExchangedDocument') &&
            xmlContent.includes('SupplyChainTradeTransaction');
          const hasComfortFields =
            xmlContent.includes('ApplicableHeaderTradeAgreement') &&
            xmlContent.includes('ApplicableHeaderTradeDelivery');
          const hasExtendedFields = xmlContent.includes('IncludedSupplyChainTradeLineItem');
          tools.log(` Minimum fields: ${hasMinimumFields}`);
          tools.log(` Comfort fields: ${hasComfortFields}`);
          tools.log(` Extended fields: ${hasExtendedFields}`);
        }
      } catch (error) {
        tools.log(`${fileName}: Profile detection failed - ${describeError(error)}`);
      }
    }

    tools.log(`\nProfile Detection Summary:`);
    for (const [profile, count] of Object.entries(profileStats)) {
      if (count > 0) {
        tools.log(`- ${profile}: ${count} files`);
      }
    }
  } catch (error) {
    tools.log(`Profile detection failed: ${describeError(error)}`);
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-profile-detection', duration);
});
/**
 * PDF-03 / error recovery.
 *
 * Writes deliberately broken "PDF" files into `<cwd>/.nogit`, feeds each one to
 * `invoice.fromFile()` and checks that the failure is handled: either the call throws
 * (the error must carry a non-empty description) or it returns a falsy result.
 * A successful extraction of a broken file is only logged as a warning.
 * Each temp file is removed again in a `finally` block; cleanup failures are logged.
 */
tap.test('PDF-03: Factur-X Extraction - Error Recovery', async (tools) => {
  const startTime = Date.now();

  // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Creates `<cwd>/.nogit/<fileName>` with the given content and returns its path.
  const writeTempFile = async (fileName: string, content: string): Promise<string> => {
    const tempPath = plugins.path.join(process.cwd(), '.nogit', fileName);
    await plugins.fs.ensureDir(plugins.path.dirname(tempPath));
    await plugins.fs.writeFile(tempPath, content);
    return tempPath;
  };

  // Test error recovery with problematic PDF files
  const errorTestCases = [
    {
      name: 'Non-PDF file with PDF extension',
      createFile: () => writeTempFile('temp-fake.pdf', 'This is not a PDF file'),
      expectedError: true,
    },
    {
      name: 'Empty PDF file',
      createFile: () => writeTempFile('temp-empty.pdf', ''),
      expectedError: true,
    },
    {
      name: 'PDF header only',
      createFile: () => writeTempFile('temp-header-only.pdf', '%PDF-1.4\n'),
      expectedError: true,
    },
  ];

  for (const testCase of errorTestCases) {
    tools.log(`Testing error recovery: ${testCase.name}`);
    let tempFilePath: string | null = null;
    try {
      tempFilePath = await testCase.createFile();
      const invoice = new EInvoice();
      const result = await invoice.fromFile(tempFilePath);

      if (!testCase.expectedError) {
        tools.log(`${testCase.name}: Operation succeeded as expected`);
      } else if (result) {
        tools.log(`⚠ Expected error for ${testCase.name} but extraction succeeded`);
      } else {
        tools.log(`${testCase.name}: Gracefully handled (no result)`);
      }
    } catch (error) {
      const message = describeError(error);
      if (!testCase.expectedError) {
        tools.log(`${testCase.name}: Unexpected error - ${message}`);
        throw error;
      }
      tools.log(`${testCase.name}: Expected error caught - ${message}`);
      expect(message).toBeTruthy();
    } finally {
      // Clean up temp file
      if (tempFilePath !== null) {
        try {
          await plugins.fs.remove(tempFilePath);
        } catch {
          tools.log(`Warning: Failed to clean up ${tempFilePath}`);
        }
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-error-recovery', duration);
});
/**
 * PDF-03 / performance summary.
 *
 * Looks up the recorded summary for each PDF-03 metric name via
 * `PerformanceTracker.getSummary()` and logs average, min, max and p95 for every
 * metric that has one. Metrics without a summary are skipped silently.
 */
tap.test('PDF-03: Performance Summary', async (tools) => {
  // Metric names in the order they are reported.
  const metricNames = [
    'pdf-facturx-basic-extraction',
    'pdf-facturx-specific-testing',
    'pdf-facturx-corpus-performance',
    'pdf-facturx-profile-detection',
    'pdf-facturx-error-recovery'
  ];

  tools.log(`\n=== ZUGFeRD v2/Factur-X Extraction Performance Summary ===`);

  // Summaries are fetched one at a time so the log output follows the list order.
  for (let index = 0; index < metricNames.length; index++) {
    const metricName = metricNames[index];
    const stats = await PerformanceTracker.getSummary(metricName);
    if (!stats) {
      continue;
    }
    tools.log(`${metricName}:`);
    tools.log(` avg=${stats.average}ms, min=${stats.min}ms, max=${stats.max}ms, p95=${stats.p95}ms`);
  }

  tools.log(`\nZUGFeRD v2/Factur-X extraction testing completed.`);
});