import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';

const testTimeout = 300000; // 5 minutes timeout for PDF processing

// PDF-03: ZUGFeRD v2/Factur-X Extraction
// Tests XML extraction from ZUGFeRD v2 and Factur-X PDFs with enhanced format support
// and cross-border compatibility (German ZUGFeRD v2 and French Factur-X)

/**
 * Turns whatever was thrown into a printable message.
 *
 * Catch variables are `unknown` under strict TypeScript, and a thrown value is
 * not guaranteed to be an `Error`, so `.message` must not be read blindly.
 *
 * @param error - The value received by a `catch` clause.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** One successfully extracted file in the corpus performance analysis. */
interface FileSizeSample {
  fileName: string;
  sizeKB: number;
  extractionTimeMs: number;
  timePerKB: number;
}

/**
 * Extracts the XML from the first ZUGFeRD v2 corpus file, looks for format
 * markers and runs validation. Operational failures (missing corpus, failing
 * extraction/validation) are only logged; the assertions on the captured
 * values run after the tolerant block so they can actually fail the test.
 */
tap.test('PDF-03: Factur-X Extraction - Basic ZUGFeRD v2 Extraction', async (tools) => {
  const startTime = Date.now();

  // Captured inside the try block, asserted after it. Asserting inside would
  // let the surrounding catch swallow the assertion failure.
  let fileExists: boolean | undefined;
  let extractedXml: string | undefined;

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');

    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2 files found in corpus, skipping basic extraction test');
      return;
    }

    const testFile = zugferdV2Files[0];
    tools.log(`Testing ZUGFeRD v2 extraction with: ${plugins.path.basename(testFile)}`);

    const invoice = new EInvoice();

    // Check file accessibility
    fileExists = await plugins.fs.pathExists(testFile);

    if (fileExists) {
      const fileStats = await plugins.fs.stat(testFile);
      tools.log(`File size: ${(fileStats.size / 1024).toFixed(1)}KB`);

      // Attempt PDF extraction
      try {
        const extractionResult = await invoice.fromFile(testFile);

        if (extractionResult) {
          tools.log('✓ ZUGFeRD v2 XML extraction successful');

          // Keep the extracted content for the assertions below
          const xml: string = await invoice.toXmlString();
          extractedXml = xml;

          // Check for ZUGFeRD v2/Factur-X characteristics
          const formatMarkers = [
            'urn:cen.eu:en16931:2017',
            'CrossIndustryInvoice',
            'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
            'zugferd',
            'factur-x',
          ];
          const hasZugferdV2Markers = formatMarkers.some((marker) => xml.includes(marker));

          if (hasZugferdV2Markers) {
            tools.log('✓ ZUGFeRD v2/Factur-X format markers detected');
          } else {
            tools.log('⚠ ZUGFeRD v2/Factur-X format markers not clearly detected');
          }

          // Test validation of extracted content
          try {
            const validationResult = await invoice.validate();
            if (validationResult.valid) {
              tools.log('✓ Extracted ZUGFeRD v2 content passes validation');
            } else {
              tools.log(`⚠ Validation issues: ${validationResult.errors?.length || 0} errors`);
              if (validationResult.errors && validationResult.errors.length > 0) {
                tools.log(` First error: ${validationResult.errors[0].message}`);
              }
            }
          } catch (validationError) {
            tools.log(`⚠ Validation failed: ${errorMessage(validationError)}`);
          }
        } else {
          tools.log('⚠ ZUGFeRD v2 extraction returned no result');
        }
      } catch (extractionError) {
        tools.log(`⚠ ZUGFeRD v2 extraction failed: ${errorMessage(extractionError)}`);
      }
    }
  } catch (error) {
    tools.log(`ZUGFeRD v2 basic extraction test failed: ${errorMessage(error)}`);
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-basic-extraction', duration);

  // Assertions run last so the metric is recorded even when they fail.
  if (fileExists !== undefined) {
    expect(fileExists).toBeTrue();
  }
  if (extractedXml !== undefined) {
    expect(extractedXml).toBeTruthy();
    expect(extractedXml.length).toBeGreaterThan(100);
  }
});

/**
 * Runs extraction on corpus files whose names hint at Factur-X (falling back
 * to the first two ZUGFeRD v2 files) and logs which CII/EN16931 markers the
 * extracted XML contains. Requires at least one successful extraction when
 * any file was processed.
 */
tap.test('PDF-03: Factur-X Extraction - Factur-X Specific Testing', async (tools) => {
  const startTime = Date.now();

  // Declared outside the try block so the success-rate assertion can run
  // after the catch instead of being swallowed by it.
  let facturxProcessed = 0;
  let facturxSuccessful = 0;
  let facturxSuccessRate = 0;

  try {
    // Look for Factur-X specific files in corpus
    const facturxFiles = await CorpusLoader.getFiles('ZUGFERD_V2');

    // Filter for files that might be Factur-X specific
    const potentialFacturxFiles = facturxFiles.filter((file) => {
      const baseName = plugins.path.basename(file).toLowerCase();
      return baseName.includes('factur') || baseName.includes('france') || baseName.includes('fr');
    });

    if (potentialFacturxFiles.length === 0) {
      tools.log('⚠ No specific Factur-X files identified, testing with ZUGFeRD v2 files');
      // Use first few ZUGFeRD v2 files as they should be compatible
      potentialFacturxFiles.push(...facturxFiles.slice(0, 2));
    }

    tools.log(`Testing Factur-X specific features with ${potentialFacturxFiles.length} files`);

    for (const filePath of potentialFacturxFiles) {
      const fileName = plugins.path.basename(filePath);

      try {
        facturxProcessed++;

        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        if (extractionResult) {
          facturxSuccessful++;

          const xmlContent = await invoice.toXmlString();

          // Look for Factur-X specific characteristics
          const facturxChecks = {
            hasEN16931Context: xmlContent.includes('urn:cen.eu:en16931:2017'),
            hasCIINamespace: xmlContent.includes('urn:un:unece:uncefact:data:standard:CrossIndustryInvoice'),
            hasFacturxGuideline: xmlContent.includes('factur-x') || xmlContent.includes('FACTUR-X'),
            hasExchangedDocument: xmlContent.includes('ExchangedDocument'),
            hasSupplyChainTrade: xmlContent.includes('SupplyChainTradeTransaction'),
          };

          tools.log(`${fileName} Factur-X characteristics:`);
          tools.log(` EN16931 Context: ${facturxChecks.hasEN16931Context}`);
          tools.log(` CII Namespace: ${facturxChecks.hasCIINamespace}`);
          tools.log(` Factur-X Guideline: ${facturxChecks.hasFacturxGuideline}`);
          tools.log(` ExchangedDocument: ${facturxChecks.hasExchangedDocument}`);
          tools.log(` SupplyChainTrade: ${facturxChecks.hasSupplyChainTrade}`);

          // Basic Factur-X structure validation
          if (facturxChecks.hasEN16931Context && facturxChecks.hasCIINamespace) {
            tools.log(` ✓ Valid Factur-X/ZUGFeRD v2 structure detected`);
          }
        } else {
          tools.log(`⚠ ${fileName}: No XML content extracted`);
        }
      } catch (error) {
        tools.log(`✗ ${fileName}: Extraction failed - ${errorMessage(error)}`);
      }
    }

    facturxSuccessRate = facturxProcessed > 0 ? (facturxSuccessful / facturxProcessed) * 100 : 0;

    tools.log(`\nFactur-X Processing Summary:`);
    tools.log(`- Files processed: ${facturxProcessed}`);
    tools.log(`- Successful extractions: ${facturxSuccessful} (${facturxSuccessRate.toFixed(1)}%)`);
  } catch (error) {
    tools.log(`Factur-X specific testing failed: ${errorMessage(error)}`);
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-specific-testing', duration);

  // At least one processed file must have yielded XML.
  if (facturxProcessed > 0) {
    expect(facturxSuccessRate).toBeGreaterThan(0);
  }
});

/**
 * Extracts up to ten corpus files, correlates extraction time with file size
 * and enforces the performance and success-rate expectations. Unlike the
 * other cases, any failure here is rethrown after being logged.
 */
tap.test('PDF-03: Factur-X Extraction - Corpus Performance Analysis', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();
  let totalProcessed = 0;
  let totalSuccessful = 0;
  let totalExtractionTime = 0;
  const fileSizePerformance: FileSizeSample[] = [];

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');
    tools.log(`Processing ${zugferdV2Files.length} ZUGFeRD v2/Factur-X files for performance analysis`);

    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2/Factur-X files found in corpus');
      return;
    }

    // Process subset for performance analysis
    const filesToProcess = zugferdV2Files.slice(0, 10);

    for (const filePath of filesToProcess) {
      const fileName = plugins.path.basename(filePath);
      const fileExtractionStart = Date.now();
      // Prevents the catch block from adding this file's time a second time
      // when the failure happens after the extraction was already timed.
      let extractionTimeRecorded = false;

      try {
        totalProcessed++;

        // Get file size for performance correlation
        const fileStats = await plugins.fs.stat(filePath);
        const fileSizeKB = fileStats.size / 1024;

        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        const fileExtractionTime = Date.now() - fileExtractionStart;
        totalExtractionTime += fileExtractionTime;
        extractionTimeRecorded = true;

        if (extractionResult) {
          totalSuccessful++;

          // A zero-byte file would otherwise produce Infinity/NaN and poison the average.
          const timePerKB = fileSizeKB > 0 ? fileExtractionTime / fileSizeKB : 0;

          // Record size vs performance data
          fileSizePerformance.push({
            fileName,
            sizeKB: fileSizeKB,
            extractionTimeMs: fileExtractionTime,
            timePerKB,
          });

          tools.log(`✓ ${fileName}: ${fileSizeKB.toFixed(1)}KB → ${fileExtractionTime}ms (${timePerKB.toFixed(2)}ms/KB)`);

          // Quick content verification
          const xmlContent = await invoice.toXmlString();
          if (xmlContent.length < 100) {
            tools.log(` ⚠ Suspiciously short XML content: ${xmlContent.length} chars`);
          }
        } else {
          tools.log(`⚠ ${fileName}: Extraction failed (${fileSizeKB.toFixed(1)}KB, ${fileExtractionTime}ms)`);
        }
      } catch (error) {
        const elapsed = Date.now() - fileExtractionStart;
        if (!extractionTimeRecorded) {
          totalExtractionTime += elapsed;
        }
        tools.log(`✗ ${fileName}: Error after ${elapsed}ms - ${errorMessage(error)}`);
      }
    }

    // Performance analysis
    const successRate = totalProcessed > 0 ? (totalSuccessful / totalProcessed) * 100 : 0;
    const averageExtractionTime = totalProcessed > 0 ? totalExtractionTime / totalProcessed : 0;

    tools.log(`\nZUGFeRD v2/Factur-X Performance Analysis:`);
    tools.log(`- Files processed: ${totalProcessed}`);
    tools.log(`- Success rate: ${successRate.toFixed(1)}%`);
    tools.log(`- Average extraction time: ${averageExtractionTime.toFixed(1)}ms`);

    if (fileSizePerformance.length > 0) {
      const sampleCount = fileSizePerformance.length;
      const avgTimePerKB = fileSizePerformance.reduce((sum, item) => sum + item.timePerKB, 0) / sampleCount;
      const avgFileSize = fileSizePerformance.reduce((sum, item) => sum + item.sizeKB, 0) / sampleCount;

      tools.log(`- Average file size: ${avgFileSize.toFixed(1)}KB`);
      tools.log(`- Average time per KB: ${avgTimePerKB.toFixed(2)}ms/KB`);

      // Find performance outliers (sorted slowest first, on a copy)
      const sortedByTime = [...fileSizePerformance].sort((a, b) => b.extractionTimeMs - a.extractionTimeMs);
      const slowest = sortedByTime[0];
      const fastest = sortedByTime[sortedByTime.length - 1];
      tools.log(`- Slowest file: ${slowest.fileName} (${slowest.extractionTimeMs}ms)`);
      tools.log(`- Fastest file: ${fastest.fileName} (${fastest.extractionTimeMs}ms)`);

      // Performance expectations
      expect(avgTimePerKB).toBeLessThan(50); // 50ms per KB max
      expect(averageExtractionTime).toBeLessThan(3000); // 3 seconds max average
    }

    // Success rate expectations
    if (totalProcessed > 0) {
      expect(successRate).toBeGreaterThan(0); // At least one should work
    }
  } catch (error) {
    tools.log(`Corpus performance analysis failed: ${errorMessage(error)}`);
    throw error;
  }

  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-corpus-performance', totalDuration);

  tools.log(`Performance analysis completed in ${totalDuration}ms`);
});

/**
 * Classifies the first three corpus files by the guideline marker found in
 * their extracted XML and logs a per-profile count. Purely informational:
 * failures are logged and nothing is asserted.
 */
tap.test('PDF-03: Factur-X Extraction - Profile Detection', async (tools) => {
  const startTime = Date.now();

  // Ordered from most to least specific; the first marker contained in the XML wins.
  // NOTE(review): marker strings are carried over unchanged from the previous
  // version of this test - confirm them against the official guideline IDs.
  const profileMarkers: ReadonlyArray<{ marker: string; profile: string }> = [
    { marker: 'urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:minimum', profile: 'MINIMUM' },
    { marker: 'urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:basic', profile: 'BASIC' },
    { marker: 'urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:comfort', profile: 'COMFORT' },
    { marker: 'urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:extended', profile: 'EXTENDED' },
    { marker: 'urn:cen.eu:en16931:2017#conformant#urn:factur-x.eu:1p0:', profile: 'FACTUR-X' },
    { marker: 'urn:cen.eu:en16931:2017', profile: 'EN16931' }, // Generic EN16931 compliance
  ];

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');

    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2/Factur-X files found for profile detection');
      return;
    }

    // Test profile detection with a sample of files
    const sampleFiles = zugferdV2Files.slice(0, 3);
    // Keyed by profile name; includes every value detection can produce.
    const profileStats: Record<string, number> = {
      'MINIMUM': 0,
      'BASIC': 0,
      'COMFORT': 0,
      'EXTENDED': 0,
      'FACTUR-X': 0,
      'EN16931': 0,
      'UNKNOWN': 0,
    };

    tools.log(`Testing profile detection with ${sampleFiles.length} files`);

    for (const filePath of sampleFiles) {
      const fileName = plugins.path.basename(filePath);

      try {
        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        if (extractionResult) {
          const xmlContent: string = await invoice.toXmlString();

          // Detect ZUGFeRD/Factur-X profile from XML content
          const matched = profileMarkers.find((entry) => xmlContent.includes(entry.marker));
          const detectedProfile = matched ? matched.profile : 'UNKNOWN';

          profileStats[detectedProfile] = (profileStats[detectedProfile] || 0) + 1;
          tools.log(`${fileName}: Profile detected - ${detectedProfile}`);

          // Additional profile-specific checks
          if (detectedProfile !== 'UNKNOWN') {
            const hasMinimumFields = xmlContent.includes('ExchangedDocument') &&
              xmlContent.includes('SupplyChainTradeTransaction');
            const hasComfortFields = xmlContent.includes('ApplicableHeaderTradeAgreement') &&
              xmlContent.includes('ApplicableHeaderTradeDelivery');
            const hasExtendedFields = xmlContent.includes('IncludedSupplyChainTradeLineItem');

            tools.log(` Minimum fields: ${hasMinimumFields}`);
            tools.log(` Comfort fields: ${hasComfortFields}`);
            tools.log(` Extended fields: ${hasExtendedFields}`);
          }
        } else {
          tools.log(`⚠ ${fileName}: No content for profile detection`);
        }
      } catch (error) {
        tools.log(`✗ ${fileName}: Profile detection failed - ${errorMessage(error)}`);
      }
    }

    tools.log(`\nProfile Detection Summary:`);
    for (const [profile, count] of Object.entries(profileStats)) {
      if (count > 0) {
        tools.log(`- ${profile}: ${count} files`);
      }
    }
  } catch (error) {
    tools.log(`Profile detection failed: ${errorMessage(error)}`);
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-profile-detection', duration);
});

/**
 * Feeds deliberately broken ".pdf" files to the extractor and checks that it
 * either throws or returns a falsy result. Temp files are created under
 * `<cwd>/.nogit` and removed again after each case.
 */
tap.test('PDF-03: Factur-X Extraction - Error Recovery', async (tools) => {
  const startTime = Date.now();

  // Writes `content` to `<cwd>/.nogit/<fileName>` and returns the full path.
  const writeTempPdf = async (fileName: string, content: string): Promise<string> => {
    const tempPath = plugins.path.join(process.cwd(), '.nogit', fileName);
    await plugins.fs.ensureDir(plugins.path.dirname(tempPath));
    await plugins.fs.writeFile(tempPath, content);
    return tempPath;
  };

  // Test error recovery with problematic PDF files
  const errorTestCases = [
    {
      name: 'Non-PDF file with PDF extension',
      createFile: () => writeTempPdf('temp-fake.pdf', 'This is not a PDF file'),
      expectedError: true,
    },
    {
      name: 'Empty PDF file',
      createFile: () => writeTempPdf('temp-empty.pdf', ''),
      expectedError: true,
    },
    {
      name: 'PDF header only',
      createFile: () => writeTempPdf('temp-header-only.pdf', '%PDF-1.4\n'),
      expectedError: true,
    },
  ];

  for (const testCase of errorTestCases) {
    tools.log(`Testing error recovery: ${testCase.name}`);

    let tempFilePath: string | null = null;

    try {
      tempFilePath = await testCase.createFile();

      const invoice = new EInvoice();
      const result = await invoice.fromFile(tempFilePath);

      if (testCase.expectedError) {
        if (result) {
          tools.log(`⚠ Expected error for ${testCase.name} but extraction succeeded`);
        } else {
          tools.log(`✓ ${testCase.name}: Gracefully handled (no result)`);
        }
      } else {
        tools.log(`✓ ${testCase.name}: Operation succeeded as expected`);
      }
    } catch (error) {
      if (testCase.expectedError) {
        tools.log(`✓ ${testCase.name}: Expected error caught - ${errorMessage(error)}`);
        expect(errorMessage(error)).toBeTruthy();
      } else {
        tools.log(`✗ ${testCase.name}: Unexpected error - ${errorMessage(error)}`);
        throw error;
      }
    } finally {
      // Clean up temp file
      if (tempFilePath) {
        try {
          await plugins.fs.remove(tempFilePath);
        } catch (cleanupError) {
          tools.log(`Warning: Failed to clean up ${tempFilePath}`);
        }
      }
    }
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-error-recovery', duration);
});

/**
 * Prints the recorded timing summary for every metric this suite records.
 * Metrics without a summary are skipped.
 */
tap.test('PDF-03: Performance Summary', async (tools) => {
  const operations = [
    'pdf-facturx-basic-extraction',
    'pdf-facturx-specific-testing',
    'pdf-facturx-corpus-performance',
    'pdf-facturx-profile-detection',
    'pdf-facturx-error-recovery',
  ];

  tools.log(`\n=== ZUGFeRD v2/Factur-X Extraction Performance Summary ===`);

  for (const operation of operations) {
    const summary = await PerformanceTracker.getSummary(operation);
    if (summary) {
      tools.log(`${operation}:`);
      tools.log(` avg=${summary.average}ms, min=${summary.min}ms, max=${summary.max}ms, p95=${summary.p95}ms`);
    }
  }

  tools.log(`\nZUGFeRD v2/Factur-X extraction testing completed.`);
});