import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
import { promises as fs } from 'fs';
import * as path from 'path';

/** Upper bound on PDFs processed by the extraction test (kept small for run time). */
const EXTRACTION_SAMPLE_SIZE = 10;

/** Upper bound on PDFs processed by the profile-detection and performance tests. */
const SMALL_SAMPLE_SIZE = 5;

/**
 * Loads the 'ZUGFERD_V2_CORRECT' corpus category and keeps only the entries
 * whose path ends in `.pdf`.
 */
async function loadFacturXPdfs() {
  const corpusFiles = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  return corpusFiles.filter((f) => f.endsWith('.pdf'));
}

/**
 * Turns a caught value into a printable message.
 *
 * Catch variables are `unknown` under strict TypeScript, and anything can be
 * thrown at runtime, so `.message` is only read after narrowing to `Error`.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Reports whether `xml` contains an opening (or self-closing) tag with the
 * given local name, regardless of which namespace prefix (if any) is used.
 *
 * @param xml - serialized XML document to search
 * @param localName - element name without prefix; must be a plain identifier
 *   because it is interpolated into a regular expression unescaped
 */
function hasElement(xml: string, localName: string): boolean {
  return new RegExp(`<(?:[A-Za-z_][\\w.-]*:)?${localName}[\\s/>]`).test(xml);
}

tap.test('PDF-03: Factur-X Extraction - should extract and validate Factur-X PDFs', async () => {
  // Get ZUGFeRD v2/Factur-X PDF files from corpus
  const pdfFiles = await loadFacturXPdfs();
  const sample = pdfFiles.slice(0, EXTRACTION_SAMPLE_SIZE); // Test first 10 for performance

  console.log(`Testing Factur-X extraction from ${pdfFiles.length} PDFs`);

  let successCount = 0;
  let facturxDetectedCount = 0;

  for (const filePath of sample) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);

      const { result: invoice, metric } = await PerformanceTracker.track(
        'facturx-extraction',
        async () => {
          return await EInvoice.fromPdf(pdfBuffer);
        },
        { file: fileName }
      );

      expect(invoice).toBeTruthy();

      const xml = invoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // Check for Factur-X/ZUGFeRD v2 specific markers
      const isFacturX =
        xml.includes('urn:cen.eu:en16931:2017') ||
        xml.includes('factur-x') ||
        xml.includes('CrossIndustryInvoice') ||
        xml.includes('urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100');

      if (isFacturX) {
        facturxDetectedCount++;
        console.log(`✓ ${fileName}: Factur-X detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
        console.log(`✓ ${fileName}: Extracted but format unclear (${metric.duration.toFixed(2)}ms)`);
      }

      successCount++;
    } catch (error: unknown) {
      // A failure on one file (including a failed expectation above) is
      // logged and the loop moves on; only the aggregate is asserted below.
      console.log(`✗ ${fileName}: ${describeError(error)}`);
    }
  }

  console.log(`\nFactur-X Extraction Summary:`);
  console.log(` Total processed: ${sample.length}`);
  console.log(` Successful extractions: ${successCount}`);
  console.log(` Factur-X format detected: ${facturxDetectedCount}`);

  // We expect most Factur-X files to be successfully extracted
  expect(successCount).toBeGreaterThan(0);
});

tap.test('PDF-03: Factur-X Format Validation - should validate Factur-X specific elements', async () => {
  // Get one Factur-X file for detailed validation
  const pdfFiles = await loadFacturXPdfs();

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping validation test');
    return;
  }

  const testFile = pdfFiles[0];
  const fileName = path.basename(testFile);

  console.log(`Validating Factur-X format with: ${fileName}`);

  const pdfBuffer = await fs.readFile(testFile);
  const invoice = await EInvoice.fromPdf(pdfBuffer);

  expect(invoice).toBeTruthy();

  const xml = invoice.getXml();
  expect(xml).toBeTruthy();

  // Factur-X specific validations
  console.log('Checking Factur-X format characteristics:');

  // Should contain EN16931 namespace
  const hasEN16931Namespace = xml.includes('urn:cen.eu:en16931:2017');
  console.log(` EN16931 namespace: ${hasEN16931Namespace ? '✓' : '✗'}`);

  // NOTE(review): the element-name literals for the three checks below were
  // empty strings (always true) and `hasInvoiceId` was never declared. They
  // are reconstructed here from the log labels as prefix-agnostic element
  // lookups; confirm the intended element names against the CII schema.

  // Should contain CrossIndustryInvoice root element (ZUGFeRD v2/Factur-X)
  const hasCrossIndustryInvoice = hasElement(xml, 'CrossIndustryInvoice');
  console.log(` CrossIndustryInvoice root: ${hasCrossIndustryInvoice ? '✓' : '✗'}`);

  // Should contain basic invoice elements
  const hasInvoiceId = hasElement(xml, 'ID');
  console.log(` Invoice ID element: ${hasInvoiceId ? '✓' : '✗'}`);

  const hasIssueDate = hasElement(xml, 'IssueDateTime');
  console.log(` Issue date element: ${hasIssueDate ? '✓' : '✗'}`);

  // Check for profile specification
  const hasProfileSpec = xml.includes('GuidelineSpecifiedDocumentContextParameter');
  console.log(` Profile specification: ${hasProfileSpec ? '✓' : '✗'}`);

  // Check format detection
  const detectedFormat = invoice.getFormat();
  console.log(` Detected format: ${detectedFormat}`);

  // Basic validation - should have CrossIndustryInvoice for v2/Factur-X
  expect(hasCrossIndustryInvoice).toBeTruthy();
  expect(hasInvoiceId).toBeTruthy();
});

tap.test('PDF-03: Factur-X Profile Detection - should detect different Factur-X profiles', async () => {
  const pdfFiles = await loadFacturXPdfs();

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping profile detection test');
    return;
  }

  const sample = pdfFiles.slice(0, SMALL_SAMPLE_SIZE);
  console.log(`Testing profile detection with ${sample.length} Factur-X PDFs`);

  const profileCounts = new Map<string, number>();

  for (const filePath of sample) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);
      const invoice = await EInvoice.fromPdf(pdfBuffer);
      const xml = invoice.getXml();

      // Detect profile from XML content. These are case-sensitive substring
      // checks over the whole document, evaluated in this order; the first
      // match wins.
      let profile = 'UNKNOWN';

      if (xml.includes('basic')) {
        profile = 'BASIC';
      } else if (xml.includes('comfort')) {
        profile = 'COMFORT';
      } else if (xml.includes('extended')) {
        profile = 'EXTENDED';
      } else if (xml.includes('minimum')) {
        profile = 'MINIMUM';
      } else if (xml.includes('en16931')) {
        profile = 'EN16931';
      }

      profileCounts.set(profile, (profileCounts.get(profile) ?? 0) + 1);
      console.log(` ${fileName}: Profile ${profile}`);
    } catch (error: unknown) {
      console.log(` ${fileName}: Error - ${describeError(error)}`);
    }
  }

  console.log(`\nProfile Distribution:`);
  for (const [profile, count] of profileCounts) {
    console.log(` ${profile}: ${count} files`);
  }

  // Should have detected at least one profile
  expect(profileCounts.size).toBeGreaterThan(0);
});

tap.test('PDF-03: Factur-X Performance - should extract Factur-X PDFs efficiently', async () => {
  const pdfFiles = await loadFacturXPdfs();

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping performance test');
    return;
  }

  const sample = pdfFiles.slice(0, SMALL_SAMPLE_SIZE);
  console.log(`Testing extraction performance with ${sample.length} Factur-X PDFs`);

  const durations: number[] = [];

  for (const filePath of sample) {
    const fileName = path.basename(filePath);
    // The file is read outside the tracked callback, so only the
    // EInvoice.fromPdf call is timed.
    const pdfBuffer = await fs.readFile(filePath);

    const { metric } = await PerformanceTracker.track(
      'facturx-performance',
      async () => {
        return await EInvoice.fromPdf(pdfBuffer);
      },
      { file: fileName }
    );

    durations.push(metric.duration);
    console.log(` ${fileName}: ${metric.duration.toFixed(2)}ms`);
  }

  // `durations` is non-empty here: the empty-corpus case returned above and
  // any extraction failure would have thrown out of the loop.
  const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
  const maxDuration = Math.max(...durations);

  console.log(`\nPerformance Summary:`);
  console.log(` Average: ${avgDuration.toFixed(2)}ms`);
  console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);

  // Performance expectation - should complete within reasonable time
  expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
  expect(maxDuration).toBeLessThan(5000); // No single extraction over 5 seconds
});

tap.start();