import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
import { promises as fs } from 'fs';
import * as path from 'path';

/** Corpus category queried by every test in this file. */
const ZUGFERD_V1_CATEGORY = 'ZUGFERD_V1_CORRECT';

/** Namespace URI the tests look for to recognise a ZUGFeRD v1 document. */
const ZUGFERD_V1_NAMESPACE = 'urn:ferd:CrossIndustryDocument:invoice:1p0';

/** Upper bound on the number of PDFs exercised by the extraction test (keeps the run short). */
const EXTRACTION_SAMPLE_LIMIT = 10;

/** Upper bound on the number of PDFs timed by the performance test. */
const PERFORMANCE_SAMPLE_LIMIT = 5;

/**
 * Lists the corpus files of the ZUGFeRD v1 category whose path ends in `.pdf`.
 *
 * @returns The matching file paths; empty when the corpus has no such PDFs.
 */
async function loadZugferdV1Pdfs(): Promise<string[]> {
  const corpusFiles = await CorpusLoader.getFiles(ZUGFERD_V1_CATEGORY);
  return corpusFiles.filter((candidate) => candidate.endsWith('.pdf'));
}

/**
 * Reports whether `xml` contains an opening tag with the given local name,
 * with or without a namespace prefix (`<rsm:Foo ...>`, `<Foo>`, `<Foo/>`).
 *
 * @param xml - Serialized XML document to search.
 * @param localName - Element name without prefix. It is interpolated into a
 *   regular expression unescaped, so callers must pass a plain XML name.
 * @returns `true` when at least one matching opening tag is present.
 */
function hasElement(xml: string, localName: string): boolean {
  return new RegExp(`<(?:[A-Za-z_][\\w.-]*:)?${localName}[\\s/>]`).test(xml);
}

/**
 * Turns a caught value into a printable message without assuming it is an `Error`.
 *
 * @param error - Whatever was thrown.
 * @returns `error.message` for `Error` instances, otherwise the stringified value.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

// Extracts the embedded XML from a sample of corpus PDFs and counts how many
// extractions succeed and how many look like ZUGFeRD v1.
tap.test('PDF-02: ZUGFeRD v1 Extraction - should extract and validate ZUGFeRD v1 PDFs', async () => {
  const pdfFiles = await loadZugferdV1Pdfs();
  console.log(`Testing ZUGFeRD v1 extraction from ${pdfFiles.length} PDFs`);

  // Same skip policy as the two tests below: an absent corpus is not a failure.
  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping extraction test');
    return;
  }

  const sample = pdfFiles.slice(0, EXTRACTION_SAMPLE_LIMIT);
  let successCount = 0;
  let v1DetectedCount = 0;

  for (const filePath of sample) {
    const fileName = path.basename(filePath);

    // Per-file failures (including failed expectations) are logged and counted
    // as unsuccessful instead of aborting the loop; the final assertion only
    // requires that at least one file was extracted.
    try {
      const pdfBuffer = await fs.readFile(filePath);

      const { result: invoice, metric } = await PerformanceTracker.track(
        'zugferd-v1-extraction',
        async () => {
          return await EInvoice.fromPdf(pdfBuffer);
        },
        { file: fileName }
      );

      expect(invoice).toBeTruthy();

      const xml = invoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // ZUGFeRD v1 markers: the v1 namespace, the CrossIndustryDocument name,
      // or a ZUGFeRD mention in a document that has no CrossIndustryInvoice.
      const isZugferdV1 =
        xml.includes(ZUGFERD_V1_NAMESPACE) ||
        xml.includes('CrossIndustryDocument') ||
        (xml.includes('ZUGFeRD') && !xml.includes('CrossIndustryInvoice'));

      if (isZugferdV1) {
        v1DetectedCount++;
        console.log(`✓ ${fileName}: ZUGFeRD v1 detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
        console.log(`✓ ${fileName}: Extracted but not ZUGFeRD v1 format (${metric.duration.toFixed(2)}ms)`);
      }

      successCount++;
    } catch (error) {
      console.log(`✗ ${fileName}: ${describeError(error)}`);
    }
  }

  console.log(`\nZUGFeRD v1 Extraction Summary:`);
  console.log(` Total processed: ${sample.length}`);
  console.log(` Successful extractions: ${successCount}`);
  console.log(` ZUGFeRD v1 format detected: ${v1DetectedCount}`);

  // At least one of the sampled files must have been extracted successfully.
  expect(successCount).toBeGreaterThan(0);
});

// Inspects the XML extracted from the first corpus PDF for v1-specific structure.
tap.test('PDF-02: ZUGFeRD v1 Format Validation - should validate v1 specific elements', async () => {
  const pdfFiles = await loadZugferdV1Pdfs();

  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping validation test');
    return;
  }

  const testFile = pdfFiles[0];
  const fileName = path.basename(testFile);
  console.log(`Validating ZUGFeRD v1 format with: ${fileName}`);

  const pdfBuffer = await fs.readFile(testFile);
  const invoice = await EInvoice.fromPdf(pdfBuffer);
  expect(invoice).toBeTruthy();

  const xml = invoice.getXml();
  expect(xml).toBeTruthy();

  console.log('Checking ZUGFeRD v1 format characteristics:');

  // Should contain the ZUGFeRD v1 namespace.
  const hasV1Namespace = xml.includes(ZUGFERD_V1_NAMESPACE);
  console.log(` ZUGFeRD v1 namespace: ${hasV1Namespace ? '✓' : '✗'}`);

  // NOTE(review): the element names checked below were reconstructed after the
  // original marker strings were lost from this file - confirm them against
  // what getXml() actually returns for v1 documents.

  // Should contain a CrossIndustryDocument element (any namespace prefix).
  const hasCrossIndustryDocument = hasElement(xml, 'CrossIndustryDocument');
  console.log(` CrossIndustryDocument root: ${hasCrossIndustryDocument ? '✓' : '✗'}`);

  // Should contain the basic invoice elements.
  const hasInvoiceId = hasElement(xml, 'ID');
  console.log(` Invoice ID element: ${hasInvoiceId ? '✓' : '✗'}`);

  // Reported for diagnostics only; not asserted.
  const hasIssueDate = hasElement(xml, 'IssueDateTime');
  console.log(` Issue date element: ${hasIssueDate ? '✓' : '✗'}`);

  // Report what the library itself detected.
  const detectedFormat = invoice.getFormat();
  console.log(` Detected format: ${detectedFormat}`);

  // Basic validation: at least one v1 root/namespace marker plus an ID element.
  expect(hasCrossIndustryDocument || hasV1Namespace).toBeTruthy();
  expect(hasInvoiceId).toBeTruthy();
});

// Times extraction over a small sample and bounds the average and worst case.
tap.test('PDF-02: ZUGFeRD v1 Performance - should extract v1 PDFs efficiently', async () => {
  const pdfFiles = await loadZugferdV1Pdfs();

  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping performance test');
    return;
  }

  const sample = pdfFiles.slice(0, PERFORMANCE_SAMPLE_LIMIT);
  console.log(`Testing extraction performance with ${sample.length} ZUGFeRD v1 PDFs`);

  const durations: number[] = [];

  for (const filePath of sample) {
    const fileName = path.basename(filePath);
    // Read the file outside the tracked callback so only fromPdf() is timed.
    const pdfBuffer = await fs.readFile(filePath);

    const { metric } = await PerformanceTracker.track(
      'zugferd-v1-performance',
      async () => {
        return await EInvoice.fromPdf(pdfBuffer);
      },
      { file: fileName }
    );

    durations.push(metric.duration);
    console.log(` ${fileName}: ${metric.duration.toFixed(2)}ms`);
  }

  // `durations` is non-empty here because the empty-corpus case returned above.
  const avgDuration = durations.reduce((sum, value) => sum + value, 0) / durations.length;
  const maxDuration = Math.max(...durations);

  console.log(`\nPerformance Summary:`);
  console.log(` Average: ${avgDuration.toFixed(2)}ms`);
  console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);

  // Performance expectation - should complete within reasonable time.
  expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
  expect(maxDuration).toBeLessThan(5000); // No single extraction over 5 seconds
});

tap.start();