update

2025-05-28 08:40:26 +00:00
parent e4c762658d
commit 32f8bc192a
24 changed files with 3350 additions and 5416 deletions
--- a/test/suite/einvoice_pdf-operations/test.pdf-02.zugferd-v1-extraction.ts
+++ b/test/suite/einvoice_pdf-operations/test.pdf-02.zugferd-v1-extraction.ts
@ -1,357 +1,157 @@
 import { tap, expect } from '@git.zone/tstest/tapbundle';
-import * as plugins from '../../../ts/plugins.ts';
-import { EInvoice } from '../../../ts/classes.xinvoice.ts';
-import { CorpusLoader } from '../../helpers/corpus.loader.ts';
-import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
+import { EInvoice } from '../../../ts/index.js';
+import { CorpusLoader } from '../../helpers/corpus.loader.js';
+import { PerformanceTracker } from '../../helpers/performance.tracker.js';
+import { promises as fs } from 'fs';
+import * as path from 'path';

-const testTimeout = 300000; // 5 minutes timeout for PDF processing
-
-// PDF-02: ZUGFeRD v1 Extraction
-// Tests XML extraction from ZUGFeRD v1 PDFs with specific format validation
-// and compatibility checks for legacy ZUGFeRD implementations
-
-tap.test('PDF-02: ZUGFeRD v1 Extraction - Basic Extraction', async (tools) => {
-  const startTime = Date.now();
+tap.test('PDF-02: ZUGFeRD v1 Extraction - should extract and validate ZUGFeRD v1 PDFs', async () => {
+  // Get ZUGFeRD v1 PDF files from corpus
+  const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
+  const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));
  
-  // Test basic ZUGFeRD v1 extraction functionality
-  try {
-    const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1');
-    
-    if (zugferdV1Files.length === 0) {
-      tools.log('⚠ No ZUGFeRD v1 files found in corpus, skipping basic extraction test');
-      return;
-    }
-    
-    const testFile = zugferdV1Files[0];
-    tools.log(`Testing ZUGFeRD v1 extraction with: ${plugins.path.basename(testFile)}`);
-    
-    const invoice = new EInvoice();
-    
-    // Check if file exists and is readable
-    const fileExists = await plugins.fs.pathExists(testFile);
-    expect(fileExists).toBeTrue();
-    
-    const fileStats = await plugins.fs.stat(testFile);
-    tools.log(`File size: ${(fileStats.size / 1024).toFixed(1)}KB`);
-    
-    // Attempt PDF extraction
-    let extractionResult;
-    try {
-      extractionResult = await invoice.fromFile(testFile);
-      
-      if (extractionResult) {
-        tools.log('✓ ZUGFeRD v1 XML extraction successful');
-        
-        // Verify extracted content contains ZUGFeRD v1 characteristics
-        const extractedXml = await invoice.toXmlString();
-        expect(extractedXml).toBeTruthy();
-        expect(extractedXml.length).toBeGreaterThan(100);
-        
-        // Check for ZUGFeRD v1 namespace or characteristics
-        const hasZugferdV1Markers = extractedXml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
-                                   extractedXml.includes('ZUGFeRD') ||
-                                   extractedXml.includes('FERD');
-        
-        if (hasZugferdV1Markers) {
-          tools.log('✓ ZUGFeRD v1 format markers detected in extracted XML');
-        } else {
-          tools.log('⚠ ZUGFeRD v1 format markers not clearly detected');
-        }
-        
-        // Test basic validation of extracted content
-        try {
-          const validationResult = await invoice.validate();
-          if (validationResult.valid) {
-            tools.log('✓ Extracted ZUGFeRD v1 content passes validation');
-          } else {
-            tools.log(`⚠ Validation issues found: ${validationResult.errors?.length || 0} errors`);
-          }
-        } catch (validationError) {
-          tools.log(`⚠ Validation failed: ${validationError.message}`);
-        }
-        
-      } else {
-        tools.log('⚠ ZUGFeRD v1 extraction returned no result');
-      }
-      
-    } catch (extractionError) {
-      tools.log(`⚠ ZUGFeRD v1 extraction failed: ${extractionError.message}`);
-      // This might be expected if PDF extraction is not fully implemented
-    }
-    
-  } catch (error) {
-    tools.log(`ZUGFeRD v1 basic extraction test failed: ${error.message}`);
-  }
+  console.log(`Testing ZUGFeRD v1 extraction from ${pdfFiles.length} PDFs`);
  
-  const duration = Date.now() - startTime;
-  PerformanceTracker.recordMetric('pdf-zugferd-v1-basic-extraction', duration);
-});
-
-tap.test('PDF-02: ZUGFeRD v1 Extraction - Corpus Processing', { timeout: testTimeout }, async (tools) => {
-  const startTime = Date.now();
+  let successCount = 0;
+  let v1DetectedCount = 0;
  
-  let processedFiles = 0;
-  let successfulExtractions = 0;
-  let extractionErrors = 0;
-  let totalExtractionTime = 0;
-  
-  try {
-    const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1');
-    tools.log(`Processing ${zugferdV1Files.length} ZUGFeRD v1 files`);
-    
-    if (zugferdV1Files.length === 0) {
-      tools.log('⚠ No ZUGFeRD v1 files found in corpus');
-      return;
-    }
-    
-    for (const filePath of zugferdV1Files) {
-      const fileName = plugins.path.basename(filePath);
-      const fileExtractionStart = Date.now();
-      
-      try {
-        processedFiles++;
-        
-        // Check file accessibility
-        const fileExists = await plugins.fs.pathExists(filePath);
-        if (!fileExists) {
-          tools.log(`⚠ File not found: ${fileName}`);
-          continue;
-        }
-        
-        const fileStats = await plugins.fs.stat(filePath);
-        const fileSizeKB = fileStats.size / 1024;
-        
-        // Attempt extraction
-        const invoice = new EInvoice();
-        const extractionResult = await invoice.fromFile(filePath);
-        
-        const fileExtractionTime = Date.now() - fileExtractionStart;
-        totalExtractionTime += fileExtractionTime;
-        
-        if (extractionResult) {
-          successfulExtractions++;
-          
-          tools.log(`✓ ${fileName}: Extracted (${fileSizeKB.toFixed(1)}KB, ${fileExtractionTime}ms)`);
-          
-          // Quick validation of extracted content
-          try {
-            const xmlContent = await invoice.toXmlString();
-            if (xmlContent && xmlContent.length > 50) {
-              tools.log(`  Content length: ${xmlContent.length} chars`);
-            }
-          } catch (contentError) {
-            tools.log(`  ⚠ Content extraction error: ${contentError.message}`);
-          }
-          
-        } else {
-          extractionErrors++;
-          tools.log(`⚠ ${fileName}: No XML content extracted`);
-        }
-        
-      } catch (error) {
-        extractionErrors++;
-        const fileExtractionTime = Date.now() - fileExtractionStart;
-        totalExtractionTime += fileExtractionTime;
-        
-        tools.log(`✗ ${fileName}: Extraction failed - ${error.message}`);
-      }
-    }
-    
-    // Calculate statistics
-    const successRate = processedFiles > 0 ? (successfulExtractions / processedFiles) * 100 : 0;
-    const averageExtractionTime = processedFiles > 0 ? totalExtractionTime / processedFiles : 0;
-    
-    tools.log(`\nZUGFeRD v1 Extraction Summary:`);
-    tools.log(`- Files processed: ${processedFiles}`);
-    tools.log(`- Successful extractions: ${successfulExtractions} (${successRate.toFixed(1)}%)`);
-    tools.log(`- Extraction errors: ${extractionErrors}`);
-    tools.log(`- Average extraction time: ${averageExtractionTime.toFixed(1)}ms`);
-    
-    // Performance expectations
-    if (processedFiles > 0) {
-      expect(averageExtractionTime).toBeLessThan(5000); // 5 seconds max per file
-    }
-    
-    // We expect at least some extractions to work, but don't require 100% success
-    // as some files might be corrupted or use unsupported PDF features
-    if (processedFiles > 0) {
-      expect(successRate).toBeGreaterThan(0); // At least one file should work
-    }
-    
-  } catch (error) {
-    tools.log(`ZUGFeRD v1 corpus processing failed: ${error.message}`);
-    throw error;
-  }
-  
-  const totalDuration = Date.now() - startTime;
-  PerformanceTracker.recordMetric('pdf-zugferd-v1-corpus-extraction', totalDuration);
-  
-  tools.log(`ZUGFeRD v1 corpus processing completed in ${totalDuration}ms`);
-});
-
-tap.test('PDF-02: ZUGFeRD v1 Extraction - Format Validation', async (tools) => {
-  const startTime = Date.now();
-  
-  try {
-    const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1');
-    
-    if (zugferdV1Files.length === 0) {
-      tools.log('⚠ No ZUGFeRD v1 files found for format validation');
-      return;
-    }
-    
-    // Test with first available file for detailed format validation
-    const testFile = zugferdV1Files[0];
-    const fileName = plugins.path.basename(testFile);
-    
-    tools.log(`Testing ZUGFeRD v1 format validation with: ${fileName}`);
-    
-    const invoice = new EInvoice();
+  for (const filePath of pdfFiles.slice(0, 10)) { // Test first 10 for performance
+    const fileName = path.basename(filePath);
    
    try {
-      const extractionResult = await invoice.fromFile(testFile);
+      const pdfBuffer = await fs.readFile(filePath);
      
-      if (extractionResult) {
-        const xmlContent = await invoice.toXmlString();
-        
-        // ZUGFeRD v1 specific format checks
-        const formatChecks = {
-          hasXmlDeclaration: xmlContent.startsWith('<?xml'),
-          hasZugferdNamespace: xmlContent.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
-                              xmlContent.includes('ZUGFeRD') ||
-                              xmlContent.includes('FERD'),
-          hasInvoiceElements: xmlContent.includes('<Invoice') || 
-                             xmlContent.includes('<CrossIndustryDocument') ||
-                             xmlContent.includes('<invoice'),
-          isWellFormed: true // Assume true if we got this far
-        };
-        
-        tools.log(`ZUGFeRD v1 Format Validation Results:`);
-        tools.log(`- Has XML Declaration: ${formatChecks.hasXmlDeclaration}`);
-        tools.log(`- Has ZUGFeRD Namespace: ${formatChecks.hasZugferdNamespace}`);
-        tools.log(`- Has Invoice Elements: ${formatChecks.hasInvoiceElements}`);
-        tools.log(`- Is Well-Formed: ${formatChecks.isWellFormed}`);
-        
-        // Basic format expectations
-        expect(formatChecks.hasXmlDeclaration).toBeTrue();
-        expect(formatChecks.isWellFormed).toBeTrue();
-        
-        if (formatChecks.hasZugferdNamespace && formatChecks.hasInvoiceElements) {
-          tools.log('✓ ZUGFeRD v1 format validation passed');
-        } else {
-          tools.log('⚠ ZUGFeRD v1 format markers not fully detected');
-        }
-        
-        // Test format detection if available
-        if (typeof invoice.detectFormat === 'function') {
-          try {
-            const detectedFormat = await invoice.detectFormat(xmlContent);
-            tools.log(`Detected format: ${detectedFormat}`);
-            
-            if (detectedFormat.toLowerCase().includes('zugferd') || 
-                detectedFormat.toLowerCase().includes('cii')) {
-              tools.log('✓ Format detection correctly identified ZUGFeRD/CII');
-            }
-          } catch (detectionError) {
-            tools.log(`Format detection error: ${detectionError.message}`);
-          }
-        }
-        
+      const { result: invoice, metric } = await PerformanceTracker.track(
+        'zugferd-v1-extraction',
+        async () => {
+          return await EInvoice.fromPdf(pdfBuffer);
+        },
+        { file: fileName }
+      );
+      
+      expect(invoice).toBeTruthy();
+      const xml = invoice.getXml();
+      expect(xml).toBeTruthy();
+      expect(xml.length).toBeGreaterThan(100);
+      
+      // Check for ZUGFeRD v1 specific markers
+      const isZugferdV1 = xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
+                         xml.includes('CrossIndustryDocument') ||
+                         (xml.includes('ZUGFeRD') && !xml.includes('CrossIndustryInvoice'));
+      
+      if (isZugferdV1) {
+        v1DetectedCount++;
+        console.log(`✓ ${fileName}: ZUGFeRD v1 detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
-        tools.log('⚠ No content extracted for format validation');
+        console.log(`✓ ${fileName}: Extracted but not ZUGFeRD v1 format (${metric.duration.toFixed(2)}ms)`);
      }
      
-    } catch (extractionError) {
-      tools.log(`Format validation extraction failed: ${extractionError.message}`);
-    }
-    
-  } catch (error) {
-    tools.log(`ZUGFeRD v1 format validation failed: ${error.message}`);
-  }
-  
-  const duration = Date.now() - startTime;
-  PerformanceTracker.recordMetric('pdf-zugferd-v1-format-validation', duration);
-});
-
-tap.test('PDF-02: ZUGFeRD v1 Extraction - Error Handling', async (tools) => {
-  const startTime = Date.now();
-  
-  // Test error handling with various problematic scenarios
-  const errorTestCases = [
-    {
-      name: 'Non-existent file',
-      filePath: '/non/existent/zugferd.pdf',
-      expectedError: true
-    },
-    {
-      name: 'Empty file path',
-      filePath: '',
-      expectedError: true
-    }
-  ];
-  
-  for (const testCase of errorTestCases) {
-    tools.log(`Testing error handling: ${testCase.name}`);
-    
-    try {
-      const invoice = new EInvoice();
-      
-      if (testCase.filePath) {
-        const result = await invoice.fromFile(testCase.filePath);
-        
-        if (testCase.expectedError) {
-          tools.log(`⚠ Expected error for ${testCase.name} but operation succeeded`);
-        } else {
-          tools.log(`✓ ${testCase.name}: Operation succeeded as expected`);
-        }
-      } else {
-        // Test with empty/invalid path
-        try {
-          await invoice.fromFile(testCase.filePath);
-          if (testCase.expectedError) {
-            tools.log(`⚠ Expected error for ${testCase.name} but no error occurred`);
-          }
-        } catch (error) {
-          if (testCase.expectedError) {
-            tools.log(`✓ ${testCase.name}: Expected error caught - ${error.message}`);
-          } else {
-            throw error;
-          }
-        }
-      }
+      successCount++;
      
    } catch (error) {
-      if (testCase.expectedError) {
-        tools.log(`✓ ${testCase.name}: Expected error caught - ${error.message}`);
-        expect(error.message).toBeTruthy();
-      } else {
-        tools.log(`✗ ${testCase.name}: Unexpected error - ${error.message}`);
-        throw error;
-      }
+      console.log(`✗ ${fileName}: ${error.message}`);
    }
  }
  
-  const duration = Date.now() - startTime;
-  PerformanceTracker.recordMetric('pdf-zugferd-v1-error-handling', duration);
+  console.log(`\nZUGFeRD v1 Extraction Summary:`);
+  console.log(`  Total processed: ${Math.min(10, pdfFiles.length)}`);
+  console.log(`  Successful extractions: ${successCount}`);
+  console.log(`  ZUGFeRD v1 format detected: ${v1DetectedCount}`);
+  
+  // We expect most ZUGFeRD v1 files to be successfully extracted
+  expect(successCount).toBeGreaterThan(0);
 });

-tap.test('PDF-02: Performance Summary', async (tools) => {
-  const operations = [
-    'pdf-zugferd-v1-basic-extraction',
-    'pdf-zugferd-v1-corpus-extraction', 
-    'pdf-zugferd-v1-format-validation',
-    'pdf-zugferd-v1-error-handling'
-  ];
+tap.test('PDF-02: ZUGFeRD v1 Format Validation - should validate v1 specific elements', async () => {
+  // Get one ZUGFeRD v1 file for detailed validation
+  const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
+  const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));
  
-  tools.log(`\n=== ZUGFeRD v1 Extraction Performance Summary ===`);
-  
-  for (const operation of operations) {
-    const summary = await PerformanceTracker.getSummary(operation);
-    if (summary) {
-      tools.log(`${operation}:`);
-      tools.log(`  avg=${summary.average}ms, min=${summary.min}ms, max=${summary.max}ms, p95=${summary.p95}ms`);
-    }
+  if (pdfFiles.length === 0) {
+    console.log('No ZUGFeRD v1 PDFs found, skipping validation test');
+    return;
  }
  
-  tools.log(`\nZUGFeRD v1 extraction testing completed.`);
-});
+  const testFile = pdfFiles[0];
+  const fileName = path.basename(testFile);
+  
+  console.log(`Validating ZUGFeRD v1 format with: ${fileName}`);
+  
+  const pdfBuffer = await fs.readFile(testFile);
+  const invoice = await EInvoice.fromPdf(pdfBuffer);
+  
+  expect(invoice).toBeTruthy();
+  
+  const xml = invoice.getXml();
+  expect(xml).toBeTruthy();
+  
+  // ZUGFeRD v1 specific validations
+  console.log('Checking ZUGFeRD v1 format characteristics:');
+  
+  // Should contain ZUGFeRD v1 namespace
+  const hasV1Namespace = xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0');
+  console.log(`  ZUGFeRD v1 namespace: ${hasV1Namespace ? '✓' : '✗'}`);
+  
+  // Should contain CrossIndustryDocument root element
+  const hasCrossIndustryDocument = xml.includes('<rsm:CrossIndustryDocument') || 
+                                  xml.includes('<CrossIndustryDocument');
+  console.log(`  CrossIndustryDocument root: ${hasCrossIndustryDocument ? '✓' : '✗'}`);
+  
+  // Should contain basic invoice elements
+  const hasInvoiceId = xml.includes('<ram:ID>');
+  console.log(`  Invoice ID element: ${hasInvoiceId ? '✓' : '✗'}`);
+  
+  const hasIssueDate = xml.includes('<ram:IssueDateTime>');
+  console.log(`  Issue date element: ${hasIssueDate ? '✓' : '✗'}`);
+  
+  // Check format detection
+  const detectedFormat = invoice.getFormat();
+  console.log(`  Detected format: ${detectedFormat}`);
+  
+  // Basic validation - at least some ZUGFeRD v1 characteristics should be present
+  expect(hasCrossIndustryDocument || hasV1Namespace).toBeTruthy();
+  expect(hasInvoiceId).toBeTruthy();
+});
+
+tap.test('PDF-02: ZUGFeRD v1 Performance - should extract v1 PDFs efficiently', async () => {
+  const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
+  const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));
+  
+  if (pdfFiles.length === 0) {
+    console.log('No ZUGFeRD v1 PDFs found, skipping performance test');
+    return;
+  }
+  
+  console.log(`Testing extraction performance with ${Math.min(5, pdfFiles.length)} ZUGFeRD v1 PDFs`);
+  
+  const durations: number[] = [];
+  
+  for (const filePath of pdfFiles.slice(0, 5)) {
+    const fileName = path.basename(filePath);
+    const pdfBuffer = await fs.readFile(filePath);
+    
+    const { metric } = await PerformanceTracker.track(
+      'zugferd-v1-performance',
+      async () => {
+        return await EInvoice.fromPdf(pdfBuffer);
+      },
+      { file: fileName }
+    );
+    
+    durations.push(metric.duration);
+    console.log(`  ${fileName}: ${metric.duration.toFixed(2)}ms`);
+  }
+  
+  const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
+  const maxDuration = Math.max(...durations);
+  
+  console.log(`\nPerformance Summary:`);
+  console.log(`  Average: ${avgDuration.toFixed(2)}ms`);
+  console.log(`  Maximum: ${maxDuration.toFixed(2)}ms`);
+  
+  // Performance expectation - should complete within reasonable time
+  expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
+  expect(maxDuration).toBeLessThan(5000);  // No single extraction over 5 seconds
+});
+
+tap.start();