This commit is contained in:
2025-05-28 08:40:26 +00:00
parent e4c762658d
commit 32f8bc192a
24 changed files with 3350 additions and 5416 deletions

View File

@ -1,357 +1,157 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
import { promises as fs } from 'fs';
import * as path from 'path';
const testTimeout = 300000; // 5 minutes timeout for PDF processing
// PDF-02: ZUGFeRD v1 Extraction
// Tests XML extraction from ZUGFeRD v1 PDFs with specific format validation
// and compatibility checks for legacy ZUGFeRD implementations
tap.test('PDF-02: ZUGFeRD v1 Extraction - Basic Extraction', async (tools) => {
const startTime = Date.now();
tap.test('PDF-02: ZUGFeRD v1 Extraction - should extract and validate ZUGFeRD v1 PDFs', async () => {
// Get ZUGFeRD v1 PDF files from corpus
const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));
// Test basic ZUGFeRD v1 extraction functionality
try {
const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1');
if (zugferdV1Files.length === 0) {
tools.log('⚠ No ZUGFeRD v1 files found in corpus, skipping basic extraction test');
return;
}
const testFile = zugferdV1Files[0];
tools.log(`Testing ZUGFeRD v1 extraction with: ${plugins.path.basename(testFile)}`);
const invoice = new EInvoice();
// Check if file exists and is readable
const fileExists = await plugins.fs.pathExists(testFile);
expect(fileExists).toBeTrue();
const fileStats = await plugins.fs.stat(testFile);
tools.log(`File size: ${(fileStats.size / 1024).toFixed(1)}KB`);
// Attempt PDF extraction
let extractionResult;
try {
extractionResult = await invoice.fromFile(testFile);
if (extractionResult) {
tools.log('✓ ZUGFeRD v1 XML extraction successful');
// Verify extracted content contains ZUGFeRD v1 characteristics
const extractedXml = await invoice.toXmlString();
expect(extractedXml).toBeTruthy();
expect(extractedXml.length).toBeGreaterThan(100);
// Check for ZUGFeRD v1 namespace or characteristics
const hasZugferdV1Markers = extractedXml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
extractedXml.includes('ZUGFeRD') ||
extractedXml.includes('FERD');
if (hasZugferdV1Markers) {
tools.log('✓ ZUGFeRD v1 format markers detected in extracted XML');
} else {
tools.log('⚠ ZUGFeRD v1 format markers not clearly detected');
}
// Test basic validation of extracted content
try {
const validationResult = await invoice.validate();
if (validationResult.valid) {
tools.log('✓ Extracted ZUGFeRD v1 content passes validation');
} else {
tools.log(`⚠ Validation issues found: ${validationResult.errors?.length || 0} errors`);
}
} catch (validationError) {
tools.log(`⚠ Validation failed: ${validationError.message}`);
}
} else {
tools.log('⚠ ZUGFeRD v1 extraction returned no result');
}
} catch (extractionError) {
tools.log(`⚠ ZUGFeRD v1 extraction failed: ${extractionError.message}`);
// This might be expected if PDF extraction is not fully implemented
}
} catch (error) {
tools.log(`ZUGFeRD v1 basic extraction test failed: ${error.message}`);
}
console.log(`Testing ZUGFeRD v1 extraction from ${pdfFiles.length} PDFs`);
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-zugferd-v1-basic-extraction', duration);
});
tap.test('PDF-02: ZUGFeRD v1 Extraction - Corpus Processing', { timeout: testTimeout }, async (tools) => {
const startTime = Date.now();
let successCount = 0;
let v1DetectedCount = 0;
let processedFiles = 0;
let successfulExtractions = 0;
let extractionErrors = 0;
let totalExtractionTime = 0;
try {
const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1');
tools.log(`Processing ${zugferdV1Files.length} ZUGFeRD v1 files`);
if (zugferdV1Files.length === 0) {
tools.log('⚠ No ZUGFeRD v1 files found in corpus');
return;
}
for (const filePath of zugferdV1Files) {
const fileName = plugins.path.basename(filePath);
const fileExtractionStart = Date.now();
try {
processedFiles++;
// Check file accessibility
const fileExists = await plugins.fs.pathExists(filePath);
if (!fileExists) {
tools.log(`⚠ File not found: ${fileName}`);
continue;
}
const fileStats = await plugins.fs.stat(filePath);
const fileSizeKB = fileStats.size / 1024;
// Attempt extraction
const invoice = new EInvoice();
const extractionResult = await invoice.fromFile(filePath);
const fileExtractionTime = Date.now() - fileExtractionStart;
totalExtractionTime += fileExtractionTime;
if (extractionResult) {
successfulExtractions++;
tools.log(`${fileName}: Extracted (${fileSizeKB.toFixed(1)}KB, ${fileExtractionTime}ms)`);
// Quick validation of extracted content
try {
const xmlContent = await invoice.toXmlString();
if (xmlContent && xmlContent.length > 50) {
tools.log(` Content length: ${xmlContent.length} chars`);
}
} catch (contentError) {
tools.log(` ⚠ Content extraction error: ${contentError.message}`);
}
} else {
extractionErrors++;
tools.log(`${fileName}: No XML content extracted`);
}
} catch (error) {
extractionErrors++;
const fileExtractionTime = Date.now() - fileExtractionStart;
totalExtractionTime += fileExtractionTime;
tools.log(`${fileName}: Extraction failed - ${error.message}`);
}
}
// Calculate statistics
const successRate = processedFiles > 0 ? (successfulExtractions / processedFiles) * 100 : 0;
const averageExtractionTime = processedFiles > 0 ? totalExtractionTime / processedFiles : 0;
tools.log(`\nZUGFeRD v1 Extraction Summary:`);
tools.log(`- Files processed: ${processedFiles}`);
tools.log(`- Successful extractions: ${successfulExtractions} (${successRate.toFixed(1)}%)`);
tools.log(`- Extraction errors: ${extractionErrors}`);
tools.log(`- Average extraction time: ${averageExtractionTime.toFixed(1)}ms`);
// Performance expectations
if (processedFiles > 0) {
expect(averageExtractionTime).toBeLessThan(5000); // 5 seconds max per file
}
// We expect at least some extractions to work, but don't require 100% success
// as some files might be corrupted or use unsupported PDF features
if (processedFiles > 0) {
expect(successRate).toBeGreaterThan(0); // At least one file should work
}
} catch (error) {
tools.log(`ZUGFeRD v1 corpus processing failed: ${error.message}`);
throw error;
}
const totalDuration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-zugferd-v1-corpus-extraction', totalDuration);
tools.log(`ZUGFeRD v1 corpus processing completed in ${totalDuration}ms`);
});
tap.test('PDF-02: ZUGFeRD v1 Extraction - Format Validation', async (tools) => {
const startTime = Date.now();
try {
const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1');
if (zugferdV1Files.length === 0) {
tools.log('⚠ No ZUGFeRD v1 files found for format validation');
return;
}
// Test with first available file for detailed format validation
const testFile = zugferdV1Files[0];
const fileName = plugins.path.basename(testFile);
tools.log(`Testing ZUGFeRD v1 format validation with: ${fileName}`);
const invoice = new EInvoice();
for (const filePath of pdfFiles.slice(0, 10)) { // Test first 10 for performance
const fileName = path.basename(filePath);
try {
const extractionResult = await invoice.fromFile(testFile);
const pdfBuffer = await fs.readFile(filePath);
if (extractionResult) {
const xmlContent = await invoice.toXmlString();
// ZUGFeRD v1 specific format checks
const formatChecks = {
hasXmlDeclaration: xmlContent.startsWith('<?xml'),
hasZugferdNamespace: xmlContent.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
xmlContent.includes('ZUGFeRD') ||
xmlContent.includes('FERD'),
hasInvoiceElements: xmlContent.includes('<Invoice') ||
xmlContent.includes('<CrossIndustryDocument') ||
xmlContent.includes('<invoice'),
isWellFormed: true // Assume true if we got this far
};
tools.log(`ZUGFeRD v1 Format Validation Results:`);
tools.log(`- Has XML Declaration: ${formatChecks.hasXmlDeclaration}`);
tools.log(`- Has ZUGFeRD Namespace: ${formatChecks.hasZugferdNamespace}`);
tools.log(`- Has Invoice Elements: ${formatChecks.hasInvoiceElements}`);
tools.log(`- Is Well-Formed: ${formatChecks.isWellFormed}`);
// Basic format expectations
expect(formatChecks.hasXmlDeclaration).toBeTrue();
expect(formatChecks.isWellFormed).toBeTrue();
if (formatChecks.hasZugferdNamespace && formatChecks.hasInvoiceElements) {
tools.log('✓ ZUGFeRD v1 format validation passed');
} else {
tools.log('⚠ ZUGFeRD v1 format markers not fully detected');
}
// Test format detection if available
if (typeof invoice.detectFormat === 'function') {
try {
const detectedFormat = await invoice.detectFormat(xmlContent);
tools.log(`Detected format: ${detectedFormat}`);
if (detectedFormat.toLowerCase().includes('zugferd') ||
detectedFormat.toLowerCase().includes('cii')) {
tools.log('✓ Format detection correctly identified ZUGFeRD/CII');
}
} catch (detectionError) {
tools.log(`Format detection error: ${detectionError.message}`);
}
}
const { result: invoice, metric } = await PerformanceTracker.track(
'zugferd-v1-extraction',
async () => {
return await EInvoice.fromPdf(pdfBuffer);
},
{ file: fileName }
);
expect(invoice).toBeTruthy();
const xml = invoice.getXml();
expect(xml).toBeTruthy();
expect(xml.length).toBeGreaterThan(100);
// Check for ZUGFeRD v1 specific markers
const isZugferdV1 = xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
xml.includes('CrossIndustryDocument') ||
(xml.includes('ZUGFeRD') && !xml.includes('CrossIndustryInvoice'));
if (isZugferdV1) {
v1DetectedCount++;
console.log(`${fileName}: ZUGFeRD v1 detected and extracted (${metric.duration.toFixed(2)}ms)`);
} else {
tools.log('⚠ No content extracted for format validation');
console.log(`${fileName}: Extracted but not ZUGFeRD v1 format (${metric.duration.toFixed(2)}ms)`);
}
} catch (extractionError) {
tools.log(`Format validation extraction failed: ${extractionError.message}`);
}
} catch (error) {
tools.log(`ZUGFeRD v1 format validation failed: ${error.message}`);
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-zugferd-v1-format-validation', duration);
});
tap.test('PDF-02: ZUGFeRD v1 Extraction - Error Handling', async (tools) => {
const startTime = Date.now();
// Test error handling with various problematic scenarios
const errorTestCases = [
{
name: 'Non-existent file',
filePath: '/non/existent/zugferd.pdf',
expectedError: true
},
{
name: 'Empty file path',
filePath: '',
expectedError: true
}
];
for (const testCase of errorTestCases) {
tools.log(`Testing error handling: ${testCase.name}`);
try {
const invoice = new EInvoice();
if (testCase.filePath) {
const result = await invoice.fromFile(testCase.filePath);
if (testCase.expectedError) {
tools.log(`⚠ Expected error for ${testCase.name} but operation succeeded`);
} else {
tools.log(`${testCase.name}: Operation succeeded as expected`);
}
} else {
// Test with empty/invalid path
try {
await invoice.fromFile(testCase.filePath);
if (testCase.expectedError) {
tools.log(`⚠ Expected error for ${testCase.name} but no error occurred`);
}
} catch (error) {
if (testCase.expectedError) {
tools.log(`${testCase.name}: Expected error caught - ${error.message}`);
} else {
throw error;
}
}
}
successCount++;
} catch (error) {
if (testCase.expectedError) {
tools.log(`${testCase.name}: Expected error caught - ${error.message}`);
expect(error.message).toBeTruthy();
} else {
tools.log(`${testCase.name}: Unexpected error - ${error.message}`);
throw error;
}
console.log(`${fileName}: ${error.message}`);
}
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-zugferd-v1-error-handling', duration);
console.log(`\nZUGFeRD v1 Extraction Summary:`);
console.log(` Total processed: ${Math.min(10, pdfFiles.length)}`);
console.log(` Successful extractions: ${successCount}`);
console.log(` ZUGFeRD v1 format detected: ${v1DetectedCount}`);
// We expect most ZUGFeRD v1 files to be successfully extracted
expect(successCount).toBeGreaterThan(0);
});
tap.test('PDF-02: Performance Summary', async (tools) => {
const operations = [
'pdf-zugferd-v1-basic-extraction',
'pdf-zugferd-v1-corpus-extraction',
'pdf-zugferd-v1-format-validation',
'pdf-zugferd-v1-error-handling'
];
tap.test('PDF-02: ZUGFeRD v1 Format Validation - should validate v1 specific elements', async () => {
// Get one ZUGFeRD v1 file for detailed validation
const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));
tools.log(`\n=== ZUGFeRD v1 Extraction Performance Summary ===`);
for (const operation of operations) {
const summary = await PerformanceTracker.getSummary(operation);
if (summary) {
tools.log(`${operation}:`);
tools.log(` avg=${summary.average}ms, min=${summary.min}ms, max=${summary.max}ms, p95=${summary.p95}ms`);
}
if (pdfFiles.length === 0) {
console.log('No ZUGFeRD v1 PDFs found, skipping validation test');
return;
}
tools.log(`\nZUGFeRD v1 extraction testing completed.`);
});
const testFile = pdfFiles[0];
const fileName = path.basename(testFile);
console.log(`Validating ZUGFeRD v1 format with: ${fileName}`);
const pdfBuffer = await fs.readFile(testFile);
const invoice = await EInvoice.fromPdf(pdfBuffer);
expect(invoice).toBeTruthy();
const xml = invoice.getXml();
expect(xml).toBeTruthy();
// ZUGFeRD v1 specific validations
console.log('Checking ZUGFeRD v1 format characteristics:');
// Should contain ZUGFeRD v1 namespace
const hasV1Namespace = xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0');
console.log(` ZUGFeRD v1 namespace: ${hasV1Namespace ? '✓' : '✗'}`);
// Should contain CrossIndustryDocument root element
const hasCrossIndustryDocument = xml.includes('<rsm:CrossIndustryDocument') ||
xml.includes('<CrossIndustryDocument');
console.log(` CrossIndustryDocument root: ${hasCrossIndustryDocument ? '✓' : '✗'}`);
// Should contain basic invoice elements
const hasInvoiceId = xml.includes('<ram:ID>');
console.log(` Invoice ID element: ${hasInvoiceId ? '✓' : '✗'}`);
const hasIssueDate = xml.includes('<ram:IssueDateTime>');
console.log(` Issue date element: ${hasIssueDate ? '✓' : '✗'}`);
// Check format detection
const detectedFormat = invoice.getFormat();
console.log(` Detected format: ${detectedFormat}`);
// Basic validation - at least some ZUGFeRD v1 characteristics should be present
expect(hasCrossIndustryDocument || hasV1Namespace).toBeTruthy();
expect(hasInvoiceId).toBeTruthy();
});
tap.test('PDF-02: ZUGFeRD v1 Performance - should extract v1 PDFs efficiently', async () => {
const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));
if (pdfFiles.length === 0) {
console.log('No ZUGFeRD v1 PDFs found, skipping performance test');
return;
}
console.log(`Testing extraction performance with ${Math.min(5, pdfFiles.length)} ZUGFeRD v1 PDFs`);
const durations: number[] = [];
for (const filePath of pdfFiles.slice(0, 5)) {
const fileName = path.basename(filePath);
const pdfBuffer = await fs.readFile(filePath);
const { metric } = await PerformanceTracker.track(
'zugferd-v1-performance',
async () => {
return await EInvoice.fromPdf(pdfBuffer);
},
{ file: fileName }
);
durations.push(metric.duration);
console.log(` ${fileName}: ${metric.duration.toFixed(2)}ms`);
}
const avgDuration = durations.reduce((a, b) => a + b, 0) / durations.length;
const maxDuration = Math.max(...durations);
console.log(`\nPerformance Summary:`);
console.log(` Average: ${avgDuration.toFixed(2)}ms`);
console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);
// Performance expectation - should complete within reasonable time
expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
expect(maxDuration).toBeLessThan(5000); // No single extraction over 5 seconds
});
tap.start();