486 lines
18 KiB
TypeScript
486 lines
18 KiB
TypeScript
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
|
import * as plugins from '../../../ts/plugins.ts';
|
|
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
|
|
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
|
|
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
|
|
|
|
// Timeout budget in milliseconds (300000 ms = 5 minutes). It is handed to the
// corpus performance analysis test as its `timeout` option.
const testTimeout = 300000; // 5 minutes timeout for PDF processing
|
|
|
|
// PDF-03: ZUGFeRD v2/Factur-X Extraction
|
|
// Tests XML extraction from ZUGFeRD v2 and Factur-X PDFs with enhanced format support
|
|
// and cross-border compatibility (German ZUGFeRD v2 and French Factur-X)
|
|
|
|
// Smoke test: loads the first ZUGFERD_V2 corpus file into an EInvoice, serialises it
// back to XML, looks for CII / ZUGFeRD / Factur-X markers and runs validation.
// Problems are reported through tools.log; the elapsed time is recorded as the
// 'pdf-facturx-basic-extraction' metric unless the corpus is empty (early return).
tap.test('PDF-03: Factur-X Extraction - Basic ZUGFeRD v2 Extraction', async (tools) => {
  const startedAt = Date.now();

  // Any single hit counts as "looks like ZUGFeRD v2 / Factur-X".
  const formatMarkers = [
    'urn:cen.eu:en16931:2017',
    'CrossIndustryInvoice',
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
    'zugferd',
    'factur-x',
  ];

  try {
    const corpusFiles = await CorpusLoader.getFiles('ZUGFERD_V2');

    if (corpusFiles.length === 0) {
      tools.log('⚠ No ZUGFeRD v2 files found in corpus, skipping basic extraction test');
      return;
    }

    const samplePath = corpusFiles[0];
    tools.log(`Testing ZUGFeRD v2 extraction with: ${plugins.path.basename(samplePath)}`);

    const invoice = new EInvoice();

    // The sample must exist on disk before extraction is attempted.
    const isPresent = await plugins.fs.pathExists(samplePath);
    expect(isPresent).toBeTrue();

    const stats = await plugins.fs.stat(samplePath);
    tools.log(`File size: ${(stats.size / 1024).toFixed(1)}KB`);

    // Extraction problems are downgraded to warnings by the catch below.
    try {
      const loaded = await invoice.fromFile(samplePath);

      if (!loaded) {
        tools.log('⚠ ZUGFeRD v2 extraction returned no result');
      } else {
        tools.log('✓ ZUGFeRD v2 XML extraction successful');

        // The round-tripped XML has to be non-trivial.
        const xml = await invoice.toXmlString();
        expect(xml).toBeTruthy();
        expect(xml.length).toBeGreaterThan(100);

        const markerFound = formatMarkers.some((marker) => xml.includes(marker));
        tools.log(
          markerFound
            ? '✓ ZUGFeRD v2/Factur-X format markers detected'
            : '⚠ ZUGFeRD v2/Factur-X format markers not clearly detected'
        );

        // Validation is informational only: failures and exceptions are logged.
        try {
          const report = await invoice.validate();

          if (!report.valid) {
            const issues = report.errors;
            tools.log(`⚠ Validation issues: ${issues?.length || 0} errors`);
            if (issues && issues.length > 0) {
              tools.log(` First error: ${issues[0].message}`);
            }
          } else {
            tools.log('✓ Extracted ZUGFeRD v2 content passes validation');
          }
        } catch (validationFailure) {
          tools.log(`⚠ Validation failed: ${validationFailure.message}`);
        }
      }
    } catch (extractionFailure) {
      tools.log(`⚠ ZUGFeRD v2 extraction failed: ${extractionFailure.message}`);
    }
  } catch (failure) {
    tools.log(`ZUGFeRD v2 basic extraction test failed: ${failure.message}`);
  }

  const elapsedMs = Date.now() - startedAt;
  PerformanceTracker.recordMetric('pdf-facturx-basic-extraction', elapsedMs);
});
|
|
|
|
// Factur-X specific checks: picks corpus files whose names hint at Factur-X (falling
// back to the first two ZUGFERD_V2 files), loads each into an EInvoice and logs which
// CII / EN16931 / Factur-X markers the serialised XML contains.
//
// Fails when at least one file was processed but none could be extracted. Unexpected
// errors are logged and rethrown so that this assertion (and any other failure) is
// visible to the runner instead of being swallowed, mirroring the corpus performance
// test in this file. The 'pdf-facturx-specific-testing' metric is recorded on success.
tap.test('PDF-03: Factur-X Extraction - Factur-X Specific Testing', async (tools) => {
  const startTime = Date.now();

  // Caught values are not guaranteed to be Error instances; only those carry `.message`.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Lower-case file-name fragments treated as a hint for Factur-X material.
  // NOTE(review): 'fr' is very broad and already subsumes 'france'; kept for parity.
  const fileNameHints = ['factur', 'france', 'fr'];

  try {
    // Factur-X samples live in the same corpus category as ZUGFeRD v2.
    const facturxFiles = await CorpusLoader.getFiles('ZUGFERD_V2');

    const potentialFacturxFiles = facturxFiles.filter((file) => {
      const lowerName = plugins.path.basename(file).toLowerCase();
      return fileNameHints.some((hint) => lowerName.includes(hint));
    });

    if (potentialFacturxFiles.length === 0) {
      tools.log('⚠ No specific Factur-X files identified, testing with ZUGFeRD v2 files');
      // Use first few ZUGFeRD v2 files as they should be compatible
      potentialFacturxFiles.push(...facturxFiles.slice(0, 2));
    }

    tools.log(`Testing Factur-X specific features with ${potentialFacturxFiles.length} files`);

    let facturxProcessed = 0;
    let facturxSuccessful = 0;

    for (const filePath of potentialFacturxFiles) {
      const fileName = plugins.path.basename(filePath);

      // Per-file failures are tolerated; they only lower the success rate.
      try {
        facturxProcessed++;

        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        if (extractionResult) {
          facturxSuccessful++;

          const xmlContent = await invoice.toXmlString();

          // Plain substring probes on the serialised XML.
          const facturxChecks = {
            hasEN16931Context: xmlContent.includes('urn:cen.eu:en16931:2017'),
            hasCIINamespace: xmlContent.includes('urn:un:unece:uncefact:data:standard:CrossIndustryInvoice'),
            hasFacturxGuideline: xmlContent.includes('factur-x') || xmlContent.includes('FACTUR-X'),
            hasExchangedDocument: xmlContent.includes('ExchangedDocument'),
            hasSupplyChainTrade: xmlContent.includes('SupplyChainTradeTransaction'),
          };

          tools.log(`${fileName} Factur-X characteristics:`);
          tools.log(` EN16931 Context: ${facturxChecks.hasEN16931Context}`);
          tools.log(` CII Namespace: ${facturxChecks.hasCIINamespace}`);
          tools.log(` Factur-X Guideline: ${facturxChecks.hasFacturxGuideline}`);
          tools.log(` ExchangedDocument: ${facturxChecks.hasExchangedDocument}`);
          tools.log(` SupplyChainTrade: ${facturxChecks.hasSupplyChainTrade}`);

          // Basic Factur-X structure validation
          if (facturxChecks.hasEN16931Context && facturxChecks.hasCIINamespace) {
            tools.log(` ✓ Valid Factur-X/ZUGFeRD v2 structure detected`);
          }
        } else {
          tools.log(`⚠ ${fileName}: No XML content extracted`);
        }
      } catch (error) {
        tools.log(`✗ ${fileName}: Extraction failed - ${describeError(error)}`);
      }
    }

    const facturxSuccessRate = facturxProcessed > 0 ? (facturxSuccessful / facturxProcessed) * 100 : 0;

    tools.log(`\nFactur-X Processing Summary:`);
    tools.log(`- Files processed: ${facturxProcessed}`);
    tools.log(`- Successful extractions: ${facturxSuccessful} (${facturxSuccessRate.toFixed(1)}%)`);

    // At least one processed file must extract successfully.
    if (facturxProcessed > 0) {
      expect(facturxSuccessRate).toBeGreaterThan(0);
    }
  } catch (error) {
    tools.log(`Factur-X specific testing failed: ${describeError(error)}`);
    // Rethrow: otherwise the success-rate assertion above could never fail the test.
    throw error;
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-specific-testing', duration);
});
|
|
|
|
// Corpus performance analysis: extracts up to the first 10 ZUGFERD_V2 corpus files,
// timing each extraction and relating it to the file size.
//
// Assertions (only when at least one size/time sample exists): average time per KB
// below 50 ms and average extraction time below 3000 ms. Additionally, if any file was
// processed, at least one extraction must succeed. Errors are logged and rethrown.
// Returns early (without recording a metric) when the corpus category is empty.
//
// NOTE(review): the options object is passed as the second argument of tap.test;
// confirm that the installed tapbundle version accepts this call shape.
tap.test('PDF-03: Factur-X Extraction - Corpus Performance Analysis', { timeout: testTimeout }, async (tools) => {
  const startTime = Date.now();

  // Caught values are not guaranteed to be Error instances; only those carry `.message`.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  let totalProcessed = 0;
  let totalSuccessful = 0;
  let totalExtractionTime = 0;

  // One sample per successfully extracted, non-empty file.
  const fileSizePerformance: Array<{
    fileName: string;
    sizeKB: number;
    extractionTimeMs: number;
    timePerKB: number;
  }> = [];

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');
    tools.log(`Processing ${zugferdV2Files.length} ZUGFeRD v2/Factur-X files for performance analysis`);

    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2/Factur-X files found in corpus');
      return;
    }

    // Process subset for performance analysis
    const filesToProcess = zugferdV2Files.slice(0, Math.min(10, zugferdV2Files.length));

    for (const filePath of filesToProcess) {
      const fileName = plugins.path.basename(filePath);
      // The timer deliberately includes the stat call, as every file pays for it equally.
      const fileExtractionStart = Date.now();

      try {
        totalProcessed++;

        // Get file size for performance correlation
        const fileStats = await plugins.fs.stat(filePath);
        const fileSizeKB = fileStats.size / 1024;

        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        const fileExtractionTime = Date.now() - fileExtractionStart;
        totalExtractionTime += fileExtractionTime;

        if (extractionResult) {
          totalSuccessful++;

          if (fileSizeKB > 0) {
            const timePerKB = fileExtractionTime / fileSizeKB;

            // Record size vs performance data
            fileSizePerformance.push({
              fileName,
              sizeKB: fileSizeKB,
              extractionTimeMs: fileExtractionTime,
              timePerKB,
            });

            tools.log(`✓ ${fileName}: ${fileSizeKB.toFixed(1)}KB → ${fileExtractionTime}ms (${timePerKB.toFixed(2)}ms/KB)`);
          } else {
            // A zero-byte file would make the ratio Infinity/NaN and poison the averages.
            tools.log(`✓ ${fileName}: ${fileSizeKB.toFixed(1)}KB → ${fileExtractionTime}ms (zero-size file, per-KB rate skipped)`);
          }

          // Quick content verification
          const xmlContent = await invoice.toXmlString();
          if (xmlContent.length < 100) {
            tools.log(` ⚠ Suspiciously short XML content: ${xmlContent.length} chars`);
          }
        } else {
          tools.log(`⚠ ${fileName}: Extraction failed (${fileSizeKB.toFixed(1)}KB, ${fileExtractionTime}ms)`);
        }
      } catch (error) {
        // Failed files still count towards the total time so the average stays honest.
        const fileExtractionTime = Date.now() - fileExtractionStart;
        totalExtractionTime += fileExtractionTime;
        tools.log(`✗ ${fileName}: Error after ${fileExtractionTime}ms - ${describeError(error)}`);
      }
    }

    // Performance analysis
    const successRate = totalProcessed > 0 ? (totalSuccessful / totalProcessed) * 100 : 0;
    const averageExtractionTime = totalProcessed > 0 ? totalExtractionTime / totalProcessed : 0;

    tools.log(`\nZUGFeRD v2/Factur-X Performance Analysis:`);
    tools.log(`- Files processed: ${totalProcessed}`);
    tools.log(`- Success rate: ${successRate.toFixed(1)}%`);
    tools.log(`- Average extraction time: ${averageExtractionTime.toFixed(1)}ms`);

    if (fileSizePerformance.length > 0) {
      const sampleCount = fileSizePerformance.length;
      const avgTimePerKB = fileSizePerformance.reduce((sum, item) => sum + item.timePerKB, 0) / sampleCount;
      const avgFileSize = fileSizePerformance.reduce((sum, item) => sum + item.sizeKB, 0) / sampleCount;

      tools.log(`- Average file size: ${avgFileSize.toFixed(1)}KB`);
      tools.log(`- Average time per KB: ${avgTimePerKB.toFixed(2)}ms/KB`);

      // Find performance outliers (copy before sorting; the sample list stays untouched).
      const sortedByTime = [...fileSizePerformance].sort((a, b) => b.extractionTimeMs - a.extractionTimeMs);
      const slowest = sortedByTime[0];
      const fastest = sortedByTime[sortedByTime.length - 1];
      tools.log(`- Slowest file: ${slowest.fileName} (${slowest.extractionTimeMs}ms)`);
      tools.log(`- Fastest file: ${fastest.fileName} (${fastest.extractionTimeMs}ms)`);

      // Performance expectations
      expect(avgTimePerKB).toBeLessThan(50); // 50ms per KB max
      expect(averageExtractionTime).toBeLessThan(3000); // 3 seconds max average
    }

    // Success rate expectations
    if (totalProcessed > 0) {
      expect(successRate).toBeGreaterThan(0); // At least one should work
    }
  } catch (error) {
    tools.log(`Corpus performance analysis failed: ${describeError(error)}`);
    throw error;
  }

  const totalDuration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-corpus-performance', totalDuration);

  tools.log(`Performance analysis completed in ${totalDuration}ms`);
});
|
|
|
|
// Profile detection: loads up to three ZUGFERD_V2 corpus files, serialises each to XML
// and classifies it by the first guideline marker found in the text. Results are
// tallied per profile and logged. The test contains no assertions: per-file and
// overall errors are only logged. Returns early (without recording a metric) when the
// corpus category is empty; otherwise records 'pdf-facturx-profile-detection'.
tap.test('PDF-03: Factur-X Extraction - Profile Detection', async (tools) => {
  const startTime = Date.now();

  // Caught values are not guaranteed to be Error instances; only those carry `.message`.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Ordered from most to least specific: the first marker contained in the XML wins,
  // so the bare EN16931 URN must stay last (every other marker starts with it).
  // NOTE(review): verify these URNs against the guideline identifiers published in
  // the Factur-X / ZUGFeRD 2.x specification.
  const profileMarkers: ReadonlyArray<readonly [string, string]> = [
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:minimum', 'MINIMUM'],
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:basic', 'BASIC'],
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:comfort', 'COMFORT'],
    ['urn:cen.eu:en16931:2017#compliant#urn:zugferd.de:2p1:extended', 'EXTENDED'],
    ['urn:cen.eu:en16931:2017#conformant#urn:factur-x.eu:1p0:', 'FACTUR-X'],
    ['urn:cen.eu:en16931:2017', 'EN16931'], // Generic EN16931 compliance
  ];

  try {
    const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2');

    if (zugferdV2Files.length === 0) {
      tools.log('⚠ No ZUGFeRD v2/Factur-X files found for profile detection');
      return;
    }

    // Test profile detection with a sample of files
    const sampleFiles = zugferdV2Files.slice(0, 3);

    // Every profile the detector can produce has a declared bucket, including EN16931.
    const profileStats: Record<string, number> = {
      'MINIMUM': 0,
      'BASIC': 0,
      'COMFORT': 0,
      'EXTENDED': 0,
      'FACTUR-X': 0,
      'EN16931': 0,
      'UNKNOWN': 0,
    };

    tools.log(`Testing profile detection with ${sampleFiles.length} files`);

    for (const filePath of sampleFiles) {
      const fileName = plugins.path.basename(filePath);

      try {
        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(filePath);

        if (extractionResult) {
          const xmlContent = await invoice.toXmlString();

          // Detect ZUGFeRD/Factur-X profile from XML content
          const matched = profileMarkers.find(([marker]) => xmlContent.includes(marker));
          const detectedProfile = matched ? matched[1] : 'UNKNOWN';

          profileStats[detectedProfile] = (profileStats[detectedProfile] ?? 0) + 1;

          tools.log(`${fileName}: Profile detected - ${detectedProfile}`);

          // Additional profile-specific checks (informational substring probes only)
          if (detectedProfile !== 'UNKNOWN') {
            const hasMinimumFields = xmlContent.includes('ExchangedDocument') &&
              xmlContent.includes('SupplyChainTradeTransaction');
            const hasComfortFields = xmlContent.includes('ApplicableHeaderTradeAgreement') &&
              xmlContent.includes('ApplicableHeaderTradeDelivery');
            const hasExtendedFields = xmlContent.includes('IncludedSupplyChainTradeLineItem');

            tools.log(` Minimum fields: ${hasMinimumFields}`);
            tools.log(` Comfort fields: ${hasComfortFields}`);
            tools.log(` Extended fields: ${hasExtendedFields}`);
          }
        } else {
          tools.log(`⚠ ${fileName}: No content for profile detection`);
        }
      } catch (error) {
        tools.log(`✗ ${fileName}: Profile detection failed - ${describeError(error)}`);
      }
    }

    // Only profiles that were actually seen are listed.
    tools.log(`\nProfile Detection Summary:`);
    for (const [profile, count] of Object.entries(profileStats)) {
      if (count > 0) {
        tools.log(`- ${profile}: ${count} files`);
      }
    }
  } catch (error) {
    tools.log(`Profile detection failed: ${describeError(error)}`);
  }

  const duration = Date.now() - startTime;
  PerformanceTracker.recordMetric('pdf-facturx-profile-detection', duration);
});
|
|
|
|
// Error recovery: writes three deliberately broken "PDF" files into <cwd>/.nogit,
// feeds each to EInvoice.fromFile and checks that the failure is handled: either a
// falsy result or a thrown error with a non-empty message. Each temp file is removed
// afterwards, and the elapsed time is recorded as 'pdf-facturx-error-recovery'.
tap.test('PDF-03: Factur-X Extraction - Error Recovery', async (tools) => {
  const startedAt = Date.now();

  // Returns a factory that writes `contents` to <cwd>/.nogit/<fileName> and resolves
  // with the resulting path.
  const tempPdfFactory = (fileName: string, contents: string) => async () => {
    const target = plugins.path.join(process.cwd(), '.nogit', fileName);
    await plugins.fs.ensureDir(plugins.path.dirname(target));
    await plugins.fs.writeFile(target, contents);
    return target;
  };

  // Test error recovery with problematic PDF files
  const scenarios = [
    {
      name: 'Non-PDF file with PDF extension',
      createFile: tempPdfFactory('temp-fake.pdf', 'This is not a PDF file'),
      expectedError: true
    },
    {
      name: 'Empty PDF file',
      createFile: tempPdfFactory('temp-empty.pdf', ''),
      expectedError: true
    },
    {
      name: 'PDF header only',
      createFile: tempPdfFactory('temp-header-only.pdf', '%PDF-1.4\n'),
      expectedError: true
    }
  ];

  for (const scenario of scenarios) {
    tools.log(`Testing error recovery: ${scenario.name}`);

    // Stays null until the fixture has been written, so cleanup knows what to remove.
    let writtenPath = null;

    try {
      if (!scenario.createFile) {
        continue;
      }

      writtenPath = await scenario.createFile();

      const invoice = new EInvoice();
      const outcome = await invoice.fromFile(writtenPath);

      let verdict;
      if (!scenario.expectedError) {
        verdict = `✓ ${scenario.name}: Operation succeeded as expected`;
      } else if (outcome) {
        verdict = `⚠ Expected error for ${scenario.name} but extraction succeeded`;
      } else {
        verdict = `✓ ${scenario.name}: Gracefully handled (no result)`;
      }
      tools.log(verdict);
    } catch (failure) {
      if (!scenario.expectedError) {
        tools.log(`✗ ${scenario.name}: Unexpected error - ${failure.message}`);
        throw failure;
      }

      tools.log(`✓ ${scenario.name}: Expected error caught - ${failure.message}`);
      expect(failure.message).toBeTruthy();
    } finally {
      // Clean up temp file; a failed removal is only worth a warning.
      if (writtenPath) {
        try {
          await plugins.fs.remove(writtenPath);
        } catch (cleanupFailure) {
          tools.log(`Warning: Failed to clean up ${writtenPath}`);
        }
      }
    }
  }

  const elapsedMs = Date.now() - startedAt;
  PerformanceTracker.recordMetric('pdf-facturx-error-recovery', elapsedMs);
});
|
|
|
|
// Final report: prints the PerformanceTracker summary (avg / min / max / p95) for each
// metric name listed below. Metrics without a summary are skipped silently.
tap.test('PDF-03: Performance Summary', async (tools) => {
  const metricNames = [
    'pdf-facturx-basic-extraction',
    'pdf-facturx-specific-testing',
    'pdf-facturx-corpus-performance',
    'pdf-facturx-profile-detection',
    'pdf-facturx-error-recovery'
  ];

  tools.log(`\n=== ZUGFeRD v2/Factur-X Extraction Performance Summary ===`);

  // Summaries are fetched one after another so the output order matches the list.
  for (const metricName of metricNames) {
    const stats = await PerformanceTracker.getSummary(metricName);
    if (!stats) {
      continue;
    }

    tools.log(`${metricName}:`);
    tools.log(` avg=${stats.average}ms, min=${stats.min}ms, max=${stats.max}ms, p95=${stats.p95}ms`);
  }

  tools.log(`\nZUGFeRD v2/Factur-X extraction testing completed.`);
});