einvoice/test/suite/einvoice_pdf-operations/test.pdf-03.facturx-extraction.ts

215 lines
7.7 KiB
TypeScript
Raw Normal View History

2025-05-25 19:45:37 +00:00
import { tap, expect } from '@git.zone/tstest/tapbundle';
2025-05-28 08:40:26 +00:00
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
import { promises as fs } from 'fs';
import * as path from 'path';
2025-05-25 19:45:37 +00:00
2025-05-28 08:40:26 +00:00
/**
 * PDF-03 extraction smoke test.
 *
 * Loads up to ten PDFs from the 'ZUGFERD_V2_CORRECT' corpus category, runs
 * EInvoice.fromPdf on each one and counts how many yield a non-trivial XML
 * payload. Per-file failures (including failed per-file expectations) are
 * caught and logged so one bad PDF does not abort the run; the test as a whole
 * only requires that at least one extraction succeeded.
 */
tap.test('PDF-03: Factur-X Extraction - should extract and validate Factur-X PDFs', async () => {
  // Get ZUGFeRD v2/Factur-X PDF files from corpus
  const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = zugferdV2Files.filter(f => f.endsWith('.pdf'));

  // Test first 10 for performance; the summary below reports this sample size.
  const sampleFiles = pdfFiles.slice(0, 10);

  console.log(`Testing Factur-X extraction from ${pdfFiles.length} PDFs`);

  let successCount = 0;
  let facturxDetectedCount = 0;

  for (const filePath of sampleFiles) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);

      // track() hands back the callback's result together with a timing metric.
      const { result: invoice, metric } = await PerformanceTracker.track(
        'facturx-extraction',
        async () => {
          return await EInvoice.fromPdf(pdfBuffer);
        },
        { file: fileName }
      );

      expect(invoice).toBeTruthy();

      const xml = invoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // Check for Factur-X/ZUGFeRD v2 specific markers.
      // 'CrossIndustryInvoice' also covers the fully qualified
      // 'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100' namespace.
      const isFacturX = xml.includes('urn:cen.eu:en16931:2017') ||
        xml.includes('factur-x') ||
        xml.includes('CrossIndustryInvoice');

      if (isFacturX) {
        facturxDetectedCount++;
        console.log(`${fileName}: Factur-X detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
        console.log(`${fileName}: Extracted but format unclear (${metric.duration.toFixed(2)}ms)`);
      }

      successCount++;
    } catch (error) {
      // The caught value is not guaranteed to be an Error; narrow before
      // reading .message so non-Error throws are still reported meaningfully.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`${fileName}: ${reason}`);
    }
  }

  console.log(`\nFactur-X Extraction Summary:`);
  console.log(` Total processed: ${sampleFiles.length}`);
  console.log(` Successful extractions: ${successCount}`);
  console.log(` Factur-X format detected: ${facturxDetectedCount}`);

  // We expect most Factur-X files to be successfully extracted
  expect(successCount).toBeGreaterThan(0);
});
2025-05-28 08:40:26 +00:00
/**
 * PDF-03 format validation.
 *
 * Extracts the XML from the first PDF of the 'ZUGFERD_V2_CORRECT' corpus
 * category, logs a checklist of Factur-X markers found in it, and asserts the
 * two that are treated as mandatory here: a CrossIndustryInvoice root element
 * and a <ram:ID> element. Returns early (without asserting) when the corpus
 * has no PDFs.
 */
tap.test('PDF-03: Factur-X Format Validation - should validate Factur-X specific elements', async () => {
  // Renders a boolean check result for the checklist output.
  const mark = (found: boolean): string => (found ? '✓' : '✗');

  // Only a single corpus PDF is needed for the detailed inspection.
  const corpusFiles = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const candidates = corpusFiles.filter((entry) => entry.endsWith('.pdf'));

  if (candidates.length === 0) {
    console.log('No Factur-X PDFs found, skipping validation test');
    return;
  }

  const [testFile] = candidates;
  console.log(`Validating Factur-X format with: ${path.basename(testFile)}`);

  const invoice = await EInvoice.fromPdf(await fs.readFile(testFile));
  expect(invoice).toBeTruthy();

  const xml = invoice.getXml();
  expect(xml).toBeTruthy();

  console.log('Checking Factur-X format characteristics:');

  // EN16931 namespace marker (informational only).
  const hasEN16931Namespace = xml.includes('urn:cen.eu:en16931:2017');
  console.log(` EN16931 namespace: ${mark(hasEN16931Namespace)}`);

  // Root element of ZUGFeRD v2/Factur-X documents, with or without the rsm prefix.
  const rootOpeners = ['<rsm:CrossIndustryInvoice', '<CrossIndustryInvoice'];
  const hasCrossIndustryInvoice = rootOpeners.some((opener) => xml.includes(opener));
  console.log(` CrossIndustryInvoice root: ${mark(hasCrossIndustryInvoice)}`);

  // Basic invoice elements.
  const hasInvoiceId = xml.includes('<ram:ID>');
  console.log(` Invoice ID element: ${mark(hasInvoiceId)}`);

  const hasIssueDate = xml.includes('<ram:IssueDateTime>');
  console.log(` Issue date element: ${mark(hasIssueDate)}`);

  // Profile specification container (informational only).
  const hasProfileSpec = xml.includes('GuidelineSpecifiedDocumentContextParameter');
  console.log(` Profile specification: ${mark(hasProfileSpec)}`);

  // Format as reported by the invoice object itself.
  const detectedFormat = invoice.getFormat();
  console.log(` Detected format: ${detectedFormat}`);

  // Only the root element and the ID element are hard requirements.
  expect(hasCrossIndustryInvoice).toBeTruthy();
  expect(hasInvoiceId).toBeTruthy();
});
2025-05-28 08:40:26 +00:00
/**
 * PDF-03 profile detection.
 *
 * Extracts the XML from up to five PDFs of the 'ZUGFERD_V2_CORRECT' corpus
 * category and tallies which Factur-X/ZUGFeRD profile each one declares.
 * Per-file errors are logged and skipped; the test requires that at least one
 * file was classified (an 'UNKNOWN' classification counts). Returns early
 * when the corpus has no PDFs.
 */
tap.test('PDF-03: Factur-X Profile Detection - should detect different Factur-X profiles', async () => {
  const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = zugferdV2Files.filter(f => f.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping profile detection test');
    return;
  }

  const sampleFiles = pdfFiles.slice(0, 5);

  // Captures the text of the ID element that directly follows the opening
  // GuidelineSpecifiedDocumentContextParameter tag (any namespace prefix).
  const guidelineIdPattern =
    /<(?:[\w.-]+:)?GuidelineSpecifiedDocumentContextParameter\b[^>]*>\s*<(?:[\w.-]+:)?ID\b[^>]*>([^<]*)</;

  // Maps a piece of text to a profile label by keyword. 'basicwl' must be
  // tested before 'basic', and 'en16931' last, because the BASIC and EXTENDED
  // guideline IDs also contain the EN16931 URN.
  const classifyProfile = (text: string): string => {
    if (text.includes('basicwl')) return 'BASIC WL';
    if (text.includes('basic')) return 'BASIC';
    if (text.includes('comfort')) return 'COMFORT';
    if (text.includes('extended')) return 'EXTENDED';
    if (text.includes('minimum')) return 'MINIMUM';
    if (text.includes('en16931')) return 'EN16931';
    return 'UNKNOWN';
  };

  // Prefer the declared guideline ID: scanning the whole document would let
  // free text (e.g. a note mentioning "extended") decide the profile. Fall
  // back to the whole-document scan only when no guideline ID can be located.
  const detectProfile = (xml: string): string => {
    const match = guidelineIdPattern.exec(xml);
    if (match !== null && match[1] !== undefined && match[1].trim() !== '') {
      return classifyProfile(match[1].trim().toLowerCase());
    }
    return classifyProfile(xml);
  };

  console.log(`Testing profile detection with ${sampleFiles.length} Factur-X PDFs`);

  const profileCounts = new Map<string, number>();

  for (const filePath of sampleFiles) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);
      const invoice = await EInvoice.fromPdf(pdfBuffer);
      const xml = invoice.getXml();

      // Detect profile from XML content
      const profile = detectProfile(xml);

      profileCounts.set(profile, (profileCounts.get(profile) || 0) + 1);
      console.log(` ${fileName}: Profile ${profile}`);
    } catch (error) {
      // The caught value is not guaranteed to be an Error; narrow before
      // reading .message.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(` ${fileName}: Error - ${reason}`);
    }
  }

  console.log(`\nProfile Distribution:`);
  for (const [profile, count] of profileCounts) {
    console.log(` ${profile}: ${count} files`);
  }

  // Should have detected at least one profile
  expect(profileCounts.size).toBeGreaterThan(0);
});
2025-05-28 08:40:26 +00:00
/**
 * PDF-03 extraction performance.
 *
 * Times EInvoice.fromPdf on up to five PDFs of the 'ZUGFERD_V2_CORRECT'
 * corpus category via PerformanceTracker.track and asserts that the average
 * duration stays below 1000 and the slowest run below 5000 (logged as
 * milliseconds). Returns early when the corpus has no PDFs. Extraction errors
 * are not caught here, so a failing PDF fails the test.
 */
tap.test('PDF-03: Factur-X Performance - should extract Factur-X PDFs efficiently', async () => {
  const corpusFiles = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = corpusFiles.filter((entry) => entry.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping performance test');
    return;
  }

  // At most five files are timed; the guard above ensures at least one.
  const sample = pdfFiles.slice(0, 5);
  console.log(`Testing extraction performance with ${sample.length} Factur-X PDFs`);

  const durations: number[] = [];

  for (const pdfPath of sample) {
    const label = path.basename(pdfPath);
    // Read outside the tracked callback so only the extraction is timed.
    const contents = await fs.readFile(pdfPath);

    const tracked = await PerformanceTracker.track(
      'facturx-performance',
      async () => {
        return await EInvoice.fromPdf(contents);
      },
      { file: label }
    );

    const elapsed = tracked.metric.duration;
    durations.push(elapsed);
    console.log(` ${label}: ${elapsed.toFixed(2)}ms`);
  }

  // Aggregate in a single pass: running total and running maximum.
  let total = 0;
  let maxDuration = -Infinity;
  for (const elapsed of durations) {
    total = total + elapsed;
    maxDuration = Math.max(maxDuration, elapsed);
  }
  const avgDuration = total / durations.length;

  console.log(`\nPerformance Summary:`);
  console.log(` Average: ${avgDuration.toFixed(2)}ms`);
  console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);

  // Performance expectation - should complete within reasonable time
  expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
  expect(maxDuration).toBeLessThan(5000); // No single extraction over 5 seconds
});
2025-05-28 08:40:26 +00:00
// Called once, after every tap.test(...) registration in this file.
// NOTE(review): presumably this is what triggers execution of the registered tests — confirm against the tapbundle docs.
tap.start();