2025-05-25 19:45:37 +00:00
|
|
|
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
2025-05-28 08:40:26 +00:00
|
|
|
import { EInvoice } from '../../../ts/index.js';
|
|
|
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
|
|
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
|
|
|
import { promises as fs } from 'fs';
|
|
|
|
import * as path from 'path';
|
2025-05-25 19:45:37 +00:00
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * PDF-03 extraction test: reads a sample of PDFs from the ZUGFERD_V2_CORRECT
 * corpus, extracts an invoice from each one with EInvoice.fromPdf and requires
 * at least one successful extraction.
 *
 * A failure for an individual file (a thrown error or a failed expectation
 * inside the loop) is logged and that file is simply not counted as a success.
 */
tap.test('PDF-03: Factur-X Extraction - should extract and validate Factur-X PDFs', async () => {
  // Get ZUGFeRD v2/Factur-X PDF files from corpus
  const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = zugferdV2Files.filter(f => f.endsWith('.pdf'));

  console.log(`Testing Factur-X extraction from ${pdfFiles.length} PDFs`);

  // Test first 10 for performance. The summary reports the size of this same
  // sample so the limit is written down in exactly one place.
  const sampleFiles = pdfFiles.slice(0, 10);

  // Substrings whose presence marks the extracted XML as Factur-X/ZUGFeRD v2.
  const facturxMarkers = [
    'urn:cen.eu:en16931:2017',
    'factur-x',
    'CrossIndustryInvoice',
    'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100',
  ];

  let successCount = 0;
  let facturxDetectedCount = 0;

  for (const filePath of sampleFiles) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);

      const { result: invoice, metric } = await PerformanceTracker.track(
        'facturx-extraction',
        async () => {
          return await EInvoice.fromPdf(pdfBuffer);
        },
        { file: fileName }
      );

      expect(invoice).toBeTruthy();
      const xml = invoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // Check for Factur-X/ZUGFeRD v2 specific markers
      const isFacturX = facturxMarkers.some(marker => xml.includes(marker));

      if (isFacturX) {
        facturxDetectedCount++;
        console.log(`✓ ${fileName}: Factur-X detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
        console.log(`✓ ${fileName}: Extracted but format unclear (${metric.duration.toFixed(2)}ms)`);
      }

      successCount++;
    } catch (error: unknown) {
      // A caught value is not guaranteed to be an Error; narrow before reading
      // `.message` so non-Error throws are still reported meaningfully.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`✗ ${fileName}: ${reason}`);
    }
  }

  console.log(`\nFactur-X Extraction Summary:`);
  console.log(` Total processed: ${sampleFiles.length}`);
  console.log(` Successful extractions: ${successCount}`);
  console.log(` Factur-X format detected: ${facturxDetectedCount}`);

  // At least one file of the sample must have been extracted successfully
  expect(successCount).toBeGreaterThan(0);
});
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * PDF-03 format validation test: extracts the first PDF of the
 * ZUGFERD_V2_CORRECT corpus and reports which Factur-X characteristics appear
 * in its XML. Only the CrossIndustryInvoice root and the `<ram:ID>` element
 * are asserted; the remaining characteristics are logged for information.
 * Returns early (without asserting) when the corpus holds no PDF.
 */
tap.test('PDF-03: Factur-X Format Validation - should validate Factur-X specific elements', async () => {
  // Pick a single Factur-X file for the detailed inspection
  const corpusFiles = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = corpusFiles.filter(candidate => candidate.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping validation test');
    return;
  }

  const [testFile] = pdfFiles;
  const fileName = path.basename(testFile);
  console.log(`Validating Factur-X format with: ${fileName}`);

  const invoice = await EInvoice.fromPdf(await fs.readFile(testFile));
  expect(invoice).toBeTruthy();

  const xml = invoice.getXml();
  expect(xml).toBeTruthy();

  // Renders a boolean finding as the tick/cross used in the report lines
  const mark = (found: boolean): string => (found ? '✓' : '✗');

  console.log('Checking Factur-X format characteristics:');

  // CrossIndustryInvoice root element (ZUGFeRD v2/Factur-X), with or without
  // the rsm prefix
  const hasCrossIndustryInvoice = ['<rsm:CrossIndustryInvoice', '<CrossIndustryInvoice'].some(
    openingTag => xml.includes(openingTag)
  );
  // Basic invoice element that is asserted below
  const hasInvoiceId = xml.includes('<ram:ID>');

  // Report lines in output order: EN16931 namespace, root element, basic
  // invoice elements, profile specification
  const findings: Array<[string, boolean]> = [
    ['EN16931 namespace', xml.includes('urn:cen.eu:en16931:2017')],
    ['CrossIndustryInvoice root', hasCrossIndustryInvoice],
    ['Invoice ID element', hasInvoiceId],
    ['Issue date element', xml.includes('<ram:IssueDateTime>')],
    ['Profile specification', xml.includes('GuidelineSpecifiedDocumentContextParameter')],
  ];
  for (const [label, found] of findings) {
    console.log(` ${label}: ${mark(found)}`);
  }

  // Format as reported by the invoice object itself
  const detectedFormat = invoice.getFormat();
  console.log(` Detected format: ${detectedFormat}`);

  // Minimum requirement for v2/Factur-X: CrossIndustryInvoice root plus an ID
  expect(hasCrossIndustryInvoice).toBeTruthy();
  expect(hasInvoiceId).toBeTruthy();
});
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * Ordered keyword → profile label table. The first keyword found wins, so
 * 'en16931' stays last: it only applies when none of the more specific
 * keywords is present.
 */
const FACTURX_PROFILE_KEYWORDS: ReadonlyArray<readonly [string, string]> = [
  ['basic', 'BASIC'],
  ['comfort', 'COMFORT'],
  ['extended', 'EXTENDED'],
  ['minimum', 'MINIMUM'],
  ['en16931', 'EN16931'],
];

/**
 * Derives a profile label from invoice XML.
 *
 * Matching is scoped to the ID inside GuidelineSpecifiedDocumentContextParameter
 * (the element in which CII documents declare their guideline/profile URN) when
 * that element can be located, so unrelated free text elsewhere in the document
 * (e.g. the word "basic" in an item description) cannot decide the profile.
 * When the element is not found the whole document is scanned instead.
 *
 * @param xml - the invoice XML as a string
 * @returns one of the labels in FACTURX_PROFILE_KEYWORDS, or 'UNKNOWN'
 */
function detectFacturXProfile(xml: string): string {
  const guidelineMatch =
    /<(?:[\w.-]+:)?GuidelineSpecifiedDocumentContextParameter\b[^>]*>\s*<(?:[\w.-]+:)?ID\b[^>]*>([^<]*)</.exec(
      xml
    );
  const haystack = guidelineMatch?.[1] ?? xml;

  for (const [keyword, profile] of FACTURX_PROFILE_KEYWORDS) {
    if (haystack.includes(keyword)) {
      return profile;
    }
  }
  return 'UNKNOWN';
}

/**
 * PDF-03 profile detection test: extracts up to five PDFs from the
 * ZUGFERD_V2_CORRECT corpus, classifies each by profile and logs the
 * distribution. Files that fail to extract are logged and left out of the
 * distribution. Returns early (without asserting) when the corpus holds no PDF.
 */
tap.test('PDF-03: Factur-X Profile Detection - should detect different Factur-X profiles', async () => {
  const zugferdV2Files = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = zugferdV2Files.filter(f => f.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping profile detection test');
    return;
  }

  console.log(`Testing profile detection with ${Math.min(5, pdfFiles.length)} Factur-X PDFs`);

  const profileCounts = new Map<string, number>();

  for (const filePath of pdfFiles.slice(0, 5)) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);
      const invoice = await EInvoice.fromPdf(pdfBuffer);
      const xml = invoice.getXml();

      // Detect profile from XML content
      const profile = detectFacturXProfile(xml);

      profileCounts.set(profile, (profileCounts.get(profile) ?? 0) + 1);
      console.log(` ${fileName}: Profile ${profile}`);
    } catch (error: unknown) {
      // A caught value is not guaranteed to be an Error; narrow before reading
      // `.message` so non-Error throws are still reported meaningfully.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(` ${fileName}: Error - ${reason}`);
    }
  }

  console.log(`\nProfile Distribution:`);
  for (const [profile, count] of profileCounts) {
    console.log(` ${profile}: ${count} files`);
  }

  // Should have classified at least one file ('UNKNOWN' counts as a class)
  expect(profileCounts.size).toBeGreaterThan(0);
});
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * PDF-03 performance test: times EInvoice.fromPdf for up to five PDFs from the
 * ZUGFERD_V2_CORRECT corpus via PerformanceTracker.track and asserts that the
 * average stays below 1000 ms and the slowest run below 5000 ms.
 * Returns early (without asserting) when the corpus holds no PDF.
 */
tap.test('PDF-03: Factur-X Performance - should extract Factur-X PDFs efficiently', async () => {
  const corpusFiles = await CorpusLoader.getFiles('ZUGFERD_V2_CORRECT');
  const pdfFiles = corpusFiles.filter(candidate => candidate.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No Factur-X PDFs found, skipping performance test');
    return;
  }

  console.log(`Testing extraction performance with ${Math.min(5, pdfFiles.length)} Factur-X PDFs`);

  // Running aggregates over the measured durations (in the order measured)
  let totalDuration = 0;
  let maxDuration = -Infinity;
  let measuredCount = 0;

  for (const filePath of pdfFiles.slice(0, 5)) {
    const fileName = path.basename(filePath);
    // Read the file before tracking starts so disk I/O is not part of the metric
    const pdfBuffer = await fs.readFile(filePath);

    const tracked = await PerformanceTracker.track(
      'facturx-performance',
      async () => {
        return await EInvoice.fromPdf(pdfBuffer);
      },
      { file: fileName }
    );
    const elapsed = tracked.metric.duration;

    totalDuration = totalDuration + elapsed;
    maxDuration = Math.max(maxDuration, elapsed);
    measuredCount += 1;

    console.log(` ${fileName}: ${elapsed.toFixed(2)}ms`);
  }

  const avgDuration = totalDuration / measuredCount;

  console.log(`\nPerformance Summary:`);
  console.log(` Average: ${avgDuration.toFixed(2)}ms`);
  console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);

  // Extraction has to finish within a reasonable time budget
  expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
  expect(maxDuration).toBeLessThan(5000); // No single extraction over 5 seconds
});
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
// Start the tap runner; presumably this executes the tests registered above
// via tap.test (behaviour defined by @git.zone/tstest — confirm there).
tap.start();
|