2025-05-25 19:45:37 +00:00
|
|
|
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
2025-05-28 08:40:26 +00:00
|
|
|
import { EInvoice } from '../../../ts/index.js';
|
|
|
|
import { CorpusLoader } from '../../helpers/corpus.loader.js';
|
|
|
|
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
|
|
|
|
import { promises as fs } from 'fs';
|
|
|
|
import * as path from 'path';
|
2025-05-25 19:45:37 +00:00
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * PDF-02 (extraction): reads up to ten PDFs from the `ZUGFERD_V1_CORRECT`
 * corpus category, extracts the embedded invoice via `EInvoice.fromPdf`, and
 * checks that the resulting XML is non-trivial.
 *
 * Failures for individual files are logged and skipped; the test as a whole
 * only requires that at least one file was extracted successfully.
 */
tap.test('PDF-02: ZUGFeRD v1 Extraction - should extract and validate ZUGFeRD v1 PDFs', async () => {
  // Get ZUGFeRD v1 PDF files from corpus
  const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
  const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));

  console.log(`Testing ZUGFeRD v1 extraction from ${pdfFiles.length} PDFs`);

  // Test first 10 for performance. The sample is computed once so the loop
  // and the "Total processed" summary line can never disagree.
  const sampleFiles = pdfFiles.slice(0, 10);

  let successCount = 0;
  let v1DetectedCount = 0;

  for (const filePath of sampleFiles) {
    const fileName = path.basename(filePath);

    try {
      const pdfBuffer = await fs.readFile(filePath);

      const { result: invoice, metric } = await PerformanceTracker.track(
        'zugferd-v1-extraction',
        async () => {
          return await EInvoice.fromPdf(pdfBuffer);
        },
        { file: fileName }
      );

      // NOTE(review): these expectations sit inside the try block, so anything
      // they throw is handled by the catch below and reported as a failed file
      // rather than aborting the whole test.
      expect(invoice).toBeTruthy();
      const xml = invoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // Check for ZUGFeRD v1 specific markers
      const isZugferdV1 =
        xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
        xml.includes('CrossIndustryDocument') ||
        (xml.includes('ZUGFeRD') && !xml.includes('CrossIndustryInvoice'));

      if (isZugferdV1) {
        v1DetectedCount++;
        console.log(`✓ ${fileName}: ZUGFeRD v1 detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
        console.log(`✓ ${fileName}: Extracted but not ZUGFeRD v1 format (${metric.duration.toFixed(2)}ms)`);
      }

      successCount++;
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error; narrow before reading
      // `.message` so non-Error throws are still reported meaningfully.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`✗ ${fileName}: ${reason}`);
    }
  }

  console.log(`\nZUGFeRD v1 Extraction Summary:`);
  console.log(` Total processed: ${sampleFiles.length}`);
  console.log(` Successful extractions: ${successCount}`);
  console.log(` ZUGFeRD v1 format detected: ${v1DetectedCount}`);

  // We expect most ZUGFeRD v1 files to be successfully extracted
  expect(successCount).toBeGreaterThan(0);
});
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * PDF-02 (format validation): takes the first PDF of the `ZUGFERD_V1_CORRECT`
 * corpus category, extracts its XML and reports which ZUGFeRD v1 markers are
 * present. The test returns early (without asserting) when the corpus has no
 * PDFs for this category.
 */
tap.test('PDF-02: ZUGFeRD v1 Format Validation - should validate v1 specific elements', async () => {
  // Renders a boolean check result for the console report.
  const mark = (ok: boolean): string => (ok ? '✓' : '✗');

  // Only one ZUGFeRD v1 file is needed for the detailed inspection.
  const corpusEntries = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
  const [samplePath] = corpusEntries.filter(entry => entry.endsWith('.pdf'));

  if (samplePath === undefined) {
    console.log('No ZUGFeRD v1 PDFs found, skipping validation test');
    return;
  }

  const sampleName = path.basename(samplePath);
  console.log(`Validating ZUGFeRD v1 format with: ${sampleName}`);

  const invoice = await EInvoice.fromPdf(await fs.readFile(samplePath));
  expect(invoice).toBeTruthy();

  const xml = invoice.getXml();
  expect(xml).toBeTruthy();

  // ZUGFeRD v1 specific validations
  console.log('Checking ZUGFeRD v1 format characteristics:');

  // The v1 namespace URI should appear somewhere in the document.
  const namespaceFound = xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0');
  console.log(` ZUGFeRD v1 namespace: ${mark(namespaceFound)}`);

  // The root element may be written with or without the `rsm:` prefix.
  const rootFound = ['<rsm:CrossIndustryDocument', '<CrossIndustryDocument'].some(tag =>
    xml.includes(tag)
  );
  console.log(` CrossIndustryDocument root: ${mark(rootFound)}`);

  // Basic invoice elements.
  const idFound = xml.includes('<ram:ID>');
  console.log(` Invoice ID element: ${mark(idFound)}`);

  // The issue date is reported for information only; it is not asserted on.
  const issueDateFound = xml.includes('<ram:IssueDateTime>');
  console.log(` Issue date element: ${mark(issueDateFound)}`);

  // The detected format is likewise only logged.
  console.log(` Detected format: ${invoice.getFormat()}`);

  // Minimum bar: a v1 root element or namespace, plus an ID element.
  expect(rootFound || namespaceFound).toBeTruthy();
  expect(idFound).toBeTruthy();
});
|
|
|
|
|
2025-05-28 08:40:26 +00:00
|
|
|
/**
 * PDF-02 (performance): times `EInvoice.fromPdf` on up to five PDFs from the
 * `ZUGFERD_V1_CORRECT` corpus category and asserts on the average and the
 * maximum duration reported by `PerformanceTracker.track`. The test returns
 * early (without asserting) when the corpus has no PDFs for this category.
 */
tap.test('PDF-02: ZUGFeRD v1 Performance - should extract v1 PDFs efficiently', async () => {
  const corpusEntries = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
  const pdfFiles = corpusEntries.filter(entry => entry.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping performance test');
    return;
  }

  // At most five files are timed; the sample is non-empty past the guard above.
  const sample = pdfFiles.slice(0, 5);
  console.log(`Testing extraction performance with ${sample.length} ZUGFeRD v1 PDFs`);

  // Running aggregates, accumulated in file order.
  let totalDuration = 0;
  let maxDuration = -Infinity;

  // Files are processed one after another so each measurement stands alone.
  for (const pdfPath of sample) {
    const label = path.basename(pdfPath);
    const contents = await fs.readFile(pdfPath);

    const tracked = await PerformanceTracker.track(
      'zugferd-v1-performance',
      async () => await EInvoice.fromPdf(contents),
      { file: label }
    );
    const elapsed = tracked.metric.duration;

    totalDuration += elapsed;
    maxDuration = Math.max(maxDuration, elapsed);
    console.log(` ${label}: ${elapsed.toFixed(2)}ms`);
  }

  const avgDuration = totalDuration / sample.length;

  console.log(`\nPerformance Summary:`);
  console.log(` Average: ${avgDuration.toFixed(2)}ms`);
  console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);

  // Performance expectation - should complete within reasonable time
  expect(avgDuration).toBeLessThan(1000); // Less than 1 second on average
  expect(maxDuration).toBeLessThan(5000); // No single extraction over 5 seconds
});
|
|
|
|
|
|
|
|
// Called once, after every PDF-02 test in this file has been registered via
// tap.test(). NOTE(review): presumably this is what triggers the tapbundle
// runner to execute them — confirm against the tapbundle documentation.
tap.start();
|