einvoice/test/suite/einvoice_pdf-operations/test.pdf-02.zugferd-v1-extraction.ts
2025-05-28 08:40:26 +00:00

157 lines
5.8 KiB
TypeScript

import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
import { promises as fs } from 'fs';
import * as path from 'path';
// PDF-02 (extraction): pulls the embedded XML out of a sample of PDFs from the
// ZUGFERD_V1_CORRECT corpus and checks that each one yields a non-trivial
// document.
//
// Anything thrown while processing a single file - including by the expect()
// calls inside the loop - is caught and logged, so one bad file does not abort
// the run. The test only requires at least one successful extraction overall.
tap.test('PDF-02: ZUGFeRD v1 Extraction - should extract and validate ZUGFeRD v1 PDFs', async () => {
  // Upper bound on the number of PDFs exercised, to keep the run fast
  const maxFilesToTest = 10;

  // Get ZUGFeRD v1 PDF files from corpus
  const zugferdV1Files = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
  const pdfFiles = zugferdV1Files.filter(f => f.endsWith('.pdf'));

  // Skip on an empty corpus, as the validation and performance tests in this
  // file do, instead of failing on the final "greater than 0" assertion
  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping extraction test');
    return;
  }

  console.log(`Testing ZUGFeRD v1 extraction from ${pdfFiles.length} PDFs`);

  // The slice is taken once so the loop and the summary agree on the count
  const filesToTest = pdfFiles.slice(0, maxFilesToTest);

  let successCount = 0;
  let v1DetectedCount = 0;

  for (const filePath of filesToTest) {
    const fileName = path.basename(filePath);
    try {
      const pdfBuffer = await fs.readFile(filePath);
      const { result: invoice, metric } = await PerformanceTracker.track(
        'zugferd-v1-extraction',
        async () => {
          return await EInvoice.fromPdf(pdfBuffer);
        },
        { file: fileName }
      );

      expect(invoice).toBeTruthy();
      const xml = invoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // Check for ZUGFeRD v1 specific markers. The bare 'CrossIndustryDocument'
      // check also matches the full namespace URN; both are kept to show intent.
      const isZugferdV1 =
        xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0') ||
        xml.includes('CrossIndustryDocument') ||
        (xml.includes('ZUGFeRD') && !xml.includes('CrossIndustryInvoice'));

      if (isZugferdV1) {
        v1DetectedCount++;
        console.log(`${fileName}: ZUGFeRD v1 detected and extracted (${metric.duration.toFixed(2)}ms)`);
      } else {
        console.log(`${fileName}: Extracted but not ZUGFeRD v1 format (${metric.duration.toFixed(2)}ms)`);
      }

      // Counted as a success whether or not the v1 markers were found
      successCount++;
    } catch (error: unknown) {
      // Catch variables are `unknown` under strict mode; narrow before reading
      // `.message`, and fall back to String() for non-Error throws
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`${fileName}: ${reason}`);
    }
  }

  console.log(`\nZUGFeRD v1 Extraction Summary:`);
  console.log(` Total processed: ${filesToTest.length}`);
  console.log(` Successful extractions: ${successCount}`);
  console.log(` ZUGFeRD v1 format detected: ${v1DetectedCount}`);

  // At least one file from the sample must have been extracted successfully
  expect(successCount).toBeGreaterThan(0);
});
// PDF-02 (format validation): extracts the first PDF of the ZUGFERD_V1_CORRECT
// corpus and inspects the resulting XML for ZUGFeRD v1 markers. Returns early
// (without asserting anything) when the corpus contains no PDFs.
tap.test('PDF-02: ZUGFeRD v1 Format Validation - should validate v1 specific elements', async () => {
  // Renders a boolean as the check/cross glyph used in the report lines
  const mark = (present: boolean): string => (present ? '✓' : '✗');

  // Only the first PDF of the corpus is examined in detail
  const corpusEntries = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
  const candidates = corpusEntries.filter(entry => entry.endsWith('.pdf'));
  if (candidates.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping validation test');
    return;
  }

  const samplePath = candidates[0];
  const sampleName = path.basename(samplePath);
  console.log(`Validating ZUGFeRD v1 format with: ${sampleName}`);

  const rawPdf = await fs.readFile(samplePath);
  const invoice = await EInvoice.fromPdf(rawPdf);
  expect(invoice).toBeTruthy();

  const xml = invoice.getXml();
  expect(xml).toBeTruthy();

  console.log('Checking ZUGFeRD v1 format characteristics:');

  // v1 namespace URN
  const namespaceFound = xml.includes('urn:ferd:CrossIndustryDocument:invoice:1p0');
  console.log(` ZUGFeRD v1 namespace: ${mark(namespaceFound)}`);

  // Root element, with or without the rsm: prefix
  const rootOpeners = ['<rsm:CrossIndustryDocument', '<CrossIndustryDocument'];
  const rootFound = rootOpeners.some(opener => xml.includes(opener));
  console.log(` CrossIndustryDocument root: ${mark(rootFound)}`);

  // Basic invoice elements
  const idFound = xml.includes('<ram:ID>');
  console.log(` Invoice ID element: ${mark(idFound)}`);

  const issueDateFound = xml.includes('<ram:IssueDateTime>');
  console.log(` Issue date element: ${mark(issueDateFound)}`);

  // The detected format is reported for information only; it is not asserted
  const formatLabel = invoice.getFormat();
  console.log(` Detected format: ${formatLabel}`);

  // Either v1 marker is sufficient, but an ID element is mandatory.
  // The issue-date check above is logged only.
  expect(rootFound || namespaceFound).toBeTruthy();
  expect(idFound).toBeTruthy();
});
// PDF-02 (performance): times EInvoice.fromPdf on up to five corpus PDFs, one
// after another, and asserts limits on the average and the slowest run.
// Returns early (without asserting anything) when the corpus contains no PDFs.
tap.test('PDF-02: ZUGFeRD v1 Performance - should extract v1 PDFs efficiently', async () => {
  const corpusEntries = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
  const candidates = corpusEntries.filter(entry => entry.endsWith('.pdf'));
  if (candidates.length === 0) {
    console.log('No ZUGFeRD v1 PDFs found, skipping performance test');
    return;
  }

  const sample = candidates.slice(0, 5);
  console.log(`Testing extraction performance with ${sample.length} ZUGFeRD v1 PDFs`);

  // Running aggregates over the measured durations
  let totalDuration = 0;
  let maxDuration = -Infinity;

  // Files are processed sequentially so the timings do not overlap
  for (const samplePath of sample) {
    const label = path.basename(samplePath);
    const rawPdf = await fs.readFile(samplePath);

    const tracked = await PerformanceTracker.track(
      'zugferd-v1-performance',
      async () => {
        return await EInvoice.fromPdf(rawPdf);
      },
      { file: label }
    );

    const elapsed = tracked.metric.duration;
    totalDuration += elapsed;
    maxDuration = Math.max(maxDuration, elapsed);
    console.log(` ${label}: ${elapsed.toFixed(2)}ms`);
  }

  // sample is non-empty here, so the division is well defined
  const avgDuration = totalDuration / sample.length;

  console.log(`\nPerformance Summary:`);
  console.log(` Average: ${avgDuration.toFixed(2)}ms`);
  console.log(` Maximum: ${maxDuration.toFixed(2)}ms`);

  // Limits: under 1 second on average, and no single extraction over 5 seconds
  expect(avgDuration).toBeLessThan(1000);
  expect(maxDuration).toBeLessThan(5000);
});
// Start the tap runner.
// NOTE(review): presumably this triggers execution of the tap.test cases
// registered above - confirm against the tapbundle documentation.
tap.start();