/*
 * - Update test-utils import path and refactor to helpers/utils.ts
 * - Migrate all CorpusLoader usage from getFiles() to loadCategory() API
 * - Add new EN16931 UBL validator with comprehensive validation rules
 * - Add new XRechnung validator extending EN16931 with German requirements
 * - Update validator factory to support new validators
 * - Fix format detector for better XRechnung and EN16931 detection
 * - Update all test files to use proper import paths
 * - Improve error handling in security tests
 * - Fix validation tests to use realistic thresholds
 * - Add proper namespace handling in corpus validation tests
 * - Update format detection tests for improved accuracy
 * - Fix test imports from classes.xinvoice.ts to index.js
 *
 * All test suites are now properly aligned with the updated APIs and realistic
 * performance expectations.
 */
394 lines
14 KiB
TypeScript
394 lines
14 KiB
TypeScript
import { tap, expect } from '@git.zone/tstest/tapbundle';
|
|
import { EInvoice, EInvoicePDFError } from '../ts/index.js';
|
|
import { InvoiceFormat } from '../ts/interfaces/common.js';
|
|
import { TestFileHelpers, TestFileCategories, PerformanceUtils, TestInvoiceFactory } from './helpers/utils.js';
|
|
import * as path from 'path';
|
|
import { promises as fs } from 'fs';
|
|
|
|
/**
 * Comprehensive PDF operations test suite
 */
|
|
|
|
// Test PDF extraction from ZUGFeRD v1 files
//
// Loads the 'ZUGFERD_V1_CORRECT' corpus category, keeps the .pdf entries and
// runs EInvoice.fromPdf on at most the first five. Each extraction is passed
// through PerformanceUtils.measure under the label 'pdf-extraction-v1'.
// The test returns early (asserting nothing) when the category has no PDFs;
// otherwise at least one file must extract successfully.
tap.test('PDF Operations - Extract XML from ZUGFeRD v1 PDFs', async () => {
  // Use CorpusLoader for recursive loading
  const { CorpusLoader } = await import('./helpers/corpus.loader.js');
  const corpusFiles = await CorpusLoader.loadCategory('ZUGFERD_V1_CORRECT');
  const pdfFiles = corpusFiles.filter(file => file.path.endsWith('.pdf'));

  console.log(`Testing XML extraction from ${pdfFiles.length} ZUGFeRD v1 PDFs`);

  // Skip test if no PDF files are available
  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v1 PDF files found in corpus - skipping test');
    return;
  }

  let successCount = 0;
  let failCount = 0;
  const extractionTimes: number[] = [];

  for (const corpusFile of pdfFiles.slice(0, 5)) { // Test first 5 for speed
    const fileName = path.basename(corpusFile.path);

    try {
      const pdfBuffer = await CorpusLoader.loadFile(corpusFile.path);

      const { result: einvoice, duration } = await PerformanceUtils.measure(
        'pdf-extraction-v1',
        async () => EInvoice.fromPdf(pdfBuffer)
      );

      extractionTimes.push(duration);

      // Verify extraction succeeded
      expect(einvoice).toBeTruthy();
      const xml = einvoice.getXml();
      expect(xml).toBeTruthy();
      expect(xml.length).toBeGreaterThan(100);

      // Check format detection
      const format = einvoice.getFormat();
      expect([InvoiceFormat.ZUGFERD, InvoiceFormat.FACTURX]).toContain(format);

      successCount++;
      console.log(`✓ ${fileName}: Extracted ${xml.length} bytes, format: ${format} (${duration.toFixed(2)}ms)`);

      // Verify basic invoice data
      // NOTE(review): a failure here lands in the catch below, so the file is
      // counted in both successCount and failCount. Kept as-is so the final
      // pass/fail condition of this test is unchanged - confirm intent.
      expect(einvoice.id).toBeTruthy();
      expect(einvoice.from.name).toBeTruthy();
      expect(einvoice.to.name).toBeTruthy();
    } catch (error) {
      // Any failure (load, extraction, or assertion) is recorded per file
      // rather than aborting the loop.
      failCount++;
      if (error instanceof EInvoicePDFError) {
        console.log(`✗ ${fileName}: ${error.message}`);
        console.log(` Recovery suggestions: ${error.getRecoverySuggestions().join(', ')}`);
      } else {
        // The catch variable may be anything; narrow before reading .message.
        const message = error instanceof Error ? error.message : String(error);
        console.log(`✗ ${fileName}: ${message}`);
      }
    }
  }

  console.log(`\nExtraction Summary: ${successCount} succeeded, ${failCount} failed`);
  if (extractionTimes.length > 0) {
    const avgTime = extractionTimes.reduce((a, b) => a + b, 0) / extractionTimes.length;
    console.log(`Average extraction time: ${avgTime.toFixed(2)}ms`);
  }

  // The empty-corpus case returned early above, so files were tested here.
  expect(successCount).toBeGreaterThan(0);
});
|
|
|
|
// Test PDF extraction from ZUGFeRD v2/Factur-X files
//
// Loads the 'ZUGFERD_V2_CORRECT' corpus category, extracts at most the first
// ten PDFs, tallies a profile name parsed from each file name, and checks that
// each extracted invoice can be re-exported as 'facturx' XML.
// NOTE(review): every per-file failure (including failed expectations) is only
// logged, so this test cannot fail on extraction problems - confirm intent.
tap.test('PDF Operations - Extract XML from ZUGFeRD v2/Factur-X PDFs', async () => {
  // Use CorpusLoader for recursive loading
  const { CorpusLoader } = await import('./helpers/corpus.loader.js');
  const corpusFiles = await CorpusLoader.loadCategory('ZUGFERD_V2_CORRECT');
  const pdfFiles = corpusFiles.filter(file => file.path.endsWith('.pdf'));

  console.log(`Testing XML extraction from ${pdfFiles.length} ZUGFeRD v2/Factur-X PDFs`);

  // Skip test if no PDF files are available
  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v2/Factur-X PDF files found in corpus - skipping test');
    return;
  }

  // Matches a profile keyword anywhere in the file name, case-insensitively.
  const profilePattern = /(BASIC|COMFORT|EXTENDED|MINIMUM|EN16931)/i;
  const profileStats: Record<string, number> = {};

  for (const corpusFile of pdfFiles.slice(0, 10)) { // Test first 10
    const fileName = path.basename(corpusFile.path);

    try {
      const pdfBuffer = await CorpusLoader.loadFile(corpusFile.path);
      const einvoice = await EInvoice.fromPdf(pdfBuffer);

      // Extract profile from filename if present
      const profileMatch = fileName.match(profilePattern);
      const profile = profileMatch ? profileMatch[1].toUpperCase() : 'UNKNOWN';
      profileStats[profile] = (profileStats[profile] || 0) + 1;

      console.log(`✓ ${fileName}: Profile ${profile}, Format ${einvoice.getFormat()}`);

      // Test that we can re-export the invoice
      const xml = await einvoice.exportXml('facturx');
      expect(xml).toBeTruthy();
      expect(xml).toInclude('CrossIndustryInvoice');
    } catch (error) {
      // The catch variable may be anything; narrow before reading .message.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`✗ ${fileName}: ${message}`);
    }
  }

  console.log('\nProfile distribution:', profileStats);
});
|
|
|
|
// Test PDF embedding (creating PDFs with XML)
//
// Builds an invoice from TestInvoiceFactory.createComplexInvoice(), exports it
// as 'facturx' XML, embeds it into the PDF from createMinimalTestPDF() and then
// extracts it again from the result. Any failure is rethrown after logging.
tap.test('PDF Operations - Embed XML into PDF', async () => {
  // Invoice under test, populated from the factory fixture
  const invoice = new EInvoice();
  Object.assign(invoice, TestInvoiceFactory.createComplexInvoice());

  // The export must yield something before embedding is attempted
  const xml = await invoice.exportXml('facturx');
  expect(xml).toBeTruthy();
  console.log(`Generated XML: ${xml.length} bytes`);

  // Attach a minimal carrier PDF to the invoice
  const pdfBuffer = await createMinimalTestPDF();
  invoice.pdf = {
    name: 'test-invoice.pdf',
    id: 'test-pdf-001',
    metadata: { textExtraction: '' },
    buffer: pdfBuffer
  };

  // Wrapped in an object so the measured result exposes a `buffer` field
  const embedIntoPdf = async () => {
    const buffer = await invoice.embedInPdf(Buffer.from(pdfBuffer), 'facturx');
    return { buffer };
  };

  try {
    const measurement = await PerformanceUtils.measure('pdf-embedding', embedIntoPdf);
    const resultPdf = measurement.result;
    const elapsedMs = measurement.duration;

    expect(resultPdf).toBeTruthy();
    expect(resultPdf.buffer).toBeTruthy();
    expect(resultPdf.buffer.length).toBeGreaterThan(pdfBuffer.length);

    const originalSize = pdfBuffer.length;
    const resultSize = resultPdf.buffer.length;
    console.log(`✓ Successfully embedded XML into PDF (${elapsedMs.toFixed(2)}ms)`);
    console.log(` Original PDF: ${originalSize} bytes`);
    console.log(` Result PDF: ${resultSize} bytes`);
    console.log(` Size increase: ${resultSize - originalSize} bytes`);

    // Round trip: the embedded XML must be extractable and detected as Factur-X
    const roundTripped = await EInvoice.fromPdf(resultPdf.buffer);
    expect(roundTripped.getXml()).toBeTruthy();
    expect(roundTripped.getFormat()).toEqual(InvoiceFormat.FACTURX);
    console.log('✓ Verified: Embedded XML can be extracted successfully');
  } catch (error) {
    // Log extra diagnostics for PDF errors, then fail the test either way
    if (error instanceof EInvoicePDFError) {
      console.log(`✗ Embedding failed: ${error.message}`);
      console.log(` Operation: ${error.operation}`);
      console.log(` Suggestions: ${error.getRecoverySuggestions().join(', ')}`);
    }
    throw error;
  }
});
|
|
|
|
// Test PDF extraction error handling
//
// Feeds three invalid inputs (empty buffer, plain text, a PDF header followed
// by junk) to EInvoice.fromPdf and requires each call to reject with an
// EInvoicePDFError. For the empty buffer the error's `operation` must be
// 'extract'.
tap.test('PDF Operations - Error handling for invalid PDFs', async () => {
  // Runs extraction and asserts that it rejects with an EInvoicePDFError.
  // The "should have thrown" failure is raised outside the try block so it is
  // reported with its own message instead of being caught and re-checked as if
  // it were the extraction error.
  const expectPdfError = async (input: Buffer, missingThrowMessage: string): Promise<unknown> => {
    let threw = false;
    let caught: unknown;
    try {
      await EInvoice.fromPdf(input);
    } catch (error) {
      threw = true;
      caught = error;
    }
    if (!threw) {
      throw new Error(missingThrowMessage);
    }
    expect(caught).toBeInstanceOf(EInvoicePDFError);
    return caught;
  };

  // Test with empty buffer
  const emptyError = await expectPdfError(
    Buffer.from(new Uint8Array(0)),
    'Should have thrown an error for empty PDF'
  );
  if (emptyError instanceof EInvoicePDFError) {
    expect(emptyError.operation).toEqual('extract');
    console.log('✓ Empty PDF error handled correctly');
  }

  // Test with non-PDF data
  await expectPdfError(
    Buffer.from('This is not a PDF file'),
    'Should have thrown an error for non-PDF data'
  );
  console.log('✓ Non-PDF data error handled correctly');

  // Test with corrupted PDF header
  await expectPdfError(
    Buffer.from('%PDF-1.4\nCorrupted content'),
    'Should have thrown an error for corrupted PDF'
  );
  console.log('✓ Corrupted PDF error handled correctly');
});
|
|
|
|
// Test failed PDF extractions from corpus
//
// Runs EInvoice.fromPdf over every PDF in the 'ZUGFERD_V1_FAIL' corpus
// category. An EInvoicePDFError must report operation 'extract'; unexpected
// successes and other error types are only logged.
tap.test('PDF Operations - Handle PDFs without XML gracefully', async () => {
  // Use CorpusLoader for recursive loading
  const { CorpusLoader } = await import('./helpers/corpus.loader.js');
  const corpusFiles = await CorpusLoader.loadCategory('ZUGFERD_V1_FAIL');
  const failPdfs = corpusFiles.filter(file => file.path.endsWith('.pdf'));

  console.log(`Testing ${failPdfs.length} PDFs expected to fail`);

  // Skip test if no PDF files are available
  if (failPdfs.length === 0) {
    console.log('No failed ZUGFeRD v1 PDF files found in corpus - skipping test');
    return;
  }

  for (const corpusFile of failPdfs) {
    const fileName = path.basename(corpusFile.path);

    try {
      const pdfBuffer = await CorpusLoader.loadFile(corpusFile.path);
      await EInvoice.fromPdf(pdfBuffer);
      console.log(`○ ${fileName}: Unexpectedly succeeded (might have XML)`);
    } catch (error) {
      if (error instanceof EInvoicePDFError) {
        expect(error.operation).toEqual('extract');
        console.log(`✓ ${fileName}: Correctly failed - ${error.message}`);
      } else {
        // The catch variable may be anything; narrow before reading .message.
        const message = error instanceof Error ? error.message : String(error);
        console.log(`✗ ${fileName}: Wrong error type - ${message}`);
      }
    }
  }
});
|
|
|
|
// Test PDF metadata preservation
//
// Takes the first PDF of the 'ZUGFERD_V2_CORRECT' corpus category, extracts
// its invoice, re-embeds it as 'xrechnung' into the same PDF, extracts again
// and compares seller name, buyer name and item count.
// NOTE(review): the catch below also swallows failed expectations and reports
// them as "skipped", so this test cannot fail - confirm that is intended.
tap.test('PDF Operations - Metadata preservation during embedding', async () => {
  // Use CorpusLoader for recursive loading
  const { CorpusLoader } = await import('./helpers/corpus.loader.js');
  const corpusFiles = await CorpusLoader.loadCategory('ZUGFERD_V2_CORRECT');
  const pdfFiles = corpusFiles.filter(file => file.path.endsWith('.pdf'));

  if (pdfFiles.length === 0) {
    console.log('No ZUGFeRD v2 PDF files found for metadata preservation test - skipping');
    return;
  }

  const originalPdfBuffer = await CorpusLoader.loadFile(pdfFiles[0].path);

  try {
    // Extract from original
    const originalInvoice = await EInvoice.fromPdf(originalPdfBuffer);

    // Re-embed with different format
    const reembeddedBuffer = await originalInvoice.embedInPdf(originalPdfBuffer, 'xrechnung');

    // Extract again
    const reextracted = await EInvoice.fromPdf(reembeddedBuffer);

    // Compare key fields
    expect(reextracted.from.name).toEqual(originalInvoice.from.name);
    expect(reextracted.to.name).toEqual(originalInvoice.to.name);
    expect(reextracted.items.length).toEqual(originalInvoice.items.length);

    console.log('✓ Metadata preserved through re-embedding cycle');
  } catch (error) {
    // The catch variable may be anything; narrow before reading .message.
    const message = error instanceof Error ? error.message : String(error);
    console.log(`○ Metadata preservation test skipped: ${message}`);
  }
});
|
|
|
|
// Test PDF size constraints
//
// Passes a 10 MiB zero-filled buffer that starts with a '%PDF-1.4' header to
// EInvoice.fromPdf and requires the call to settle in under five seconds.
// The elapsed time is checked whether the call rejects (the expected outcome)
// or resolves, so the test never passes without making its timing assertion.
tap.test('PDF Operations - Performance with large PDFs', async () => {
  const largePdfSize = 10 * 1024 * 1024; // 10MB
  const largePdfBuffer = Buffer.alloc(largePdfSize);

  // Create a simple PDF header
  const pdfHeader = Buffer.from('%PDF-1.4\n');
  pdfHeader.copy(largePdfBuffer);

  console.log(`Testing with ${(largePdfSize / 1024 / 1024).toFixed(1)}MB PDF`);

  const startTime = performance.now();
  try {
    await EInvoice.fromPdf(largePdfBuffer);
  } catch {
    // Expected to fail, we're testing performance
  }
  const duration = performance.now() - startTime;

  console.log(`✓ Large PDF processed in ${duration.toFixed(2)}ms`);
  expect(duration).toBeLessThan(5000); // Should fail fast, not hang
});
|
|
|
|
// Test concurrent PDF operations
//
// Starts extraction of up to five 'ZUGFERD_V2_CORRECT' PDFs at once and waits
// for all of them with Promise.all. Each task converts its own failure into a
// result object, so the combined promise does not reject on extraction errors.
// Only timing and the success count are logged; nothing is asserted.
tap.test('PDF Operations - Concurrent processing', async () => {
  // Use CorpusLoader for recursive loading
  const { CorpusLoader } = await import('./helpers/corpus.loader.js');
  const corpusFiles = await CorpusLoader.loadCategory('ZUGFERD_V2_CORRECT');
  const pdfFiles = corpusFiles.filter(file => file.path.endsWith('.pdf'));
  const testFiles = pdfFiles.slice(0, 5);

  if (testFiles.length === 0) {
    console.log('No ZUGFeRD v2 PDF files found for concurrent processing test - skipping');
    return;
  }

  console.log(`Testing concurrent processing of ${testFiles.length} PDFs`);

  const startTime = performance.now();

  // Process all PDFs concurrently
  const promises = testFiles.map(async (corpusFile) => {
    try {
      const pdfBuffer = await CorpusLoader.loadFile(corpusFile.path);
      const einvoice = await EInvoice.fromPdf(pdfBuffer);
      return { success: true, format: einvoice.getFormat() };
    } catch (error) {
      // The catch variable may be anything; narrow before reading .message.
      const message = error instanceof Error ? error.message : String(error);
      return { success: false, error: message };
    }
  });

  const results = await Promise.all(promises);
  const duration = performance.now() - startTime;

  const successCount = results.filter(r => r.success).length;
  console.log(`✓ Processed ${successCount}/${testFiles.length} PDFs concurrently in ${duration.toFixed(2)}ms`);
  console.log(` Average time per PDF: ${(duration / testFiles.length).toFixed(2)}ms`);
});
|
|
|
|
// Performance summary
//
// Prints the statistics PerformanceUtils holds for the 'pdf-extraction-v1' and
// 'pdf-embedding' labels. When more than three extraction samples exist, the
// average extraction time must stay below one second.
tap.test('PDF Operations - Performance Summary', async () => {
  const extractionStats = PerformanceUtils.getStats('pdf-extraction-v1');
  const embeddingStats = PerformanceUtils.getStats('pdf-embedding');

  console.log('\nPDF Operations Performance Summary:');

  if (extractionStats) {
    const { avg, min, max } = extractionStats;
    console.log('PDF Extraction (ZUGFeRD v1):');
    console.log(` Average: ${avg.toFixed(2)}ms`);
    console.log(` Min/Max: ${min.toFixed(2)}ms / ${max.toFixed(2)}ms`);
  }

  if (embeddingStats) {
    console.log('PDF Embedding:');
    console.log(` Average: ${embeddingStats.avg.toFixed(2)}ms`);
  }

  // Performance assertions: only meaningful with more than three samples
  if (!extractionStats || !(extractionStats.count > 3)) {
    return;
  }
  expect(extractionStats.avg).toBeLessThan(1000); // Should extract in under 1 second on average
});
|
|
|
|
// Helper function to create a minimal test PDF
/**
 * Builds a one-page PDF 1.4 document (catalog, page tree, one empty page) and
 * returns its bytes.
 *
 * The cross-reference table and the `startxref` value are computed from the
 * generated content instead of being hard-coded, so every offset points at the
 * object it describes. Each xref entry is written in the fixed 20-byte form
 * (10-digit offset, 5-digit generation, type flag, space + line feed).
 *
 * @returns The PDF document as a Uint8Array.
 */
async function createMinimalTestPDF(): Promise<Uint8Array> {
  // Object dictionaries in object-number order (object 1, 2, 3).
  const objectBodies = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << >> >>'
  ];

  // The document is ASCII-only, so string length equals byte offset.
  let pdfContent = '%PDF-1.4\n';
  const objectOffsets: number[] = [];
  objectBodies.forEach((body, index) => {
    objectOffsets.push(pdfContent.length);
    pdfContent += `${index + 1} 0 obj\n${body}\nendobj\n`;
  });

  const entryCount = objectBodies.length + 1; // +1 for the free-list head (object 0)
  const xrefOffset = pdfContent.length;
  pdfContent += `xref\n0 ${entryCount}\n`;
  pdfContent += '0000000000 65535 f \n';
  for (const offset of objectOffsets) {
    pdfContent += `${String(offset).padStart(10, '0')} 00000 n \n`;
  }
  pdfContent += `trailer\n<< /Size ${entryCount} /Root 1 0 R >>\nstartxref\n${xrefOffset}\n%%EOF`;

  return new Uint8Array(Buffer.from(pdfContent));
}
|
|
|
|
// Start the tap run for the tests registered in this file.
tap.start();