// Listing metadata: 352 lines, 12 KiB, TypeScript
import { tap, expect } from '@push.rocks/tapbundle';
|
|
import { EInvoice, EInvoicePDFError } from '../ts/index.js';
|
|
import { InvoiceFormat } from '../ts/interfaces/common.js';
|
|
import { TestFileHelpers, TestFileCategories, PerformanceUtils, TestInvoiceFactory } from './test-utils.js';
|
|
import * as path from 'path';
|
|
import { promises as fs } from 'fs';
|
|
|
|
/**
|
|
* Comprehensive PDF operations test suite
|
|
*/
|
|
|
|
// Test PDF extraction from ZUGFeRD v1 files.
//
// Takes the first five PDFs returned for TestFileCategories.ZUGFERD_V1_CORRECT, runs
// EInvoice.fromPdf on each (timed under the 'pdf-extraction-v1' label) and asserts on the
// extracted XML, the detected format and a few basic invoice fields. Any error raised for
// a file, including a failed assertion, is logged and counted as a failure for that file;
// the test itself only requires that at least one file succeeds.
tap.test('PDF Operations - Extract XML from ZUGFeRD v1 PDFs', async () => {
  const pdfFiles = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V1_CORRECT, '*.pdf');
  console.log(`Testing XML extraction from ${pdfFiles.length} ZUGFeRD v1 PDFs`);

  let successCount = 0;
  let failCount = 0;
  const extractionTimes: number[] = [];

  for (const file of pdfFiles.slice(0, 5)) { // Test first 5 for speed
    const fileName = path.basename(file);

    try {
      const pdfBuffer = await TestFileHelpers.loadTestFile(file);

      const { result: einvoice, duration } = await PerformanceUtils.measure(
        'pdf-extraction-v1',
        async () => EInvoice.fromPdf(pdfBuffer)
      );

      // Recorded as soon as extraction returns, so the average below also covers files
      // that fail one of the assertions that follow.
      extractionTimes.push(duration);

      // Verify extraction succeeded
      expect(einvoice).toBeTruthy();
      expect(einvoice.getXml()).toBeTruthy();
      expect(einvoice.getXml().length).toBeGreaterThan(100);

      // Check format detection
      const format = einvoice.getFormat();
      expect([InvoiceFormat.ZUGFERD, InvoiceFormat.FACTURX]).toContain(format);

      // Verify basic invoice data
      expect(einvoice.id).toBeTruthy();
      expect(einvoice.from.name).toBeTruthy();
      expect(einvoice.to.name).toBeTruthy();

      // Count and report the success only after every assertion has passed; otherwise a
      // file failing the basic-data checks would be tallied as both succeeded and failed.
      successCount++;
      console.log(`✓ ${fileName}: Extracted ${einvoice.getXml().length} bytes, format: ${format} (${duration.toFixed(2)}ms)`);
    } catch (error) {
      failCount++;
      if (error instanceof EInvoicePDFError) {
        console.log(`✗ ${fileName}: ${error.message}`);
        console.log(` Recovery suggestions: ${error.getRecoverySuggestions().join(', ')}`);
      } else {
        // The caught value is not guaranteed to be an Error, so narrow before reading .message.
        const message = error instanceof Error ? error.message : String(error);
        console.log(`✗ ${fileName}: ${message}`);
      }
    }
  }

  console.log(`\nExtraction Summary: ${successCount} succeeded, ${failCount} failed`);
  if (extractionTimes.length > 0) {
    const avgTime = extractionTimes.reduce((a, b) => a + b) / extractionTimes.length;
    console.log(`Average extraction time: ${avgTime.toFixed(2)}ms`);
  }

  expect(successCount).toBeGreaterThan(0);
});
|
|
|
|
// Test PDF extraction from ZUGFeRD v2/Factur-X files.
// For each of the first ten corpus PDFs: parse it, tally the profile named in the file
// name, and check that the invoice can be exported again as 'facturx' XML.
// Per-file errors are logged and do not fail the test.
tap.test('PDF Operations - Extract XML from ZUGFeRD v2/Factur-X PDFs', async () => {
  const corpusPdfs = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V2_CORRECT, '*.pdf');
  console.log(`Testing XML extraction from ${corpusPdfs.length} ZUGFeRD v2/Factur-X PDFs`);

  const profilePattern = /(BASIC|COMFORT|EXTENDED|MINIMUM|EN16931)/i;
  const countsByProfile: Record<string, number> = {};

  for (const pdfPath of corpusPdfs.slice(0, 10)) { // Test first 10
    const baseName = path.basename(pdfPath);

    try {
      const bytes = await TestFileHelpers.loadTestFile(pdfPath);
      const parsed = await EInvoice.fromPdf(bytes);

      // Profile comes from the file name; anything unrecognised is bucketed as UNKNOWN
      const hit = baseName.match(profilePattern);
      let profileName = 'UNKNOWN';
      if (hit) {
        profileName = hit[1].toUpperCase();
      }
      const seenSoFar = countsByProfile[profileName] || 0;
      countsByProfile[profileName] = seenSoFar + 1;

      console.log(`✓ ${baseName}: Profile ${profileName}, Format ${parsed.getFormat()}`);

      // The parsed invoice must be exportable again
      const reexported = await parsed.exportXml('facturx');
      expect(reexported).toBeTruthy();
      expect(reexported).toInclude('CrossIndustryInvoice');
    } catch (error) {
      console.log(`✗ ${baseName}: ${error.message}`);
    }
  }

  console.log('\nProfile distribution:', countsByProfile);
});
|
|
|
|
// Test PDF embedding (creating PDFs with XML).
// Builds an invoice from the test factory, attaches a minimal PDF, exports it as a
// 'facturx' PDF (timed under the 'pdf-embedding' label) and then reads the result back.
// Any error is rethrown after logging, so a failed embedding fails the test.
tap.test('PDF Operations - Embed XML into PDF', async () => {
  // Invoice under test, populated from the factory's complex sample
  const invoice = new EInvoice();
  Object.assign(invoice, TestInvoiceFactory.createComplexInvoice());

  // The XML export has to work before embedding is attempted
  const generatedXml = await invoice.exportXml('facturx');
  expect(generatedXml).toBeTruthy();
  console.log(`Generated XML: ${generatedXml.length} bytes`);

  // Attach a minimal PDF as the embedding target
  const basePdf = await createMinimalTestPDF();
  invoice.pdf = {
    name: 'test-invoice.pdf',
    id: 'test-pdf-001',
    metadata: { textExtraction: '' },
    buffer: basePdf
  };

  try {
    const measured = await PerformanceUtils.measure(
      'pdf-embedding',
      async () => invoice.exportPdf('facturx')
    );
    const embedded = measured.result;
    const elapsedMs = measured.duration;

    // The output must exist and be larger than the input PDF
    expect(embedded).toBeTruthy();
    expect(embedded.buffer).toBeTruthy();
    expect(embedded.buffer.length).toBeGreaterThan(basePdf.length);

    console.log(`✓ Successfully embedded XML into PDF (${elapsedMs.toFixed(2)}ms)`);
    console.log(` Original PDF: ${basePdf.length} bytes`);
    console.log(` Result PDF: ${embedded.buffer.length} bytes`);
    console.log(` Size increase: ${embedded.buffer.length - basePdf.length} bytes`);

    // Round trip: the embedded XML must be extractable and detected as Factur-X
    const roundTrip = await EInvoice.fromPdf(embedded.buffer);
    expect(roundTrip.getXml()).toBeTruthy();
    expect(roundTrip.getFormat()).toEqual(InvoiceFormat.FACTURX);
    console.log('✓ Verified: Embedded XML can be extracted successfully');
  } catch (error) {
    const isPdfError = error instanceof EInvoicePDFError;
    if (isPdfError) {
      console.log(`✗ Embedding failed: ${error.message}`);
      console.log(` Operation: ${error.operation}`);
      console.log(` Suggestions: ${error.getRecoverySuggestions().join(', ')}`);
    }
    throw error;
  }
});
|
|
|
|
// Test PDF extraction error handling.
//
// Feeds three kinds of invalid input to EInvoice.fromPdf (an empty buffer, plain text, and
// a buffer that only starts with a PDF header) and requires each call to reject with an
// EInvoicePDFError. For the empty buffer the error's `operation` must also be 'extract'.
tap.test('PDF Operations - Error handling for invalid PDFs', async () => {
  // Runs the extraction and hands back whatever it threw. The "did not throw" case is
  // raised by the caller outside of any try/catch, so it surfaces with its own message
  // instead of being caught and re-reported as a wrong-error-type assertion failure.
  const captureExtractionError = async (
    input: Uint8Array
  ): Promise<{ threw: boolean; error: unknown }> => {
    try {
      await EInvoice.fromPdf(input);
    } catch (error) {
      return { threw: true, error };
    }
    return { threw: false, error: undefined };
  };

  // Test with empty buffer
  const emptyOutcome = await captureExtractionError(new Uint8Array(0));
  if (!emptyOutcome.threw) {
    throw new Error('Should have thrown an error for empty PDF');
  }
  expect(emptyOutcome.error).toBeInstanceOf(EInvoicePDFError);
  if (emptyOutcome.error instanceof EInvoicePDFError) {
    expect(emptyOutcome.error.operation).toEqual('extract');
    console.log('✓ Empty PDF error handled correctly');
  }

  // Test with non-PDF data
  const textOutcome = await captureExtractionError(Buffer.from('This is not a PDF file'));
  if (!textOutcome.threw) {
    throw new Error('Should have thrown an error for non-PDF data');
  }
  expect(textOutcome.error).toBeInstanceOf(EInvoicePDFError);
  console.log('✓ Non-PDF data error handled correctly');

  // Test with corrupted PDF header
  const corruptOutcome = await captureExtractionError(Buffer.from('%PDF-1.4\nCorrupted content'));
  if (!corruptOutcome.threw) {
    throw new Error('Should have thrown an error for corrupted PDF');
  }
  expect(corruptOutcome.error).toBeInstanceOf(EInvoicePDFError);
  console.log('✓ Corrupted PDF error handled correctly');
});
|
|
|
|
// Test failed PDF extractions from corpus.
// Every PDF in the ZUGFERD_V1_FAIL category is run through EInvoice.fromPdf. A rejection
// with EInvoicePDFError must carry operation 'extract'; an unexpected success or any
// other error type is only logged.
tap.test('PDF Operations - Handle PDFs without XML gracefully', async () => {
  const expectedFailures = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V1_FAIL, '*.pdf');
  console.log(`Testing ${expectedFailures.length} PDFs expected to fail`);

  for (const pdfPath of expectedFailures) {
    const baseName = path.basename(pdfPath);

    try {
      const bytes = await TestFileHelpers.loadTestFile(pdfPath);
      await EInvoice.fromPdf(bytes);
      console.log(`○ ${baseName}: Unexpectedly succeeded (might have XML)`);
    } catch (error) {
      // Anything other than the library's PDF error is reported and the loop moves on
      if (!(error instanceof EInvoicePDFError)) {
        console.log(`✗ ${baseName}: Wrong error type - ${error.message}`);
        continue;
      }

      expect(error.operation).toEqual('extract');
      console.log(`✓ ${baseName}: Correctly failed - ${error.message}`);
    }
  }
});
|
|
|
|
// Test PDF metadata preservation.
// Parses the first corpus PDF, exports it again as an 'xrechnung' PDF, parses that output
// and compares sender name, recipient name and item count. Does nothing when the corpus
// category is empty; any error inside the round trip is logged as a skip.
tap.test('PDF Operations - Metadata preservation during embedding', async () => {
  const corpusPdfs = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V2_CORRECT, '*.pdf');
  if (corpusPdfs.length === 0) {
    return;
  }

  // Load a real PDF from corpus
  const sourceBytes = await TestFileHelpers.loadTestFile(corpusPdfs[0]);

  try {
    // extract -> re-embed with a different format -> extract again
    const before = await EInvoice.fromPdf(sourceBytes);
    const roundTripped = await before.exportPdf('xrechnung');
    const after = await EInvoice.fromPdf(roundTripped.buffer);

    // Key fields must match on both sides of the cycle
    expect(after.from.name).toEqual(before.from.name);
    expect(after.to.name).toEqual(before.to.name);
    expect(after.items.length).toEqual(before.items.length);

    console.log('✓ Metadata preserved through re-embedding cycle');
  } catch (error) {
    console.log(`○ Metadata preservation test skipped: ${error.message}`);
  }
});
|
|
|
|
// Test PDF size constraints.
//
// Hands EInvoice.fromPdf a 10 MiB zero-filled buffer that only starts with a PDF header
// and checks that the call settles in under five seconds. The call is expected to reject,
// but the time limit is enforced whichever way it settles.
tap.test('PDF Operations - Performance with large PDFs', async () => {
  const largePdfSize = 10 * 1024 * 1024; // 10MB
  const largePdfBuffer = Buffer.alloc(largePdfSize);

  // Create a simple PDF header
  const pdfHeader = Buffer.from('%PDF-1.4\n');
  pdfHeader.copy(largePdfBuffer);

  console.log(`Testing with ${(largePdfSize / 1024 / 1024).toFixed(1)}MB PDF`);

  const startTime = performance.now();
  try {
    await EInvoice.fromPdf(largePdfBuffer);
  } catch (error) {
    // Expected to fail, we're testing performance
  }

  // Measured outside the catch so the limit is also checked if the call resolves instead
  // of rejecting; previously a resolved call skipped the assertion entirely.
  const duration = performance.now() - startTime;
  console.log(`✓ Large PDF processed in ${duration.toFixed(2)}ms`);
  expect(duration).toBeLessThan(5000); // Should fail fast, not hang
});
|
|
|
|
// Test concurrent PDF operations.
// Starts extraction for up to five corpus PDFs at once, waits for all of them and logs
// how many succeeded plus the total and per-file average time. Makes no assertions.
tap.test('PDF Operations - Concurrent processing', async () => {
  const corpusPdfs = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V2_CORRECT, '*.pdf');
  const batch = corpusPdfs.slice(0, 5);

  if (batch.length === 0) {
    return;
  }

  console.log(`Testing concurrent processing of ${batch.length} PDFs`);

  const startedAt = performance.now();

  // One task per file; each task catches its own error and reports it in its result
  const tasks = batch.map(async (pdfPath) => {
    try {
      const bytes = await TestFileHelpers.loadTestFile(pdfPath);
      const parsed = await EInvoice.fromPdf(bytes);
      return { success: true, format: parsed.getFormat() };
    } catch (error) {
      return { success: false, error: error.message };
    }
  });

  const outcomes = await Promise.all(tasks);
  const elapsedMs = performance.now() - startedAt;

  let succeeded = 0;
  for (const outcome of outcomes) {
    if (outcome.success) {
      succeeded += 1;
    }
  }

  console.log(`✓ Processed ${succeeded}/${batch.length} PDFs concurrently in ${elapsedMs.toFixed(2)}ms`);
  console.log(` Average time per PDF: ${(elapsedMs / batch.length).toFixed(2)}ms`);
});
|
|
|
|
// Performance summary.
// Prints the statistics recorded under the 'pdf-extraction-v1' and 'pdf-embedding' labels
// and, when more than three extraction samples exist, requires the extraction average to
// stay below one second.
tap.test('PDF Operations - Performance Summary', async () => {
  const extractionStats = PerformanceUtils.getStats('pdf-extraction-v1');
  const embeddingStats = PerformanceUtils.getStats('pdf-embedding');

  console.log('\nPDF Operations Performance Summary:');

  if (extractionStats) {
    const { avg, min, max } = extractionStats;
    console.log('PDF Extraction (ZUGFeRD v1):');
    console.log(` Average: ${avg.toFixed(2)}ms`);
    console.log(` Min/Max: ${min.toFixed(2)}ms / ${max.toFixed(2)}ms`);
  }

  if (embeddingStats) {
    console.log('PDF Embedding:');
    console.log(` Average: ${embeddingStats.avg.toFixed(2)}ms`);
  }

  // Only enforce the budget once there are enough samples
  const hasEnoughSamples = extractionStats && extractionStats.count > 3;
  if (hasEnoughSamples) {
    expect(extractionStats.avg).toBeLessThan(1000); // Should extract in under 1 second on average
  }
});
|
|
|
|
/**
 * Helper function to create a minimal test PDF.
 *
 * Builds a one-page PDF 1.4 document (catalog, page tree, one empty 612x792 page) as an
 * ASCII string and returns its bytes. The cross-reference offsets and the `startxref`
 * value are computed from the generated content instead of being hard-coded, so they
 * always point at the objects and at the `xref` keyword.
 *
 * @returns The bytes of the generated PDF.
 */
async function createMinimalTestPDF(): Promise<Uint8Array> {
  // Dictionary bodies of objects 1..3; object N is stored at index N - 1.
  const objectBodies = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << >> >>'
  ];

  // Every character written is ASCII, so string length equals byte offset.
  let pdfContent = '%PDF-1.4\n';
  const objectOffsets: number[] = [];
  objectBodies.forEach((body, index) => {
    objectOffsets.push(pdfContent.length);
    pdfContent += `${index + 1} 0 obj\n${body}\nendobj\n`;
  });

  const xrefOffset = pdfContent.length;
  const entryCount = objectBodies.length + 1; // + the free-list head (object 0)

  // Each cross-reference entry is written as exactly 20 bytes:
  // 10-digit offset, space, 5-digit generation, space, type letter, space, newline.
  const inUseEntries = objectOffsets
    .map((offset) => `${String(offset).padStart(10, '0')} 00000 n \n`)
    .join('');

  pdfContent += `xref\n0 ${entryCount}\n0000000000 65535 f \n${inUseEntries}`;
  pdfContent += `trailer\n<< /Size ${entryCount} /Root 1 0 R >>\nstartxref\n${xrefOffset}\n%%EOF`;

  return new Uint8Array(Buffer.from(pdfContent));
}
|
|
|
|
// All tests are registered above; start the tap runner.
tap.start();