// einvoice/test/suite/einvoice_performance/test.perf-03.pdf-extraction.ts
//
// 412 lines
// 13 KiB
// TypeScript

/**
* @file test.perf-03.pdf-extraction.ts
* @description Performance tests for PDF extraction operations
*/
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PDFDocument, rgb } from 'pdf-lib';
// Simple performance tracking
class SimplePerformanceTracker {
private measurements: Map<string, number[]> = new Map();
private name: string;
constructor(name: string) {
this.name = name;
}
addMeasurement(key: string, time: number): void {
if (!this.measurements.has(key)) {
this.measurements.set(key, []);
}
this.measurements.get(key)!.push(time);
}
getStats(key: string) {
const times = this.measurements.get(key) || [];
if (times.length === 0) return null;
const sorted = [...times].sort((a, b) => a - b);
return {
avg: times.reduce((a, b) => a + b, 0) / times.length,
min: sorted[0],
max: sorted[sorted.length - 1],
p95: sorted[Math.floor(sorted.length * 0.95)]
};
}
printSummary(): void {
console.log(`\n${this.name} - Performance Summary:`);
for (const [key, times] of this.measurements) {
const stats = this.getStats(key);
if (stats) {
console.log(` ${key}: avg=${stats.avg.toFixed(2)}ms, min=${stats.min.toFixed(2)}ms, max=${stats.max.toFixed(2)}ms, p95=${stats.p95.toFixed(2)}ms`);
}
}
}
}
// Shared tracker for this file: tests below record timings into it and the
// final summary test prints them.
const performanceTracker = new SimplePerformanceTracker('PERF-03: PDF Extraction Speed');
/**
 * Builds an in-memory PDF with the requested number of pages and the given
 * XML attached as `invoice.xml`.
 *
 * @param name Label used in the page text and the attachment description.
 * @param xmlContent XML document to embed as an attachment.
 * @param pages Number of pages to generate (defaults to 1).
 * @returns The serialized PDF as a Buffer.
 */
async function createTestPdf(name: string, xmlContent: string, pages: number = 1): Promise<Buffer> {
  const doc = await PDFDocument.create();

  // The placeholder frame is identical on every page, so describe it once.
  const frame = {
    x: 50,
    y: 600,
    width: 495,
    height: 100,
    borderColor: rgb(0, 0, 0),
    borderWidth: 1
  };

  for (let pageIndex = 0; pageIndex < pages; pageIndex += 1) {
    const sheet = doc.addPage([595, 842]); // A4
    const heading = `Test Invoice ${name} - Page ${pageIndex + 1}`;
    sheet.drawText(heading, { x: 50, y: 750, size: 20 });
    sheet.drawRectangle(frame);
  }

  // Embed the XML payload as a file attachment.
  const xmlBytes = Buffer.from(xmlContent, 'utf8');
  const attachmentOptions = {
    mimeType: 'application/xml',
    description: `Invoice ${name}`
  };
  await doc.attach(xmlBytes, 'invoice.xml', attachmentOptions);

  const serialized = await doc.save();
  return Buffer.from(serialized);
}
/**
 * Generates a small UBL-namespaced invoice document for use as test input.
 *
 * Every line item has quantity 1 and amount 100.00 EUR, and the payable
 * amount is written as `100 * lineItems` followed by `.00`.
 *
 * @param id Value placed in the invoice-level `cbc:ID` element.
 * @param lineItems Number of `cac:InvoiceLine` elements to emit (defaults to 10).
 * @returns The XML document as a string.
 */
function createTestXml(id: string, lineItems: number = 10): string {
  // One invoice line; the leading empty entry yields the newline that
  // separates each line from whatever precedes it.
  const renderLine = (position: number): string =>
    [
      '',
      '<cac:InvoiceLine>',
      `<cbc:ID>${position}</cbc:ID>`,
      '<cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>',
      '<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>',
      '<cac:Item>',
      `<cbc:Name>Product ${position}</cbc:Name>`,
      '</cac:Item>',
      '<cac:Price>',
      '<cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount>',
      '</cac:Price>',
      '</cac:InvoiceLine>'
    ].join('\n');

  // Supplier and customer share the same structure and country code.
  const renderParty = (partyName: string, city: string, postalZone: string): string[] => [
    '<cac:Party>',
    '<cac:PartyName>',
    `<cbc:Name>${partyName}</cbc:Name>`,
    '</cac:PartyName>',
    '<cac:PostalAddress>',
    `<cbc:CityName>${city}</cbc:CityName>`,
    `<cbc:PostalZone>${postalZone}</cbc:PostalZone>`,
    '<cac:Country>',
    '<cbc:IdentificationCode>DE</cbc:IdentificationCode>',
    '</cac:Country>',
    '</cac:PostalAddress>',
    '</cac:Party>'
  ];

  const invoiceLines = Array.from(Array(lineItems).keys(), (index) => renderLine(index + 1)).join('');

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"',
    'xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"',
    'xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">',
    `<cbc:ID>${id}</cbc:ID>`,
    '<cbc:IssueDate>2025-01-25</cbc:IssueDate>',
    '<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>',
    '<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>',
    '<cac:AccountingSupplierParty>',
    ...renderParty('Test Supplier', 'Berlin', '10115'),
    '</cac:AccountingSupplierParty>',
    '<cac:AccountingCustomerParty>',
    ...renderParty('Test Customer', 'Munich', '80331'),
    '</cac:AccountingCustomerParty>',
    '<cac:LegalMonetaryTotal>',
    `<cbc:PayableAmount currencyID="EUR">${100 * lineItems}.00</cbc:PayableAmount>`,
    '</cac:LegalMonetaryTotal>',
    invoiceLines,
    '</Invoice>'
  ].join('\n');
}
// Times EInvoice.fromPdf over small, medium and large generated PDFs and
// checks the average against a per-page budget.
tap.test('PERF-03: Basic PDF extraction performance', async () => {
  const iterations = 20;
  const scenarios = [
    { name: 'Small PDF', pages: 1, lineItems: 10 },
    { name: 'Medium PDF', pages: 10, lineItems: 50 },
    { name: 'Large PDF', pages: 50, lineItems: 200 }
  ];

  for (const { name, pages, lineItems } of scenarios) {
    const xmlContent = createTestXml(`PDF-${name}`, lineItems);
    const pdfBuffer = await createTestPdf(name, xmlContent, pages);
    console.log(`Testing ${name}: ${(pdfBuffer.length / 1024).toFixed(2)} KB`);

    let totalMs = 0;
    for (let run = 0; run < iterations; run += 1) {
      const startedAt = performance.now();
      const einvoice = await EInvoice.fromPdf(pdfBuffer);
      const elapsed = performance.now() - startedAt;

      totalMs += elapsed;
      performanceTracker.addMeasurement(`extract-${name}`, elapsed);

      // Only the first run checks that the extracted invoice is the expected one.
      if (run === 0) {
        expect(einvoice.id).toContain(name);
      }
    }

    const avg = totalMs / iterations;
    const bytesPerMs = pdfBuffer.length / avg;
    console.log(` Average extraction time: ${avg.toFixed(3)}ms`);
    console.log(` Throughput: ${(bytesPerMs / 1024).toFixed(2)} KB/ms`);

    // Budget: 10ms per page plus a 100ms base allowance.
    const budgetMs = pages * 10 + 100;
    expect(avg).toBeLessThan(budgetMs);
  }
});
// Compares extraction time for three ways of attaching the invoice XML:
// a plain attachment, one with an AFRelationship, and one alongside a second file.
tap.test('PERF-03: Different attachment methods performance', async () => {
  const runsPerCase = 30;
  const xmlContent = createTestXml('ATTACHMENT-TEST', 20);

  // Shared scaffold: a one-page document whose attachments are added by the callback.
  const buildSinglePagePdf = async (
    addAttachments: (doc: PDFDocument) => Promise<void>
  ): Promise<Buffer> => {
    const doc = await PDFDocument.create();
    doc.addPage();
    await addAttachments(doc);
    return Buffer.from(await doc.save());
  };

  const testCases = [
    {
      name: 'Standard attachment',
      create: () =>
        buildSinglePagePdf((doc) =>
          doc.attach(Buffer.from(xmlContent), 'invoice.xml', {
            mimeType: 'application/xml'
          })
        )
    },
    {
      name: 'With AFRelationship',
      create: () =>
        buildSinglePagePdf((doc) =>
          doc.attach(Buffer.from(xmlContent), 'invoice.xml', {
            mimeType: 'application/xml',
            afRelationship: plugins.AFRelationship.Data
          })
        )
    },
    {
      name: 'Multiple attachments',
      create: () =>
        buildSinglePagePdf(async (doc) => {
          // Main invoice first, then an unrelated extra file.
          await doc.attach(Buffer.from(xmlContent), 'invoice.xml', {
            mimeType: 'application/xml'
          });
          await doc.attach(Buffer.from('<extra>data</extra>'), 'extra.xml', {
            mimeType: 'application/xml'
          });
        })
    }
  ];

  for (const { name, create } of testCases) {
    const pdfBuffer = await create();

    let totalMs = 0;
    for (let run = 0; run < runsPerCase; run += 1) {
      const startedAt = performance.now();
      const einvoice = await EInvoice.fromPdf(pdfBuffer);
      totalMs += performance.now() - startedAt;

      if (run === 0) {
        expect(einvoice.id).toEqual('ATTACHMENT-TEST');
      }
    }

    const avg = totalMs / runsPerCase;
    console.log(`${name}: avg=${avg.toFixed(3)}ms`);
    performanceTracker.addMeasurement(`attachment-${name}`, avg);

    // Every attachment style shares the same 50ms average budget.
    expect(avg).toBeLessThan(50);
  }
});
// Checks how extraction time grows with the number of invoice lines in the
// embedded XML.
tap.test('PERF-03: XML size impact on extraction', async () => {
  const runsPerSize = 20;

  for (const size of [1, 10, 50, 100, 500]) {
    const xmlContent = createTestXml(`SIZE-${size}`, size);
    const pdfBuffer = await createTestPdf(`Size test ${size} items`, xmlContent);

    let totalMs = 0;
    for (let run = 0; run < runsPerSize; run += 1) {
      const startedAt = performance.now();
      await EInvoice.fromPdf(pdfBuffer);
      totalMs += performance.now() - startedAt;
    }

    const avg = totalMs / runsPerSize;
    const xmlSizeKB = (xmlContent.length / 1024).toFixed(2);
    console.log(`XML with ${size} items (${xmlSizeKB} KB): avg=${avg.toFixed(3)}ms`);
    performanceTracker.addMeasurement(`xml-size-${size}`, avg);

    // Budget grows linearly: 0.5ms per line item on top of a 30ms base.
    const budgetMs = size * 0.5 + 30;
    expect(avg).toBeLessThan(budgetMs);
  }
});
// Starts 1, 5 and 10 extractions of the same PDF at once and checks the
// wall-clock time per extraction.
tap.test('PERF-03: Concurrent PDF extraction', async () => {
  const pdfBuffer = await createTestPdf('Concurrent test', createTestXml('CONCURRENT', 20));

  for (const count of [1, 5, 10]) {
    const startedAt = performance.now();
    const pending = Array.from({ length: count }, () => EInvoice.fromPdf(pdfBuffer));
    const results = await Promise.all(pending);
    const totalTime = performance.now() - startedAt;

    const avgTimePerExtraction = totalTime / count;
    console.log(`Concurrent extractions (${count}): total=${totalTime.toFixed(2)}ms, avg per extraction=${avgTimePerExtraction.toFixed(2)}ms`);

    // Every parallel extraction must return the same invoice.
    const allMatch = results.every((invoice) => invoice.id === 'CONCURRENT');
    expect(allMatch).toEqual(true);

    // Per-extraction share of the batch must stay under 100ms.
    expect(avgTimePerExtraction).toBeLessThan(100);
  }
});
// Times EInvoice.fromPdf on inputs that are expected to be rejected: a PDF
// with no attachment, a buffer that is not a PDF, and a PDF whose attachment
// is not well-formed XML. Rejections are counted and logged so the output
// shows whether the error path was actually exercised; the count is reported
// only, not asserted.
tap.test('PERF-03: Error handling performance', async () => {
  const errorCases = [
    {
      name: 'PDF without XML',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        // No XML attachment
        return Buffer.from(await pdfDoc.save());
      }
    },
    {
      name: 'Invalid PDF',
      create: async () => Buffer.from('Not a PDF')
    },
    {
      name: 'Corrupted attachment',
      create: async () => {
        const pdfDoc = await PDFDocument.create();
        pdfDoc.addPage();
        await pdfDoc.attach(Buffer.from('<<<invalid xml>>>'), 'invoice.xml', {
          mimeType: 'application/xml'
        });
        return Buffer.from(await pdfDoc.save());
      }
    }
  ];

  for (const errorCase of errorCases) {
    const pdfBuffer = await errorCase.create();
    const times: number[] = [];
    let rejected = 0;

    for (let i = 0; i < 20; i++) {
      const startTime = performance.now();
      try {
        await EInvoice.fromPdf(pdfBuffer);
      } catch {
        // Expected error: record it instead of discarding it silently.
        rejected += 1;
      }
      const endTime = performance.now();
      times.push(endTime - startTime);
    }

    const avg = times.reduce((a, b) => a + b, 0) / times.length;
    console.log(`${errorCase.name} - Error handling: avg=${avg.toFixed(3)}ms`);
    console.log(` Rejected ${rejected}/${times.length} extractions`);

    // Error cases should fail fast
    expect(avg).toBeLessThan(10);
  }
});
// Extracts the same 100-page PDF ten times and compares heap usage before
// and after to catch large growth.
tap.test('PERF-03: Memory efficiency during extraction', async () => {
  const largePdf = await createTestPdf('Memory test', createTestXml('MEMORY-TEST', 100), 100);
  console.log(`Large PDF size: ${(largePdf.length / 1024 / 1024).toFixed(2)} MB`);

  const heapBefore = process.memoryUsage().heapUsed;
  const extractionTimes: number[] = [];

  // Repeat the extraction so any per-call growth accumulates.
  let run = 0;
  while (run < 10) {
    const startedAt = performance.now();
    const einvoice = await EInvoice.fromPdf(largePdf);
    extractionTimes.push(performance.now() - startedAt);
    expect(einvoice.id).toEqual('MEMORY-TEST');
    run += 1;
  }

  const heapAfter = process.memoryUsage().heapUsed;
  const memoryIncrease = (heapAfter - heapBefore) / 1024 / 1024;
  const totalMs = extractionTimes.reduce((sum, ms) => sum + ms, 0);

  console.log(`Memory increase after 10 extractions: ${memoryIncrease.toFixed(2)} MB`);
  console.log(`Average extraction time: ${(totalMs / extractionTimes.length).toFixed(2)}ms`);

  // Heap growth across the ten runs must stay under 100MB.
  expect(memoryIncrease).toBeLessThan(100);
});
// Prints every recorded series and applies a final budget to the small-PDF
// extraction average when that series exists.
tap.test('PERF-03: Performance Summary', async () => {
  performanceTracker.printSummary();

  const smallPdfStats = performanceTracker.getStats('extract-Small PDF');
  if (smallPdfStats !== null) {
    const { avg } = smallPdfStats;
    console.log(`\nSmall PDF extraction performance: avg=${avg.toFixed(2)}ms`);
    // Small PDFs are expected to average under 50ms.
    expect(avg).toBeLessThan(50);
  }

  console.log('\nPDF extraction performance tests completed successfully');
});
// Hand control to the tap runner for the tests registered above.
tap.start();