// einvoice/test/suite/einvoice_pdf-operations/test.pdf-08.large-pdf-performance.ts
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
// rgb() builds the pdf-lib color values used for rectangle borders and fills in the tests.
import { rgb } from 'pdf-lib';
// Simple instance-based performance tracker for this test
/**
 * Collects timing samples (milliseconds) under string keys and prints a summary.
 *
 * Every call to `addMeasurement` appends one sample to the list stored for its key;
 * `getAverageTime` averages over all samples of all keys.
 */
class SimplePerformanceTracker {
  /** Samples grouped by measurement key, in insertion order. */
  private readonly measurements = new Map<string, number[]>();
  /** Label printed in the summary header. */
  private readonly name: string;

  /**
   * @param name Label used as the heading of `printSummary` output.
   */
  constructor(name: string) {
    this.name = name;
  }

  /**
   * Records one sample for `key`, creating the key's sample list on first use.
   *
   * @param key Measurement name.
   * @param time Sample value in milliseconds.
   */
  addMeasurement(key: string, time: number): void {
    const samples = this.measurements.get(key);
    if (samples) {
      samples.push(time);
    } else {
      this.measurements.set(key, [time]);
    }
  }

  /**
   * @returns The mean of every recorded sample across all keys, or 0 when nothing was recorded.
   */
  getAverageTime(): number {
    let total = 0;
    let count = 0;
    for (const times of this.measurements.values()) {
      for (const time of times) {
        total += time;
        count++;
      }
    }
    return count > 0 ? total / count : 0;
  }

  /**
   * Logs avg/min/max and run count per key, followed by the overall average.
   */
  printSummary(): void {
    console.log(`\n${this.name} - Performance Summary:`);
    for (const [key, times] of this.measurements) {
      // Single pass instead of Math.min(...times)/Math.max(...times): spreading a very
      // large array into call arguments can exceed the engine's argument limit.
      let sum = 0;
      let min = Infinity;
      let max = -Infinity;
      for (const time of times) {
        sum += time;
        if (time < min) min = time;
        if (time > max) max = time;
      }
      const avg = sum / times.length;
      console.log(` ${key}: avg=${avg.toFixed(2)}ms, min=${min.toFixed(2)}ms, max=${max.toFixed(2)}ms (${times.length} runs)`);
    }
    console.log(` Overall average: ${this.getAverageTime().toFixed(2)}ms`);
  }
}
// Tracker instance shared by the tests in this file; each test records its elapsed time into it.
const performanceTracker = new SimplePerformanceTracker('PDF-08: Large PDF Performance');
// Builds PDFs of 1, 10, 50 and 100 pages, attaches a UBL invoice to each, extracts the invoice
// again and checks the extraction stays within a per-size time budget.
tap.test('PDF-08: Process PDFs of increasing size', async () => {
  const startTime = performance.now();

  // Dynamic import for EInvoice
  const { EInvoice } = await import('../../../ts/index.js');
  const { PDFDocument } = plugins;

  // Test different PDF sizes; expectedTime is the extraction budget in milliseconds
  const sizes = [
    { pages: 1, name: '1-page', expectedTime: 1000 },
    { pages: 10, name: '10-page', expectedTime: 2000 },
    { pages: 50, name: '50-page', expectedTime: 5000 },
    { pages: 100, name: '100-page', expectedTime: 10000 }
  ];

  for (const sizeTest of sizes) {
    const sizeStartTime = performance.now();
    const pdfDoc = await PDFDocument.create();

    // Create multiple pages
    for (let i = 0; i < sizeTest.pages; i++) {
      const page = pdfDoc.addPage([595, 842]); // A4

      // Add content to each page
      page.drawText(`Invoice Page ${i + 1} of ${sizeTest.pages}`, {
        x: 50,
        y: 750,
        size: 20
      });

      // Add some graphics to increase file size
      page.drawRectangle({
        x: 50,
        y: 600,
        width: 495,
        height: 100,
        borderColor: rgb(0, 0, 0),
        borderWidth: 1
      });

      // Add text content
      for (let j = 0; j < 20; j++) {
        page.drawText(`Line item ${j + 1}: Product description with details`, {
          x: 60,
          y: 580 - (j * 20),
          size: 10
        });
      }
    }

    // Add a simple but valid UBL invoice XML
    const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>LARGE-PDF-${sizeTest.name}</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Customer</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">100.00</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test item for ${sizeTest.pages} page PDF</cbc:Name>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>
</Invoice>`;

    await pdfDoc.attach(
      Buffer.from(xmlContent, 'utf8'),
      'invoice.xml',
      {
        mimeType: 'application/xml',
        description: `Invoice for ${sizeTest.pages} page document`
      }
    );

    const pdfBytes = await pdfDoc.save();
    const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);

    // Test extraction performance
    const extractStartTime = performance.now();
    let xmlString: string | undefined;
    let extractTime = 0;

    try {
      const einvoice = await EInvoice.fromPdf(pdfBytes);
      xmlString = await einvoice.toXmlString('ubl');
      extractTime = performance.now() - extractStartTime;
    } catch (error) {
      // Extraction/export failures are tolerated and only logged (best effort).
      console.log(
        `${sizeTest.name} extraction error:`,
        error instanceof Error ? error.message : String(error)
      );
    }

    // Assertions live outside the try block: inside it, a failed expect() would be
    // caught by the handler above and the test could never fail.
    if (xmlString !== undefined) {
      expect(xmlString).toContain(`LARGE-PDF-${sizeTest.name}`);
      console.log(`${sizeTest.name} (${sizeMB} MB): Extraction took ${extractTime.toFixed(2)}ms`);

      // Check if extraction time is reasonable
      expect(extractTime).toBeLessThan(sizeTest.expectedTime);
    }

    const sizeElapsed = performance.now() - sizeStartTime;
    performanceTracker.addMeasurement(`size-${sizeTest.name}`, sizeElapsed);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('increasing-sizes', elapsed);
});
// Creates a 200-page PDF carrying a 1000-line UBL invoice, runs it through EInvoice.fromPdf
// and logs process memory before and after. It only reports figures; nothing is asserted.
tap.test('PDF-08: Memory usage with large PDFs', async () => {
  const startTime = performance.now();

  // Dynamic import for EInvoice
  const { EInvoice } = await import('../../../ts/index.js');

  // Monitor memory usage
  const initialMemory = process.memoryUsage();
  console.log('Initial memory (MB):', {
    rss: (initialMemory.rss / 1024 / 1024).toFixed(2),
    heapUsed: (initialMemory.heapUsed / 1024 / 1024).toFixed(2)
  });

  const { PDFDocument } = plugins;
  const pdfDoc = await PDFDocument.create();

  // Create a large PDF with many objects
  const pageCount = 200;
  for (let i = 0; i < pageCount; i++) {
    const page = pdfDoc.addPage();

    // Add many small objects to increase complexity (5 rows of 10 labels per page)
    for (let j = 0; j < 50; j++) {
      page.drawText(`Item ${i}-${j}`, {
        x: 50 + (j % 10) * 50,
        y: 700 - Math.floor(j / 10) * 20,
        size: 8
      });
    }
  }

  // Invoice line parameters. The payable total is derived from them so the header total
  // agrees with the sum of the generated lines (the previous 1000 * 99.99 ignored the
  // quantity of 10 per line).
  const lineItemCount = 1000;
  const lineExtensionAmount = 999.9; // 10 units at 99.99 each
  const payableAmount = (lineItemCount * lineExtensionAmount).toFixed(2);

  // Add large but valid UBL XML attachment
  let xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>LARGE-MEMORY-TEST</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Customer</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">${payableAmount}</cbc:PayableAmount>
</cac:LegalMonetaryTotal>`;

  // Add many line items to increase file size
  for (let i = 0; i < lineItemCount; i++) {
    xmlContent += `
<cac:InvoiceLine>
<cbc:ID>${i + 1}</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">10</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${lineExtensionAmount.toFixed(2)}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Product item ${i} with long description text that increases file size</cbc:Name>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="EUR">99.99</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>`;
  }
  xmlContent += '\n</Invoice>';

  await pdfDoc.attach(
    Buffer.from(xmlContent, 'utf8'),
    'large-invoice.xml',
    {
      mimeType: 'application/xml',
      description: 'Large invoice with many line items'
    }
  );

  const pdfBytes = await pdfDoc.save();
  const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);
  console.log(`Created large PDF: ${sizeMB} MB`);

  // Test memory usage during processing (the parsed invoice itself is not needed)
  await EInvoice.fromPdf(pdfBytes);

  const afterMemory = process.memoryUsage();
  console.log('After processing memory (MB):', {
    rss: (afterMemory.rss / 1024 / 1024).toFixed(2),
    heapUsed: (afterMemory.heapUsed / 1024 / 1024).toFixed(2)
  });

  const memoryIncrease = afterMemory.heapUsed - initialMemory.heapUsed;
  console.log(`Memory increase: ${(memoryIncrease / 1024 / 1024).toFixed(2)} MB`);

  // Force garbage collection if available (global.gc exists only when Node runs with --expose-gc)
  if (global.gc) {
    global.gc();
    const gcMemory = process.memoryUsage();
    console.log('After GC memory (MB):', {
      heapUsed: (gcMemory.heapUsed / 1024 / 1024).toFixed(2)
    });
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('memory-usage', elapsed);
});

// Times a full in-memory load of a 50-page PDF with an embedded UBL invoice. No streaming
// API is exercised here; the streaming side of the comparison is only a logged placeholder.
tap.test('PDF-08: Streaming vs loading performance', async () => {
  const testStartedAt = performance.now();

  // EInvoice is loaded lazily through a dynamic import
  const { EInvoice } = await import('../../../ts/index.js');
  const { PDFDocument } = plugins;

  // Build a moderately large document: 50 pages, one heading each
  const pdf = await PDFDocument.create();
  let pageNumber = 1;
  while (pageNumber <= 50) {
    pdf.addPage().drawText(`Page ${pageNumber}`, { x: 50, y: 700, size: 20 });
    pageNumber += 1;
  }

  const embeddedInvoiceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>STREAM-TEST</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Supplier</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName>
<cbc:Name>Test Customer</cbc:Name>
</cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country>
<cbc:IdentificationCode>DE</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">100.00</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Test item</cbc:Name>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>
</Invoice>`;

  const attachmentOptions = { mimeType: 'application/xml' };
  await pdf.attach(Buffer.from(embeddedInvoiceXml, 'utf8'), 'invoice.xml', attachmentOptions);
  const serializedPdf = await pdf.save();

  // Measure the full (non-streaming) load
  const loadStartedAt = performance.now();
  await EInvoice.fromPdf(serializedPdf);
  const fullLoadMs = performance.now() - loadStartedAt;
  console.log(`Full loading time: ${fullLoadMs.toFixed(2)}ms`);

  // A real comparison needs stream API support; until then this line stands in for it
  console.log('Streaming API would potentially reduce memory usage for large files');

  performanceTracker.addMeasurement('streaming-comparison', performance.now() - testStartedAt);
});
// Builds four PDFs (30-60 pages) with embedded UBL invoices, extracts all of them concurrently
// and checks that each result contains the id of the document it came from.
tap.test('PDF-08: Concurrent large PDF processing', async () => {
  const startTime = performance.now();

  // Dynamic import for EInvoice
  const { EInvoice } = await import('../../../ts/index.js');
  const { PDFDocument } = plugins;

  // Creates one PDF with `pages` pages and a minimal UBL invoice whose ID is `id`
  const createPdf = async (id: string, pages: number) => {
    const pdfDoc = await PDFDocument.create();
    for (let i = 0; i < pages; i++) {
      const page = pdfDoc.addPage();
      page.drawText(`Document ${id} - Page ${i + 1}`, { x: 50, y: 700, size: 16 });
    }

    // Create a minimal valid UBL invoice
    const minimalUbl = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>${id}</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName><cbc:Name>Supplier</cbc:Name></cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country><cbc:IdentificationCode>DE</cbc:IdentificationCode></cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName><cbc:Name>Customer</cbc:Name></cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country><cbc:IdentificationCode>DE</cbc:IdentificationCode></cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">100.00</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item><cbc:Name>Item</cbc:Name></cac:Item>
<cac:Price><cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount></cac:Price>
</cac:InvoiceLine>
</Invoice>`;

    await pdfDoc.attach(
      Buffer.from(minimalUbl, 'utf8'),
      'invoice.xml',
      { mimeType: 'application/xml' }
    );

    return pdfDoc.save();
  };

  // Documents to create; the same list drives the assertions below
  const documents = [
    { id: 'PDF-A', pages: 30 },
    { id: 'PDF-B', pages: 40 },
    { id: 'PDF-C', pages: 50 },
    { id: 'PDF-D', pages: 60 }
  ];

  // Create PDFs
  const pdfs = await Promise.all(documents.map((doc) => createPdf(doc.id, doc.pages)));

  // Process concurrently. The parameter type is inferred from save(); annotating it as
  // Buffer was inaccurate because save() resolves to a Uint8Array.
  const concurrentStartTime = performance.now();
  const processPromises = pdfs.map(async (pdfBytes) => {
    const einvoice = await EInvoice.fromPdf(pdfBytes);
    return einvoice.toXmlString('ubl');
  });

  const results = await Promise.all(processPromises);
  const concurrentTime = performance.now() - concurrentStartTime;

  expect(results.length).toEqual(documents.length);
  // Promise.all preserves input order, so results[i] belongs to documents[i]
  results.forEach((xml: string, index: number) => {
    expect(xml).toContain(documents[index].id);
  });

  console.log(`Concurrent processing of ${documents.length} PDFs: ${concurrentTime.toFixed(2)}ms`);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('concurrent-processing', elapsed);
});

// Builds a PDF with a form-like first page, three filler attachments and 25 alternating
// text-heavy / graphic-heavy pages, then times EInvoice.fromPdf on it. Processing errors are
// logged rather than failing the test.
tap.test('PDF-08: Large PDF with complex structure', async () => {
  const startTime = performance.now();

  // Dynamic import for EInvoice
  const { EInvoice } = await import('../../../ts/index.js');
  const { PDFDocument } = plugins;
  const pdfDoc = await PDFDocument.create();

  // Create complex structure with forms, annotations, etc.
  const formPage = pdfDoc.addPage();

  // Add form fields (simplified - actual forms require more setup)
  formPage.drawText('Invoice Form', { x: 50, y: 750, size: 24 });
  formPage.drawRectangle({
    x: 50,
    y: 700,
    width: 200,
    height: 30,
    borderColor: rgb(0, 0, 0.5),
    borderWidth: 1
  });
  formPage.drawText('Invoice Number:', { x: 55, y: 710, size: 12 });

  // Add multiple embedded files (size is the attachment length in bytes)
  const attachments = [
    { name: 'invoice.xml', size: 10000 },
    { name: 'terms.pdf', size: 50000 },
    { name: 'logo.png', size: 20000 }
  ];

  for (const att of attachments) {
    // Dummy content: `size` bytes of 'A'. Note that invoice.xml is therefore not real XML.
    const content = Buffer.alloc(att.size, 'A');
    await pdfDoc.attach(content, att.name, {
      mimeType: att.name.endsWith('.xml') ? 'application/xml' : 'application/octet-stream',
      description: `Attachment: ${att.name}`
    });
  }

  // Add many pages with different content types
  for (let i = 0; i < 25; i++) {
    const page = pdfDoc.addPage();

    // Alternate between text-heavy and graphic-heavy pages
    if (i % 2 === 0) {
      // Text-heavy page
      for (let j = 0; j < 40; j++) {
        page.drawText(`Line ${j + 1}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.`, {
          x: 50,
          y: 750 - (j * 18),
          size: 10
        });
      }
    } else {
      // Graphic-heavy page: 10 x 10 grid of randomly colored squares
      for (let j = 0; j < 10; j++) {
        for (let k = 0; k < 10; k++) {
          page.drawRectangle({
            x: 50 + (k * 50),
            y: 700 - (j * 50),
            width: 45,
            height: 45,
            color: rgb(Math.random(), Math.random(), Math.random())
          });
        }
      }
    }
  }

  const pdfBytes = await pdfDoc.save();
  const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);
  console.log(`Complex PDF size: ${sizeMB} MB`);

  // Test processing
  const processStartTime = performance.now();
  try {
    await EInvoice.fromPdf(pdfBytes);
    const processTime = performance.now() - processStartTime;
    console.log(`Complex PDF processed in: ${processTime.toFixed(2)}ms`);
  } catch (error) {
    // Deliberately best effort: a failure is reported but does not fail the test
    console.log(
      'Complex PDF processing error:',
      error instanceof Error ? error.message : String(error)
    );
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('complex-structure', elapsed);
});
// Simulates a corpus by generating 5 small, 3 medium and 2 large PDFs in memory, buckets them
// by byte size, runs EInvoice.fromPdf on those above 1 MB and logs size statistics.
tap.test('PDF-08: Corpus large PDF analysis', async () => {
  const startTime = performance.now();

  // Dynamic import for EInvoice
  const { EInvoice } = await import('../../../ts/index.js');
  const { PDFDocument } = plugins;

  let largeFileCount = 0;
  let totalSize = 0;
  let processedCount = 0;
  const sizeDistribution = {
    small: 0, // < 100KB
    medium: 0, // 100KB - 1MB
    large: 0, // 1MB - 10MB
    veryLarge: 0 // > 10MB
  };

  // Renders one UBL InvoiceLine with quantity 1; `amount` is used for both the line
  // extension amount and the unit price.
  const buildInvoiceLine = (lineId: number, amount: string, itemName: string): string =>
    `<cac:InvoiceLine>
<cbc:ID>${lineId}</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">${amount}</cbc:LineExtensionAmount>
<cac:Item><cbc:Name>${itemName}</cbc:Name></cac:Item>
<cac:Price><cbc:PriceAmount currencyID="EUR">${amount}</cbc:PriceAmount></cac:Price>
</cac:InvoiceLine>`;

  // Renders the complete UBL invoice document shared by all corpus PDFs; only the id,
  // the payable total and the invoice lines vary.
  const buildInvoiceXml = (id: string, payableAmount: string, invoiceLines: string[]): string =>
    `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>${id}</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName><cbc:Name>Supplier</cbc:Name></cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country><cbc:IdentificationCode>DE</cbc:IdentificationCode></cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName><cbc:Name>Customer</cbc:Name></cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country><cbc:IdentificationCode>DE</cbc:IdentificationCode></cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">${payableAmount}</cbc:PayableAmount>
</cac:LegalMonetaryTotal>${invoiceLines.map((line) => `\n${line}`).join('')}
</Invoice>`;

  const attachmentOptions = {
    mimeType: 'application/xml',
    description: 'Invoice XML'
  };

  // Create test PDFs of various sizes to simulate corpus
  const testPdfs: Array<{ path: string; content: Buffer }> = [];

  // Create small PDFs
  for (let i = 0; i < 5; i++) {
    const pdfDoc = await PDFDocument.create();
    const page = pdfDoc.addPage();
    page.drawText(`Small PDF ${i}`, { x: 50, y: 700, size: 12 });

    const xmlContent = buildInvoiceXml(`SMALL-${i}`, '100.00', [
      buildInvoiceLine(1, '100.00', 'Item')
    ]);
    await pdfDoc.attach(Buffer.from(xmlContent, 'utf8'), 'invoice.xml', attachmentOptions);

    const pdfBytes = await pdfDoc.save();
    testPdfs.push({ path: `small-${i}.pdf`, content: Buffer.from(pdfBytes) });
  }

  // Create medium PDFs
  for (let i = 0; i < 3; i++) {
    const pdfDoc = await PDFDocument.create();

    // Add multiple pages
    for (let j = 0; j < 50; j++) {
      const page = pdfDoc.addPage();
      page.drawText(`Medium PDF ${i} - Page ${j}`, { x: 50, y: 700, size: 12 });

      // Add content to increase size
      for (let k = 0; k < 20; k++) {
        page.drawText(`Line ${k}: Lorem ipsum dolor sit amet`, {
          x: 50,
          y: 650 - (k * 20),
          size: 10
        });
      }
    }

    // Add multiple line items (50 lines of 10.00 match the 500.00 payable total).
    // The lines are collected first and rendered in one go; the previous code appended
    // to a `const` string, which is not assignable.
    const invoiceLines: string[] = [];
    for (let j = 0; j < 50; j++) {
      invoiceLines.push(buildInvoiceLine(j + 1, '10.00', `Item ${j}`));
    }
    const xmlContent = buildInvoiceXml(`MEDIUM-${i}`, '500.00', invoiceLines);
    await pdfDoc.attach(Buffer.from(xmlContent, 'utf8'), 'invoice.xml', attachmentOptions);

    const pdfBytes = await pdfDoc.save();
    testPdfs.push({ path: `medium-${i}.pdf`, content: Buffer.from(pdfBytes) });
  }

  // Create large PDFs
  for (let i = 0; i < 2; i++) {
    const pdfDoc = await PDFDocument.create();

    // Add many pages
    for (let j = 0; j < 200; j++) {
      const page = pdfDoc.addPage();
      page.drawText(`Large PDF ${i} - Page ${j}`, { x: 50, y: 700, size: 12 });

      // Add dense content
      for (let k = 0; k < 40; k++) {
        page.drawText(`Line ${k}: Lorem ipsum dolor sit amet, consectetur adipiscing elit`, {
          x: 50,
          y: 650 - (k * 15),
          size: 8
        });
      }
    }

    const xmlContent = buildInvoiceXml(`LARGE-${i}`, '10000.00', [
      buildInvoiceLine(1, '10000.00', 'Large item')
    ]);
    await pdfDoc.attach(Buffer.from(xmlContent, 'utf8'), 'invoice.xml', attachmentOptions);

    const pdfBytes = await pdfDoc.save();
    testPdfs.push({ path: `large-${i}.pdf`, content: Buffer.from(pdfBytes) });
  }

  // Process test PDFs
  for (const testPdf of testPdfs) {
    const sizeBytes = testPdf.content.length;
    const sizeMB = sizeBytes / 1024 / 1024;
    totalSize += sizeBytes;

    if (sizeBytes < 100 * 1024) {
      sizeDistribution.small++;
    } else if (sizeBytes < 1024 * 1024) {
      sizeDistribution.medium++;
    } else if (sizeBytes < 10 * 1024 * 1024) {
      sizeDistribution.large++;
      largeFileCount++;
    } else {
      sizeDistribution.veryLarge++;
      largeFileCount++;
    }

    // Test large file processing
    if (sizeMB > 1) {
      const testStartTime = performance.now();
      try {
        await EInvoice.fromPdf(testPdf.content);
        const testTime = performance.now() - testStartTime;
        console.log(`Large file ${testPdf.path} (${sizeMB.toFixed(2)} MB) processed in ${testTime.toFixed(2)}ms`);
      } catch (error) {
        // Best effort: report the failure and continue with the next file
        console.log(
          `Large file ${testPdf.path} processing failed:`,
          error instanceof Error ? error.message : String(error)
        );
      }
    }

    processedCount++;
  }

  const avgSize = totalSize / processedCount / 1024;
  console.log(`Corpus PDF analysis (${processedCount} files):`);
  console.log(`- Average size: ${avgSize.toFixed(2)} KB`);
  console.log(`- Large files (>1MB): ${largeFileCount}`);
  console.log('Size distribution:', sizeDistribution);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-large-pdfs', elapsed);
});

// Repeats the same create-and-extract cycle several times and checks that the last
// iteration is not drastically slower (or faster) than the first.
tap.test('PDF-08: Performance degradation test', async () => {
  const startTime = performance.now();

  // Dynamic import for EInvoice
  const { EInvoice } = await import('../../../ts/index.js');
  const { PDFDocument } = plugins;

  const iterations = 5;
  const processingTimes: number[] = [];

  // Test if performance degrades with repeated operations
  for (let iteration = 0; iteration < iterations; iteration++) {
    // Note: the timed span covers PDF creation as well as extraction
    const iterStartTime = performance.now();

    // Create PDF
    const pdfDoc = await PDFDocument.create();
    for (let i = 0; i < 20; i++) {
      const page = pdfDoc.addPage();
      page.drawText(`Iteration ${iteration + 1} - Page ${i + 1}`, {
        x: 50,
        y: 700,
        size: 16
      });
    }

    // Create a minimal valid UBL invoice for performance test
    const perfUbl = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:ID>PERF-${iteration}</cbc:ID>
<cbc:IssueDate>2025-01-25</cbc:IssueDate>
<cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>EUR</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PartyName><cbc:Name>Supplier</cbc:Name></cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Berlin</cbc:CityName>
<cbc:PostalZone>10115</cbc:PostalZone>
<cac:Country><cbc:IdentificationCode>DE</cbc:IdentificationCode></cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PartyName><cbc:Name>Customer</cbc:Name></cac:PartyName>
<cac:PostalAddress>
<cbc:CityName>Munich</cbc:CityName>
<cbc:PostalZone>80331</cbc:PostalZone>
<cac:Country><cbc:IdentificationCode>DE</cbc:IdentificationCode></cac:Country>
</cac:PostalAddress>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:LegalMonetaryTotal>
<cbc:PayableAmount currencyID="EUR">100.00</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="C62">1</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="EUR">100.00</cbc:LineExtensionAmount>
<cac:Item><cbc:Name>Item</cbc:Name></cac:Item>
<cac:Price><cbc:PriceAmount currencyID="EUR">100.00</cbc:PriceAmount></cac:Price>
</cac:InvoiceLine>
</Invoice>`;

    await pdfDoc.attach(
      Buffer.from(perfUbl, 'utf8'),
      'invoice.xml',
      { mimeType: 'application/xml' }
    );

    const pdfBytes = await pdfDoc.save();

    // Process PDF
    const einvoice = await EInvoice.fromPdf(pdfBytes);
    await einvoice.toXmlString('ubl');

    const iterTime = performance.now() - iterStartTime;
    processingTimes.push(iterTime);
    console.log(`Iteration ${iteration + 1}: ${iterTime.toFixed(2)}ms`);

    // Allow for cleanup between iterations (skipped after the final one);
    // global.gc exists only when Node runs with --expose-gc
    if (global.gc && iteration < iterations - 1) {
      global.gc();
    }

    // Small delay to stabilize performance
    await new Promise(resolve => setTimeout(resolve, 10));
  }

  // Check for performance degradation: relative change of the last run versus the first, in percent
  const firstTime = processingTimes[0];
  const lastTime = processingTimes[processingTimes.length - 1];
  const degradation = ((lastTime - firstTime) / firstTime) * 100;
  console.log(`Performance degradation: ${degradation.toFixed(2)}%`);
  expect(Math.abs(degradation)).toBeLessThan(150); // Allow up to 150% variation for performance tests

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('degradation-test', elapsed);
});

// Final test: prints everything the tracker collected and asserts that the mean of all
// recorded samples stays below five seconds.
tap.test('PDF-08: Performance Summary', async () => {
  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertions
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(5000); // Large PDFs may take longer

  console.log('PDF-08: Large PDF Performance tests completed');
});
// Start the tap runner now that all tests above are registered.
tap.start();