// Listing metadata from the code viewer: 495 lines, 16 KiB, TypeScript
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
|
import * as plugins from '../plugins.js';
|
|
import { EInvoice } from '../../../ts/index.js';
|
|
import { CorpusLoader } from '../corpus.loader.js';
|
|
import { PerformanceTracker } from '../performance.tracker.js';
|
|
|
|
tap.test('PDF-08: Large PDF Performance - should handle large PDFs efficiently', async (t) => {
|
|
// PDF-08: performance checks for large PDF files.
// The subtests below build or load large PDFs and record how long each run takes.

// One tracker is shared by all subtests; each subtest adds its own measurement.
const suiteLabel = 'PDF-08: Large PDF Performance';
const performanceTracker = new PerformanceTracker(suiteLabel);

// Used by the corpus subtest to list and read sample files.
const corpusLoader = new CorpusLoader();
|
|
|
|
// Subtest: build PDFs of 1, 10, 50 and 100 pages, attach a small invoice XML to
// each, load the result through EInvoice and check that the XML is returned
// within a per-size time budget. One measurement is recorded per size, plus one
// for the whole subtest.
t.test('Process PDFs of increasing size', async () => {
  const startTime = performance.now();

  const { PDFDocument } = plugins;

  // Page counts under test. expectedTime is compared against a
  // performance.now() difference, i.e. milliseconds.
  const sizes = [
    { pages: 1, name: '1-page', expectedTime: 100 },
    { pages: 10, name: '10-page', expectedTime: 200 },
    { pages: 50, name: '50-page', expectedTime: 500 },
    { pages: 100, name: '100-page', expectedTime: 1000 }
  ];

  for (const sizeTest of sizes) {
    const sizeStartTime = performance.now();

    const pdfDoc = await PDFDocument.create();

    // Create multiple pages
    for (let i = 0; i < sizeTest.pages; i++) {
      const page = pdfDoc.addPage([595, 842]); // A4

      // Add content to each page
      page.drawText(`Invoice Page ${i + 1} of ${sizeTest.pages}`, {
        x: 50,
        y: 750,
        size: 20
      });

      // Add some graphics to increase file size.
      // NOTE(review): borderColor is a bare {red, green, blue} object. If
      // PDFDocument comes from pdf-lib, colors are normally built with rgb();
      // confirm this shape is accepted at runtime.
      page.drawRectangle({
        x: 50,
        y: 600,
        width: 495,
        height: 100,
        borderColor: { red: 0, green: 0, blue: 0 },
        borderWidth: 1
      });

      // Add text content: 20 line items per page
      for (let j = 0; j < 20; j++) {
        page.drawText(`Line item ${j + 1}: Product description with details`, {
          x: 60,
          y: 580 - (j * 20),
          size: 10
        });
      }
    }

    // Add invoice XML; the ID embeds the size name so it can be checked after extraction
    const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>LARGE-PDF-${sizeTest.name}</ID>
<IssueDate>2025-01-25</IssueDate>
<Note>Test invoice for ${sizeTest.pages} page PDF</Note>
<LineItemCount>${sizeTest.pages * 20}</LineItemCount>
</Invoice>`;

    await pdfDoc.attach(
      Buffer.from(xmlContent, 'utf8'),
      'invoice.xml',
      {
        mimeType: 'application/xml',
        description: `Invoice for ${sizeTest.pages} page document`
      }
    );

    const pdfBytes = await pdfDoc.save();
    const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);

    // Test extraction performance
    const extractStartTime = performance.now();
    const einvoice = new EInvoice();

    // Only loading and reading the XML is best-effort. The assertions run
    // outside the try block so that a failed expectation fails the test
    // instead of being caught and logged as an "extraction error".
    let extracted: { xml: string; elapsedMs: number } | undefined;
    try {
      await einvoice.loadFromPdfBuffer(pdfBytes);
      const xml = einvoice.getXmlString();
      extracted = { xml, elapsedMs: performance.now() - extractStartTime };
    } catch (error: unknown) {
      // Narrow before reading .message: a thrown value need not be an Error.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`${sizeTest.name} extraction error:`, reason);
    }

    // Record the timing before asserting so the measurement is kept even
    // when an expectation below fails.
    const sizeElapsed = performance.now() - sizeStartTime;
    performanceTracker.addMeasurement(`size-${sizeTest.name}`, sizeElapsed);

    if (extracted !== undefined) {
      expect(extracted.xml).toContain(`LARGE-PDF-${sizeTest.name}`);
      console.log(`${sizeTest.name} (${sizeMB} MB): Extraction took ${extracted.elapsedMs.toFixed(2)}ms`);

      // Check if extraction time is reasonable
      expect(extracted.elapsedMs).toBeLessThan(sizeTest.expectedTime);
    }
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('increasing-sizes', elapsed);
});
|
|
|
|
// Subtest: build a 200-page PDF with a 1000-line-item XML attachment, load it
// through EInvoice and log process memory before, after and (when exposed)
// after a forced garbage collection. Nothing is asserted; only the elapsed
// time is recorded.
t.test('Memory usage with large PDFs', async () => {
  const testBegan = performance.now();

  // Formats a byte count as megabytes with two decimals.
  const bytesToMB = (bytes: number): string => (bytes / 1024 / 1024).toFixed(2);

  // Baseline snapshot taken before any PDF work.
  const memBefore = process.memoryUsage();
  console.log('Initial memory (MB):', {
    rss: bytesToMB(memBefore.rss),
    heapUsed: bytesToMB(memBefore.heapUsed)
  });

  const { PDFDocument } = plugins;
  const largeDoc = await PDFDocument.create();

  // 200 pages, each with 50 small text objects laid out 10 per row.
  const totalPages = 200;
  for (let pageNo = 0; pageNo < totalPages; pageNo += 1) {
    const page = largeDoc.addPage();

    for (let slot = 0; slot < 50; slot += 1) {
      const column = slot % 10;
      const row = Math.floor(slot / 10);
      page.drawText(`Item ${pageNo}-${slot}`, {
        x: 50 + column * 50,
        y: 700 - row * 20,
        size: 8
      });
    }
  }

  // Large XML attachment: header, 1000 line items, closing tag.
  const lineItems = Array.from({ length: 1000 }, (_unused, i) => ` <LineItem number="${i}">
<Description>Product item with long description text that increases file size</Description>
<Quantity>10</Quantity>
<Price>99.99</Price>
</LineItem>\n`);
  const xmlContent =
    '<?xml version="1.0" encoding="UTF-8"?>\n<LargeInvoice>\n' +
    lineItems.join('') +
    '</LargeInvoice>';

  await largeDoc.attach(
    Buffer.from(xmlContent, 'utf8'),
    'large-invoice.xml',
    {
      mimeType: 'application/xml',
      description: 'Large invoice with many line items'
    }
  );

  const pdfBytes = await largeDoc.save();
  console.log(`Created large PDF: ${bytesToMB(pdfBytes.length)} MB`);

  // Load the PDF, then take the second snapshot.
  const einvoice = new EInvoice();
  await einvoice.loadFromPdfBuffer(pdfBytes);

  const memAfter = process.memoryUsage();
  console.log('After processing memory (MB):', {
    rss: bytesToMB(memAfter.rss),
    heapUsed: bytesToMB(memAfter.heapUsed)
  });

  const heapGrowth = memAfter.heapUsed - memBefore.heapUsed;
  console.log(`Memory increase: ${bytesToMB(heapGrowth)} MB`);

  // global.gc is only defined when the runtime exposes it; skip otherwise.
  if (global.gc) {
    global.gc();
    const memAfterGc = process.memoryUsage();
    console.log('After GC memory (MB):', {
      heapUsed: bytesToMB(memAfterGc.heapUsed)
    });
  }

  performanceTracker.addMeasurement('memory-usage', performance.now() - testBegan);
});
|
|
|
|
// Subtest: time a full in-memory load of a 50-page PDF. There is no streaming
// path to compare against yet, so only the full-load time is logged.
t.test('Streaming vs loading performance', async () => {
  const testBegan = performance.now();

  const { PDFDocument } = plugins;

  // Moderately large document: 50 pages with a single heading each.
  const doc = await PDFDocument.create();
  let pageNumber = 1;
  while (pageNumber <= 50) {
    doc.addPage().drawText(`Page ${pageNumber}`, { x: 50, y: 700, size: 20 });
    pageNumber += 1;
  }

  const invoiceXml =
    '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<Invoice><ID>STREAM-TEST</ID></Invoice>';

  await doc.attach(Buffer.from(invoiceXml, 'utf8'), 'invoice.xml', {
    mimeType: 'application/xml'
  });

  const pdfBytes = await doc.save();

  // Full load: the whole buffer is handed to EInvoice in one call.
  const loadBegan = performance.now();
  const fullyLoaded = new EInvoice();
  await fullyLoaded.loadFromPdfBuffer(pdfBytes);
  const loadDuration = performance.now() - loadBegan;

  console.log(`Full loading time: ${loadDuration.toFixed(2)}ms`);

  // Placeholder: a real comparison needs stream API support.
  console.log('Streaming API would potentially reduce memory usage for large files');

  performanceTracker.addMeasurement('streaming-comparison', performance.now() - testBegan);
});
|
|
|
|
// Subtest: create four PDFs (30, 40, 50 and 60 pages), load them through
// EInvoice concurrently, and check each result contains the matching document id.
t.test('Concurrent large PDF processing', async () => {
  const testBegan = performance.now();

  const { PDFDocument } = plugins;

  // Builds a PDF with `pages` pages and an invoice.xml attachment carrying `id`.
  async function createPdf(id: string, pages: number) {
    const doc = await PDFDocument.create();

    for (let pageNo = 1; pageNo <= pages; pageNo += 1) {
      doc.addPage().drawText(`Document ${id} - Page ${pageNo}`, { x: 50, y: 700, size: 16 });
    }

    const invoiceXml = Buffer.from(`<Invoice><ID>${id}</ID></Invoice>`, 'utf8');
    await doc.attach(invoiceXml, 'invoice.xml', { mimeType: 'application/xml' });

    return doc.save();
  }

  // Document ids and page counts, in the order the results are checked below.
  const specs = [
    ['PDF-A', 30],
    ['PDF-B', 40],
    ['PDF-C', 50],
    ['PDF-D', 60]
  ] as const;

  const pdfs = await Promise.all(specs.map(([id, pages]) => createPdf(id, pages)));

  // Load all PDFs at once and time the batch.
  const batchBegan = performance.now();
  const results = await Promise.all(
    pdfs.map(async (pdfBytes) => {
      const einvoice = new EInvoice();
      await einvoice.loadFromPdfBuffer(pdfBytes);
      return einvoice.getXmlString();
    })
  );
  const concurrentTime = performance.now() - batchBegan;

  expect(results.length).toBe(4);
  // Result i must carry id PDF-A, PDF-B, ... (char code 65 is 'A').
  for (const [position, xml] of results.entries()) {
    expect(xml).toContain(`PDF-${String.fromCharCode(65 + position)}`);
  }

  console.log(`Concurrent processing of 4 PDFs: ${concurrentTime.toFixed(2)}ms`);

  performanceTracker.addMeasurement('concurrent-processing', performance.now() - testBegan);
});
|
|
|
|
// Subtest: build a PDF with a form-like first page, three dummy attachments
// and 25 pages alternating between text-heavy and graphic-heavy content, then
// load it through EInvoice on a best-effort basis and log the processing time.
// Nothing is asserted; only the elapsed time is recorded.
t.test('Large PDF with complex structure', async () => {
  const startTime = performance.now();

  const { PDFDocument } = plugins;
  const pdfDoc = await PDFDocument.create();

  // Create complex structure with forms, annotations, etc.
  const formPage = pdfDoc.addPage();

  // Add form fields (simplified: drawn text and a rectangle, not real form fields).
  // NOTE(review): the color options in this subtest are bare {red, green, blue}
  // objects. If PDFDocument comes from pdf-lib, colors are normally built with
  // rgb(); confirm this shape is accepted at runtime.
  formPage.drawText('Invoice Form', { x: 50, y: 750, size: 24 });
  formPage.drawRectangle({
    x: 50,
    y: 700,
    width: 200,
    height: 30,
    borderColor: { red: 0, green: 0, blue: 0.5 },
    borderWidth: 1
  });
  formPage.drawText('Invoice Number:', { x: 55, y: 710, size: 12 });

  // Add multiple embedded files; size is the number of dummy bytes attached
  const attachments = [
    { name: 'invoice.xml', size: 10000 },
    { name: 'terms.pdf', size: 50000 },
    { name: 'logo.png', size: 20000 }
  ];

  for (const att of attachments) {
    const content = Buffer.alloc(att.size, 'A'); // Dummy content, not valid XML/PDF/PNG
    await pdfDoc.attach(content, att.name, {
      mimeType: att.name.endsWith('.xml') ? 'application/xml' : 'application/octet-stream',
      description: `Attachment: ${att.name}`
    });
  }

  // Add many pages with different content types
  for (let i = 0; i < 25; i++) {
    const page = pdfDoc.addPage();

    // Alternate between text-heavy and graphic-heavy pages
    if (i % 2 === 0) {
      // Text-heavy page: 40 lines of text
      for (let j = 0; j < 40; j++) {
        page.drawText(`Line ${j + 1}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.`, {
          x: 50,
          y: 750 - (j * 18),
          size: 10
        });
      }
    } else {
      // Graphic-heavy page: 10x10 grid of randomly colored squares
      for (let j = 0; j < 10; j++) {
        for (let k = 0; k < 10; k++) {
          page.drawRectangle({
            x: 50 + (k * 50),
            y: 700 - (j * 50),
            width: 45,
            height: 45,
            color: {
              red: Math.random(),
              green: Math.random(),
              blue: Math.random()
            }
          });
        }
      }
    }
  }

  const pdfBytes = await pdfDoc.save();
  const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);
  console.log(`Complex PDF size: ${sizeMB} MB`);

  // Test processing
  const processStartTime = performance.now();
  const einvoice = new EInvoice();

  // Best-effort: a load failure is logged rather than failing the subtest.
  try {
    await einvoice.loadFromPdfBuffer(pdfBytes);
    const processTime = performance.now() - processStartTime;
    console.log(`Complex PDF processed in: ${processTime.toFixed(2)}ms`);
  } catch (error: unknown) {
    // Narrow before reading .message: a thrown value need not be an Error.
    const reason = error instanceof Error ? error.message : String(error);
    console.log('Complex PDF processing error:', reason);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('complex-structure', elapsed);
});
|
|
|
|
// Subtest: read every `.pdf` file the corpus loader lists, bucket the files by
// size, and time EInvoice loading for the ones of 1 MiB or more. Read and
// processing failures are logged per file and do not fail the subtest.
t.test('Corpus large PDF analysis', async () => {
  const startTime = performance.now();

  const KIB = 1024;
  const MIB = 1024 * KIB;

  // Narrow a caught value before reading .message: it need not be an Error.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  let largeFileCount = 0;
  let totalSize = 0;
  let processedCount = 0;
  const sizeDistribution = {
    small: 0, // < 100KB
    medium: 0, // 100KB - 1MB
    large: 0, // 1MB - 10MB
    veryLarge: 0 // > 10MB
  };

  const files = await corpusLoader.getAllFiles();
  const pdfFiles = files.filter(f => f.endsWith('.pdf'));

  for (const file of pdfFiles) {
    try {
      const content = await corpusLoader.readFile(file);
      const sizeMB = content.length / MIB;
      totalSize += content.length;

      // One predicate drives both the "large" counter and the processing
      // step, so a file of exactly 1 MiB is treated the same way in both.
      const isLarge = content.length >= MIB;

      if (content.length < 100 * KIB) {
        sizeDistribution.small++;
      } else if (content.length < MIB) {
        sizeDistribution.medium++;
      } else if (content.length < 10 * MIB) {
        sizeDistribution.large++;
      } else {
        sizeDistribution.veryLarge++;
      }

      // Test large file processing
      if (isLarge) {
        largeFileCount++;

        const testStartTime = performance.now();
        const einvoice = new EInvoice();

        try {
          await einvoice.loadFromPdfBuffer(content);
          const testTime = performance.now() - testStartTime;
          console.log(`Large file ${file} (${sizeMB.toFixed(2)} MB) processed in ${testTime.toFixed(2)}ms`);
        } catch (error: unknown) {
          console.log(`Large file ${file} processing failed:`, describeError(error));
        }
      }

      processedCount++;
    } catch (error: unknown) {
      console.log(`Error reading ${file}:`, describeError(error));
    }
  }

  // Guard the empty case: with no readable PDFs the division would yield NaN.
  const avgSize = processedCount > 0 ? totalSize / processedCount / KIB : 0;
  console.log(`Corpus PDF analysis (${processedCount} files):`);
  console.log(`- Average size: ${avgSize.toFixed(2)} KB`);
  console.log(`- Large files (>1MB): ${largeFileCount}`);
  console.log('Size distribution:', sizeDistribution);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-large-pdfs', elapsed);
});
|
|
|
|
// Subtest: run the same create-and-load cycle five times and fail if the last
// iteration is more than 50% slower than the first. A speed-up (for example
// after warm-up) is not degradation and must not fail the test, so the signed
// change is asserted rather than its absolute value.
t.test('Performance degradation test', async () => {
  const startTime = performance.now();

  const { PDFDocument } = plugins;
  const processingTimes: number[] = [];

  // Test if performance degrades with repeated operations
  for (let iteration = 0; iteration < 5; iteration++) {
    const iterStartTime = performance.now();

    // Create PDF: 20 pages with one line of text each
    const pdfDoc = await PDFDocument.create();
    for (let i = 0; i < 20; i++) {
      const page = pdfDoc.addPage();
      page.drawText(`Iteration ${iteration + 1} - Page ${i + 1}`, {
        x: 50,
        y: 700,
        size: 16
      });
    }

    await pdfDoc.attach(
      Buffer.from(`<Invoice><ID>PERF-${iteration}</ID></Invoice>`, 'utf8'),
      'invoice.xml',
      { mimeType: 'application/xml' }
    );

    const pdfBytes = await pdfDoc.save();

    // Process PDF
    const einvoice = new EInvoice();
    await einvoice.loadFromPdfBuffer(pdfBytes);
    einvoice.getXmlString();

    // The timing covers both PDF creation and processing for this iteration.
    const iterTime = performance.now() - iterStartTime;
    processingTimes.push(iterTime);
    console.log(`Iteration ${iteration + 1}: ${iterTime.toFixed(2)}ms`);
  }

  // Relative change from first to last iteration, in percent.
  // Positive means slower, negative means faster.
  const firstTime = processingTimes[0];
  const lastTime = processingTimes[processingTimes.length - 1];
  const degradation = ((lastTime - firstTime) / firstTime) * 100;

  console.log(`Performance degradation: ${degradation.toFixed(2)}%`);
  expect(degradation).toBeLessThan(50); // Allow up to 50% slowdown; speed-ups always pass

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('degradation-test', elapsed);
});
|
|
|
|
// Report the collected timings, then enforce the suite-wide budget.
// NOTE(review): the t.test(...) calls above are not awaited; confirm their
// measurements have been recorded before this summary runs.
performanceTracker.printSummary();

// Large PDFs may take longer, hence the generous limit of 2000
// (presumably milliseconds; confirm against PerformanceTracker).
expect(performanceTracker.getAverageTime()).toBeLessThan(2000);
|
|
});
|
|
|
|
// Start the tap runner so the registered tests execute.
tap.start();