update
This commit is contained in:
@ -0,0 +1,495 @@
|
||||
import { expect, tap } from '@git.zone/tstest/tapbundle';
|
||||
import * as plugins from '../plugins.js';
|
||||
import { EInvoice } from '../../../ts/index.js';
|
||||
import { CorpusLoader } from '../corpus.loader.js';
|
||||
import { PerformanceTracker } from '../performance.tracker.js';
|
||||
|
||||
tap.test('PDF-08: Large PDF Performance - should handle large PDFs efficiently', async (t) => {
|
||||
// PDF-08: performance checks for loading large PDF files through EInvoice.
// The sub-tests below record their wall-clock durations in one shared tracker,
// which is summarised after the last sub-test has been declared.
const performanceTracker = new PerformanceTracker('PDF-08: Large PDF Performance');

// Loader used by the corpus sub-test to enumerate and read sample files.
const corpusLoader = new CorpusLoader();
|
||||
|
||||
t.test('Process PDFs of increasing size', async () => {
  // Builds synthetic invoices of 1, 10, 50 and 100 pages, attaches a small XML
  // document to each, and checks that the XML can be read back from the PDF
  // within a per-size time budget.
  const startTime = performance.now();

  const { PDFDocument } = plugins;

  // Page counts under test; expectedTime is the extraction budget in milliseconds.
  const sizes = [
    { pages: 1, name: '1-page', expectedTime: 100 },
    { pages: 10, name: '10-page', expectedTime: 200 },
    { pages: 50, name: '50-page', expectedTime: 500 },
    { pages: 100, name: '100-page', expectedTime: 1000 }
  ];

  for (const sizeTest of sizes) {
    const sizeStartTime = performance.now();

    const pdfDoc = await PDFDocument.create();

    // Create multiple pages
    for (let i = 0; i < sizeTest.pages; i++) {
      const page = pdfDoc.addPage([595, 842]); // A4

      // Add content to each page
      page.drawText(`Invoice Page ${i + 1} of ${sizeTest.pages}`, {
        x: 50,
        y: 750,
        size: 20
      });

      // Add some graphics to increase file size.
      // NOTE(review): pdf-lib colors are normally created with rgb(); confirm
      // that this plain { red, green, blue } object is accepted at runtime.
      page.drawRectangle({
        x: 50,
        y: 600,
        width: 495,
        height: 100,
        borderColor: { red: 0, green: 0, blue: 0 },
        borderWidth: 1
      });

      // Add text content
      for (let j = 0; j < 20; j++) {
        page.drawText(`Line item ${j + 1}: Product description with details`, {
          x: 60,
          y: 580 - (j * 20),
          size: 10
        });
      }
    }

    // Invoice XML embedded as an attachment; the ID is what the extraction
    // check looks for afterwards.
    const xmlContent = [
      '<?xml version="1.0" encoding="UTF-8"?>',
      '<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">',
      `<ID>LARGE-PDF-${sizeTest.name}</ID>`,
      '<IssueDate>2025-01-25</IssueDate>',
      `<Note>Test invoice for ${sizeTest.pages} page PDF</Note>`,
      `<LineItemCount>${sizeTest.pages * 20}</LineItemCount>`,
      '</Invoice>'
    ].join('\n');

    await pdfDoc.attach(
      Buffer.from(xmlContent, 'utf8'),
      'invoice.xml',
      {
        mimeType: 'application/xml',
        description: `Invoice for ${sizeTest.pages} page document`
      }
    );

    const pdfBytes = await pdfDoc.save();
    const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);

    // Test extraction performance. Only the extraction itself is best effort:
    // a failure is logged and yields undefined. The assertions live outside the
    // try/catch so a failed expectation is reported instead of being swallowed.
    const extractStartTime = performance.now();
    const einvoice = new EInvoice();
    const extraction = await (async () => {
      try {
        await einvoice.loadFromPdfBuffer(pdfBytes);
        const xmlString = einvoice.getXmlString();
        return { xmlString, extractTime: performance.now() - extractStartTime };
      } catch (error: unknown) {
        const reason = error instanceof Error ? error.message : String(error);
        console.log(`${sizeTest.name} extraction error:`, reason);
        return undefined;
      }
    })();

    if (extraction !== undefined) {
      expect(extraction.xmlString).toContain(`LARGE-PDF-${sizeTest.name}`);
      console.log(`${sizeTest.name} (${sizeMB} MB): Extraction took ${extraction.extractTime.toFixed(2)}ms`);

      // Check if extraction time is reasonable
      expect(extraction.extractTime).toBeLessThan(sizeTest.expectedTime);
    }

    // Per-size measurement covers PDF creation plus extraction.
    const sizeElapsed = performance.now() - sizeStartTime;
    performanceTracker.addMeasurement(`size-${sizeTest.name}`, sizeElapsed);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('increasing-sizes', elapsed);
});
|
||||
|
||||
t.test('Memory usage with large PDFs', async () => {
  // Logs process memory before and after loading a 200-page PDF that carries a
  // 1000-line-item XML attachment. Nothing is asserted; the figures are
  // informational only.
  const testStart = performance.now();

  // Formats a byte count as megabytes with two decimals.
  const asMB = (bytes: number) => (bytes / 1024 / 1024).toFixed(2);

  const memBefore = process.memoryUsage();
  console.log('Initial memory (MB):', {
    rss: asMB(memBefore.rss),
    heapUsed: asMB(memBefore.heapUsed)
  });

  const { PDFDocument } = plugins;
  const largeDoc = await PDFDocument.create();

  // Many small text objects per page: 50 items laid out ten per row.
  const pageCount = 200;
  const itemsPerPage = 50;
  for (let pageNo = 0; pageNo < pageCount; pageNo++) {
    const page = largeDoc.addPage();
    for (let itemNo = 0; itemNo < itemsPerPage; itemNo++) {
      const column = itemNo % 10;
      const row = Math.floor(itemNo / 10);
      page.drawText(`Item ${pageNo}-${itemNo}`, {
        x: 50 + column * 50,
        y: 700 - row * 20,
        size: 8
      });
    }
  }

  // Large XML attachment assembled from 1000 identical-shaped line items.
  const lineItemXml = (n: number) =>
    ` <LineItem number="${n}">\n` +
    '<Description>Product item with long description text that increases file size</Description>\n' +
    '<Quantity>10</Quantity>\n' +
    '<Price>99.99</Price>\n' +
    '</LineItem>\n';
  const xmlContent =
    '<?xml version="1.0" encoding="UTF-8"?>\n<LargeInvoice>\n' +
    Array.from({ length: 1000 }, (_, n) => lineItemXml(n)).join('') +
    '</LargeInvoice>';

  await largeDoc.attach(Buffer.from(xmlContent, 'utf8'), 'large-invoice.xml', {
    mimeType: 'application/xml',
    description: 'Large invoice with many line items'
  });

  const pdfBytes = await largeDoc.save();
  console.log(`Created large PDF: ${asMB(pdfBytes.length)} MB`);

  // Load the document, then sample memory again.
  const einvoice = new EInvoice();
  await einvoice.loadFromPdfBuffer(pdfBytes);

  const memAfter = process.memoryUsage();
  console.log('After processing memory (MB):', {
    rss: asMB(memAfter.rss),
    heapUsed: asMB(memAfter.heapUsed)
  });
  console.log(`Memory increase: ${asMB(memAfter.heapUsed - memBefore.heapUsed)} MB`);

  // global.gc is only defined when it has been exposed to the process;
  // otherwise this step is skipped.
  if (global.gc) {
    global.gc();
    console.log('After GC memory (MB):', {
      heapUsed: asMB(process.memoryUsage().heapUsed)
    });
  }

  performanceTracker.addMeasurement('memory-usage', performance.now() - testStart);
});
|
||||
|
||||
t.test('Streaming vs loading performance', async () => {
  // Times a full in-memory load of a 50-page PDF. No streaming path is
  // exercised here; the second log line is only a placeholder remark.
  const testStart = performance.now();

  const { PDFDocument } = plugins;

  // Moderately large document: 50 pages with a single heading each.
  const doc = await PDFDocument.create();
  for (let pageNo = 1; pageNo <= 50; pageNo++) {
    doc.addPage().drawText(`Page ${pageNo}`, { x: 50, y: 700, size: 20 });
  }

  const xmlContent =
    '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<Invoice><ID>STREAM-TEST</ID></Invoice>';
  await doc.attach(Buffer.from(xmlContent, 'utf8'), 'invoice.xml', {
    mimeType: 'application/xml'
  });

  const pdfBytes = await doc.save();

  // Full load: the whole byte array is handed to EInvoice in one call.
  const loadStart = performance.now();
  const fullyLoaded = new EInvoice();
  await fullyLoaded.loadFromPdfBuffer(pdfBytes);
  const loadTime = performance.now() - loadStart;
  console.log(`Full loading time: ${loadTime.toFixed(2)}ms`);

  // Placeholder: a real comparison would need a stream-based API.
  console.log('Streaming API would potentially reduce memory usage for large files');

  performanceTracker.addMeasurement('streaming-comparison', performance.now() - testStart);
});
|
||||
|
||||
t.test('Concurrent large PDF processing', async () => {
  // Builds four PDFs (30 to 60 pages), loads them all through Promise.all, and
  // checks that each result contains the ID of the PDF at the same position.
  const testStart = performance.now();

  const { PDFDocument } = plugins;

  // Creates a PDF with the given number of pages and a minimal invoice XML
  // attachment carrying the given ID; resolves to the saved bytes.
  const createPdf = async (id: string, pages: number) => {
    const doc = await PDFDocument.create();
    for (let pageNo = 1; pageNo <= pages; pageNo++) {
      doc.addPage().drawText(`Document ${id} - Page ${pageNo}`, { x: 50, y: 700, size: 16 });
    }
    await doc.attach(
      Buffer.from(`<Invoice><ID>${id}</ID></Invoice>`, 'utf8'),
      'invoice.xml',
      { mimeType: 'application/xml' }
    );
    return doc.save();
  };

  // IDs run PDF-A .. PDF-D; the page count grows by ten per document.
  const specs = [
    { id: 'PDF-A', pages: 30 },
    { id: 'PDF-B', pages: 40 },
    { id: 'PDF-C', pages: 50 },
    { id: 'PDF-D', pages: 60 }
  ];
  const pdfs = await Promise.all(specs.map((spec) => createPdf(spec.id, spec.pages)));

  // Only the loading phase is timed for the log line below.
  const concurrentStart = performance.now();
  const results = await Promise.all(
    pdfs.map(async (pdfBytes) => {
      const einvoice = new EInvoice();
      await einvoice.loadFromPdfBuffer(pdfBytes);
      return einvoice.getXmlString();
    })
  );
  const concurrentTime = performance.now() - concurrentStart;

  expect(results.length).toBe(4);
  for (const [position, xml] of results.entries()) {
    // Position 0 maps to 'A', 1 to 'B', and so on.
    const expectedId = `PDF-${String.fromCharCode(65 + position)}`;
    expect(xml).toContain(expectedId);
  }

  console.log(`Concurrent processing of 4 PDFs: ${concurrentTime.toFixed(2)}ms`);

  performanceTracker.addMeasurement('concurrent-processing', performance.now() - testStart);
});
|
||||
|
||||
t.test('Large PDF with complex structure', async () => {
  // Builds a PDF with a form-like cover page, three dummy attachments and 25
  // alternating text/graphic pages, then times a best-effort load of it.
  // The attachments contain filler bytes only, so a load failure is logged
  // rather than treated as a test failure.
  const startTime = performance.now();

  const { PDFDocument } = plugins;
  const pdfDoc = await PDFDocument.create();

  // Create complex structure with forms, annotations, etc.
  const formPage = pdfDoc.addPage();

  // Add form fields (simplified - actual forms require more setup).
  // NOTE(review): pdf-lib colors are normally created with rgb(); confirm that
  // the plain { red, green, blue } objects used in this test are accepted.
  formPage.drawText('Invoice Form', { x: 50, y: 750, size: 24 });
  formPage.drawRectangle({
    x: 50,
    y: 700,
    width: 200,
    height: 30,
    borderColor: { red: 0, green: 0, blue: 0.5 },
    borderWidth: 1
  });
  formPage.drawText('Invoice Number:', { x: 55, y: 710, size: 12 });

  // Add multiple embedded files; size is the number of filler bytes.
  const attachments = [
    { name: 'invoice.xml', size: 10000 },
    { name: 'terms.pdf', size: 50000 },
    { name: 'logo.png', size: 20000 }
  ];

  for (const att of attachments) {
    const content = Buffer.alloc(att.size, 'A'); // Dummy content
    await pdfDoc.attach(content, att.name, {
      mimeType: att.name.endsWith('.xml') ? 'application/xml' : 'application/octet-stream',
      description: `Attachment: ${att.name}`
    });
  }

  // Add many pages with different content types
  for (let i = 0; i < 25; i++) {
    const page = pdfDoc.addPage();

    // Alternate between text-heavy and graphic-heavy pages
    if (i % 2 === 0) {
      // Text-heavy page: 40 lines of filler text
      for (let j = 0; j < 40; j++) {
        page.drawText(`Line ${j + 1}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.`, {
          x: 50,
          y: 750 - (j * 18),
          size: 10
        });
      }
    } else {
      // Graphic-heavy page: a 10x10 grid of randomly coloured squares
      for (let j = 0; j < 10; j++) {
        for (let k = 0; k < 10; k++) {
          page.drawRectangle({
            x: 50 + (k * 50),
            y: 700 - (j * 50),
            width: 45,
            height: 45,
            color: {
              red: Math.random(),
              green: Math.random(),
              blue: Math.random()
            }
          });
        }
      }
    }
  }

  const pdfBytes = await pdfDoc.save();
  const sizeMB = (pdfBytes.length / 1024 / 1024).toFixed(2);
  console.log(`Complex PDF size: ${sizeMB} MB`);

  // Test processing
  const processStartTime = performance.now();
  const einvoice = new EInvoice();

  try {
    await einvoice.loadFromPdfBuffer(pdfBytes);
    const processTime = performance.now() - processStartTime;
    console.log(`Complex PDF processed in: ${processTime.toFixed(2)}ms`);
  } catch (error: unknown) {
    // Narrow before reading .message so non-Error throws are still logged usefully.
    const reason = error instanceof Error ? error.message : String(error);
    console.log('Complex PDF processing error:', reason);
  }

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('complex-structure', elapsed);
});
|
||||
|
||||
t.test('Corpus large PDF analysis', async () => {
  // Walks every .pdf file returned by the corpus loader, buckets the files by
  // size, and times a best-effort load of each file of 1 MB or more.
  const startTime = performance.now();

  const KB = 1024;
  const MB = 1024 * KB;

  let largeFileCount = 0;
  let totalSize = 0;
  let processedCount = 0;
  const sizeDistribution = {
    small: 0, // < 100KB
    medium: 0, // 100KB - 1MB
    large: 0, // 1MB - 10MB
    veryLarge: 0 // > 10MB
  };

  // Narrow a caught value before reading .message.
  const describeError = (error: unknown) =>
    error instanceof Error ? error.message : String(error);

  const files = await corpusLoader.getAllFiles();
  const pdfFiles = files.filter(f => f.endsWith('.pdf'));

  for (const file of pdfFiles) {
    try {
      const content = await corpusLoader.readFile(file);
      const sizeMB = content.length / MB;
      totalSize += content.length;

      if (content.length < 100 * KB) {
        sizeDistribution.small++;
      } else if (content.length < MB) {
        sizeDistribution.medium++;
      } else if (content.length < 10 * MB) {
        sizeDistribution.large++;
      } else {
        sizeDistribution.veryLarge++;
      }

      // Use the same threshold for counting and for processing, so every file
      // counted as large is also exercised (including one of exactly 1 MB).
      if (content.length >= MB) {
        largeFileCount++;

        const testStartTime = performance.now();
        const einvoice = new EInvoice();

        try {
          await einvoice.loadFromPdfBuffer(content);
          const testTime = performance.now() - testStartTime;
          console.log(`Large file ${file} (${sizeMB.toFixed(2)} MB) processed in ${testTime.toFixed(2)}ms`);
        } catch (error: unknown) {
          // Best effort: a corpus file that cannot be loaded is only logged.
          console.log(`Large file ${file} processing failed:`, describeError(error));
        }
      }

      processedCount++;
    } catch (error: unknown) {
      console.log(`Error reading ${file}:`, describeError(error));
    }
  }

  // Guard against an empty corpus (or all reads failing): 0 / 0 would print NaN.
  const avgSize = processedCount > 0 ? totalSize / processedCount / KB : 0;
  console.log(`Corpus PDF analysis (${processedCount} files):`);
  console.log(`- Average size: ${avgSize.toFixed(2)} KB`);
  console.log(`- Large files (>1MB): ${largeFileCount}`);
  console.log('Size distribution:', sizeDistribution);

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-large-pdfs', elapsed);
});
|
||||
|
||||
t.test('Performance degradation test', async () => {
  // Repeats the same create-and-load cycle five times and fails if the last
  // iteration is more than 50% slower than the first.
  const startTime = performance.now();

  const { PDFDocument } = plugins;
  const processingTimes: number[] = [];

  // Test if performance degrades with repeated operations
  for (let iteration = 0; iteration < 5; iteration++) {
    const iterStartTime = performance.now();

    // Create PDF: 20 pages with one heading each
    const pdfDoc = await PDFDocument.create();
    for (let i = 0; i < 20; i++) {
      const page = pdfDoc.addPage();
      page.drawText(`Iteration ${iteration + 1} - Page ${i + 1}`, {
        x: 50,
        y: 700,
        size: 16
      });
    }

    await pdfDoc.attach(
      Buffer.from(`<Invoice><ID>PERF-${iteration}</ID></Invoice>`, 'utf8'),
      'invoice.xml',
      { mimeType: 'application/xml' }
    );

    const pdfBytes = await pdfDoc.save();

    // Process PDF; the XML is read only to include that step in the timing.
    const einvoice = new EInvoice();
    await einvoice.loadFromPdfBuffer(pdfBytes);
    einvoice.getXmlString();

    const iterTime = performance.now() - iterStartTime;
    processingTimes.push(iterTime);
    console.log(`Iteration ${iteration + 1}: ${iterTime.toFixed(2)}ms`);
  }

  // Check for performance degradation: positive means the last run was slower
  // than the first, negative means it was faster.
  const firstTime = processingTimes[0];
  const lastTime = processingTimes[processingTimes.length - 1];
  const degradation = ((lastTime - firstTime) / firstTime) * 100;

  console.log(`Performance degradation: ${degradation.toFixed(2)}%`);
  // Only a slowdown counts as degradation. Comparing the absolute value would
  // also fail the test when later iterations get more than 50% faster.
  expect(degradation).toBeLessThan(50); // Allow up to 50% slowdown

  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('degradation-test', elapsed);
});
|
||||
|
||||
// Report everything the shared tracker has collected so far.
// NOTE(review): the t.test(...) calls above are not awaited at this point;
// confirm the runner completes them before this summary and assertion run.
performanceTracker.printSummary();

// Overall budget: the tracker's average must stay under 2000 (large PDFs may
// take longer than the smaller fixtures).
expect(performanceTracker.getAverageTime()).toBeLessThan(2000);
|
||||
});
|
||||
|
||||
// Start the tap runner; presumably this executes the tests registered above via tap.test.
tap.start();
|
Reference in New Issue
Block a user