import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * PDF-06: Multiple Attachments.
 *
 * Builds PDFs in memory with `PDFDocument.attach(...)`, feeds the saved bytes
 * to `EInvoice.loadFromPdfBuffer(...)`, and records the wall-clock time of
 * each subtest in a shared PerformanceTracker. Most subtests are best-effort:
 * a failure to load the PDF is logged rather than treated as a test failure.
 *
 * NOTE(review): several payloads attached under `.xml` names below contain no
 * XML markup as written (plain text or empty strings). They are kept verbatim
 * here — confirm whether markup was lost from these literals at some point.
 *
 * NOTE(review): the `t.test(...)` calls are not awaited. If `t.test` returns a
 * promise, the summary and the average-time assertion at the end may run
 * before any measurement has been recorded — confirm against the runner.
 */
tap.test('PDF-06: Multiple Attachments - should handle PDFs with multiple embedded files', async (t) => {
  const performanceTracker = new PerformanceTracker('PDF-06: Multiple Attachments');
  const corpusLoader = new CorpusLoader();

  t.test('Detect multiple attachments in PDF', async () => {
    const startTime = performance.now();

    const { PDFDocument, AFRelationship } = plugins;
    const pdfDoc = await PDFDocument.create();

    // Single A4 page carrying a visible title.
    const page = pdfDoc.addPage([595, 842]);
    page.drawText('Invoice with Multiple Attachments', {
      x: 50,
      y: 750,
      size: 20,
    });

    // Three attachments, each with a different AFRelationship.
    const attachments = [
      {
        name: 'invoice.xml',
        content: ` MULTI-ATTACH-001 2025-01-25 Main invoice document `,
        relationship: AFRelationship.Data,
        description: 'Main invoice XML',
      },
      {
        name: 'supplementary.xml',
        content: ` MULTI-ATTACH-001 Extra invoice details `,
        relationship: AFRelationship.Supplement,
        description: 'Supplementary invoice data',
      },
      {
        name: 'signature.xml',
        content: ` abc123... \n`,
        relationship: AFRelationship.Source,
        description: 'Digital signature',
      },
    ];

    for (const attachment of attachments) {
      await pdfDoc.attach(Buffer.from(attachment.content, 'utf8'), attachment.name, {
        mimeType: 'application/xml',
        description: attachment.description,
        creationDate: new Date(),
        modificationDate: new Date(),
        afRelationship: attachment.relationship,
      });
    }

    pdfDoc.setTitle('Multi-attachment Invoice');
    pdfDoc.setSubject('Invoice with multiple embedded files');
    pdfDoc.setKeywords(['invoice', 'multiple-attachments', 'xml']);

    const pdfBytes = await pdfDoc.save();

    // Only the load/extract step is best-effort. The content assertion runs
    // outside the try block so that a wrong extraction result fails the
    // subtest instead of being logged as "not fully supported".
    const einvoice = new EInvoice();
    let extracted = false;
    let xmlContent;
    try {
      await einvoice.loadFromPdfBuffer(pdfBytes);
      xmlContent = einvoice.getXmlString();
      extracted = true;
    } catch (error) {
      console.log('Multi-attachment extraction not fully supported:', error.message);
    }

    if (extracted) {
      expect(xmlContent).toContain('MULTI-ATTACH-001');
      console.log('Successfully extracted primary attachment from multi-attachment PDF');
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('detect-multiple', elapsed);
  });

  t.test('Extract all attachments from PDF', async () => {
    const startTime = performance.now();

    const { PDFDocument } = plugins;
    const pdfDoc = await PDFDocument.create();
    pdfDoc.addPage();

    // Attachments with differing MIME types.
    const mixedAttachments = [
      { name: 'invoice_data.xml', content: 'TEST-001', mimeType: 'application/xml' },
      { name: 'invoice_image.txt', content: 'BASE64_ENCODED_IMAGE_DATA_HERE', mimeType: 'text/plain' },
      { name: 'invoice_style.css', content: '.invoice { font-family: Arial; }', mimeType: 'text/css' },
      { name: 'invoice_meta.json', content: '{"version":"1.0","format":"UBL"}', mimeType: 'application/json' },
    ];

    for (const attach of mixedAttachments) {
      await pdfDoc.attach(Buffer.from(attach.content, 'utf8'), attach.name, {
        mimeType: attach.mimeType,
        description: `${attach.name} attachment`,
      });
    }

    const pdfBytes = await pdfDoc.save();

    // Best-effort: only checks that loading does not throw.
    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromPdfBuffer(pdfBytes);
      console.log('Extracted attachment from PDF with mixed file types');
    } catch (error) {
      console.log('Mixed attachment handling:', error.message);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('extract-all', elapsed);
  });

  t.test('Handle attachment relationships', async () => {
    const startTime = performance.now();

    const { PDFDocument, AFRelationship } = plugins;
    const pdfDoc = await PDFDocument.create();
    pdfDoc.addPage();

    // One attachment per AFRelationship value.
    const relationshipTests = [
      { rel: AFRelationship.Source, desc: 'Source document' },
      { rel: AFRelationship.Data, desc: 'Data file' },
      { rel: AFRelationship.Alternative, desc: 'Alternative representation' },
      { rel: AFRelationship.Supplement, desc: 'Supplementary data' },
      { rel: AFRelationship.Unspecified, desc: 'Unspecified relationship' },
    ];

    for (const test of relationshipTests) {
      const xmlContent = ` ${test.rel} `;
      await pdfDoc.attach(Buffer.from(xmlContent, 'utf8'), `${test.rel}_document.xml`, {
        mimeType: 'application/xml',
        description: test.desc,
        afRelationship: test.rel,
      });
    }

    // This subtest only verifies that the PDF can be serialized.
    const pdfBytes = await pdfDoc.save();
    expect(pdfBytes.length).toBeGreaterThan(0);
    console.log('Created PDF with various attachment relationships');

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('relationships', elapsed);
  });

  t.test('Attachment size limits', async () => {
    const startTime = performance.now();

    const { PDFDocument } = plugins;
    const pdfDoc = await PDFDocument.create();
    pdfDoc.addPage();

    // Increasingly large attachments, all added to the same document.
    const sizes = [
      { size: 1024, name: '1KB' },
      { size: 10 * 1024, name: '10KB' },
      { size: 100 * 1024, name: '100KB' },
      { size: 1024 * 1024, name: '1MB' },
    ];

    for (const sizeTest of sizes) {
      // Fill with 81-character lines until within 100 characters of the target.
      // NOTE(review): `padding` and the final append are empty strings as
      // written, so they contribute nothing — confirm the intended content.
      let content = '\n\n';
      const padding = '';
      while (content.length < sizeTest.size - 100) {
        content += padding + 'x'.repeat(80) + '\n';
      }
      content += '';

      try {
        await pdfDoc.attach(Buffer.from(content, 'utf8'), `large_${sizeTest.name}.xml`, {
          mimeType: 'application/xml',
          description: `Large attachment test ${sizeTest.name}`,
        });
        console.log(`Successfully attached ${sizeTest.name} file`);
      } catch (error) {
        console.log(`Failed to attach ${sizeTest.name}:`, error.message);
      }
    }

    const pdfBytes = await pdfDoc.save();
    console.log(`Final PDF size with attachments: ${(pdfBytes.length / 1024).toFixed(2)} KB`);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('size-limits', elapsed);
  });

  t.test('Duplicate attachment names', async () => {
    const startTime = performance.now();

    const { PDFDocument } = plugins;
    const pdfDoc = await PDFDocument.create();
    pdfDoc.addPage();

    // Attach three payloads under the same file name.
    const attachmentName = 'invoice.xml';
    const versions = [
      { content: '', desc: 'Version 1.0' },
      { content: '', desc: 'Version 2.0' },
      { content: '', desc: 'Version 3.0' },
    ];

    for (const version of versions) {
      try {
        await pdfDoc.attach(Buffer.from(version.content, 'utf8'), attachmentName, {
          mimeType: 'application/xml',
          description: version.desc,
        });
        console.log(`Attached: ${version.desc}`);
      } catch (error) {
        console.log(`Duplicate name handling for ${version.desc}:`, error.message);
      }
    }

    const pdfBytes = await pdfDoc.save();

    // Best-effort: only checks that loading does not throw.
    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromPdfBuffer(pdfBytes);
      console.log('Handled PDF with duplicate attachment names');
    } catch (error) {
      console.log('Duplicate name error:', error.message);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('duplicate-names', elapsed);
  });

  t.test('Corpus PDFs with multiple attachments', async () => {
    const startTime = performance.now();

    let multiAttachmentCount = 0;
    let processedCount = 0;

    const files = await corpusLoader.getAllFiles();
    const pdfFiles = files.filter((f) => f.endsWith('.pdf'));

    // Inspect at most the first 30 corpus PDFs.
    const sampleSize = Math.min(30, pdfFiles.length);
    const sample = pdfFiles.slice(0, sampleSize);

    for (const file of sample) {
      try {
        const content = await corpusLoader.readFile(file);
        const einvoice = new EInvoice();

        try {
          await einvoice.loadFromPdfBuffer(content);

          // Approximation: count occurrences of the /EmbeddedFiles key in the
          // raw bytes; more than one occurrence is reported as "may have
          // multiple attachments".
          const pdfString = content.toString('binary');
          const attachmentMatches = pdfString.match(/\/EmbeddedFiles/g);

          if (attachmentMatches && attachmentMatches.length > 1) {
            multiAttachmentCount++;
            console.log(`Multiple attachments detected in: ${file}`);
          }
        } catch (error) {
          // PDFs that cannot be loaded are skipped, but no longer silently.
          console.log(`Skipping ${file}:`, error.message);
        }

        // Counted whether or not the load succeeded, as long as the file was read.
        processedCount++;
      } catch (error) {
        console.log(`Error reading ${file}:`, error.message);
      }
    }

    console.log(`Corpus analysis: ${multiAttachmentCount}/${processedCount} PDFs may have multiple attachments`);

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('corpus-multi-attach', elapsed);
  });

  t.test('Attachment extraction order', async () => {
    const startTime = performance.now();

    const { PDFDocument, AFRelationship } = plugins;
    const pdfDoc = await PDFDocument.create();
    pdfDoc.addPage();

    // Attachments are added in this order; each payload embeds its own name.
    const orderedAttachments = [
      { name: '1_first.xml', priority: 'high', afRel: AFRelationship.Data },
      { name: '2_second.xml', priority: 'medium', afRel: AFRelationship.Supplement },
      { name: '3_third.xml', priority: 'low', afRel: AFRelationship.Alternative },
    ];

    for (const attach of orderedAttachments) {
      const content = ` \n${attach.name} ${attach.priority} `;
      await pdfDoc.attach(Buffer.from(content, 'utf8'), attach.name, {
        mimeType: 'application/xml',
        description: `Priority: ${attach.priority}`,
        afRelationship: attach.afRel,
      });
    }

    const pdfBytes = await pdfDoc.save();

    // Informational only: reports whether the first (Data) attachment was the
    // one returned; no assertion is made about the order.
    const einvoice = new EInvoice();
    try {
      await einvoice.loadFromPdfBuffer(pdfBytes);

      const xmlContent = einvoice.getXmlString();
      console.log('Extraction order test completed');

      if (xmlContent.includes('1_first.xml')) {
        console.log('Extracted primary (Data) attachment first');
      }
    } catch (error) {
      console.log('Order extraction error:', error.message);
    }

    const elapsed = performance.now() - startTime;
    performanceTracker.addMeasurement('extraction-order', elapsed);
  });

  // Print performance summary
  performanceTracker.printSummary();

  // Performance assertions
  const avgTime = performanceTracker.getAverageTime();
  expect(avgTime).toBeLessThan(500); // Multiple attachments may take longer
});

tap.start();