import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PerformanceTracker } from '../performance.tracker.js';

/**
 * EDGE-05: Zero-Byte PDFs
 *
 * Feeds empty, minimal, truncated and otherwise degenerate PDF buffers into
 * `EInvoice` and records, per scenario, whether the call returned or threw.
 * Each scenario is wrapped in `performanceTracker.measureAsync` so that a
 * timing summary can be printed at the end of the run.
 *
 * NOTE(review): the xref offsets, `startxref` values and some `/Length`
 * entries in the hand-written fixtures below do not appear to match the real
 * byte layout of the strings (e.g. `/Length 100` on a one-byte stream).
 * Confirm whether that is intentional before "fixing" any fixture.
 */
const performanceTracker = new PerformanceTracker('EDGE-05: Zero-Byte PDFs');

/**
 * Extracts a printable message from anything that was thrown.
 *
 * Thrown values are not guaranteed to be `Error` instances; reading
 * `.message` blindly would itself throw inside the catch handler.
 *
 * @param error - the value caught by a `catch` clause
 * @returns the `message` of an Error-like value, otherwise `String(error)`
 */
function describeError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object' && error !== null && 'message' in error) {
    return String((error as { message: unknown }).message);
  }
  return String(error);
}

/**
 * Returns the constructor name of a thrown value, falling back to its
 * `typeof` when the value has no usable constructor (e.g. `null`).
 *
 * @param error - the value caught by a `catch` clause
 * @returns a short type label for reporting
 */
function describeErrorType(error: unknown): string {
  if (error !== null && error !== undefined) {
    const ctorName = (error as { constructor?: { name?: string } }).constructor?.name;
    if (ctorName) {
      return ctorName;
    }
  }
  return typeof error;
}

tap.test('EDGE-05: Zero-Byte PDFs - should handle zero-byte and minimal PDF files', async (t) => {
  // A single instance is shared by every scenario below.
  const einvoice = new EInvoice();

  // Test 1: Truly zero-byte PDF
  const zeroByteFile = await performanceTracker.measureAsync(
    'truly-zero-byte-pdf',
    async () => {
      const zeroPDF = Buffer.alloc(0);

      try {
        const result = await einvoice.extractFromPDF(zeroPDF);
        return {
          handled: true,
          hasContent: !!result,
          hasXML: result?.xml !== undefined,
          hasAttachments: (result?.attachments?.length ?? 0) > 0,
          error: null,
          bufferSize: zeroPDF.length
        };
      } catch (error) {
        // A thrown error still counts as "handled": the call did not hang or crash the process.
        return {
          handled: true,
          hasContent: false,
          error: describeError(error),
          errorType: describeErrorType(error),
          bufferSize: zeroPDF.length
        };
      }
    }
  );

  t.ok(zeroByteFile.handled, 'Zero-byte PDF was handled');
  t.notOk(zeroByteFile.hasContent, 'Zero-byte PDF has no content');
  t.equal(zeroByteFile.bufferSize, 0, 'Buffer size is zero');

  // Test 2: Minimal PDF structure
  const minimalPDFStructure = await performanceTracker.measureAsync(
    'minimal-pdf-structure',
    async () => {
      const minimalPDFs = [
        {
          name: 'header-only',
          content: Buffer.from('%PDF-1.4')
        },
        {
          name: 'header-and-eof',
          content: Buffer.from('%PDF-1.4\n%%EOF')
        },
        {
          name: 'empty-catalog',
          content: Buffer.from(
            '%PDF-1.4\n' +
            '1 0 obj\n<< /Type /Catalog >>\nendobj\n' +
            'xref\n0 2\n' +
            '0000000000 65535 f\n' +
            '0000000009 00000 n\n' +
            'trailer\n<< /Size 2 /Root 1 0 R >>\n' +
            'startxref\n64\n%%EOF'
          )
        },
        {
          name: 'single-empty-page',
          content: Buffer.from(
            '%PDF-1.4\n' +
            '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
            '2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
            'xref\n0 3\n' +
            '0000000000 65535 f\n' +
            '0000000009 00000 n\n' +
            '0000000052 00000 n\n' +
            'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
            'startxref\n110\n%%EOF'
          )
        }
      ];

      const results = [];

      for (const pdf of minimalPDFs) {
        try {
          const result = await einvoice.extractFromPDF(pdf.content);
          results.push({
            name: pdf.name,
            size: pdf.content.length,
            processed: true,
            hasXML: !!result?.xml,
            hasAttachments: (result?.attachments?.length ?? 0) > 0,
            hasMetadata: !!result?.metadata
          });
        } catch (error) {
          results.push({
            name: pdf.name,
            size: pdf.content.length,
            processed: false,
            error: describeError(error)
          });
        }
      }

      return results;
    }
  );

  minimalPDFStructure.forEach(result => {
    // Either outcome is accepted, as long as no XML is reported for these files.
    t.ok(result.processed || result.error, `Minimal PDF ${result.name} was processed`);
    t.notOk(result.hasXML, `Minimal PDF ${result.name} has no XML`);
  });

  // Test 3: Truncated PDF files
  const truncatedPDFs = await performanceTracker.measureAsync(
    'truncated-pdf-files',
    async () => {
      // Start with a valid PDF structure and truncate at different points
      const fullPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n' +
        '3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n' +
        'xref\n0 4\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000052 00000 n\n' +
        '0000000110 00000 n\n' +
        'trailer\n<< /Size 4 /Root 1 0 R >>\n' +
        'startxref\n196\n%%EOF'
      );

      const truncationPoints = [
        { name: 'after-header', bytes: 10 },
        { name: 'mid-object', bytes: 50 },
        { name: 'before-xref', bytes: 150 },
        { name: 'mid-xref', bytes: 250 },
        { name: 'before-eof', bytes: fullPDF.length - 5 }
      ];

      const results = [];

      for (const point of truncationPoints) {
        // `subarray` yields the same view `Buffer#slice` did, without the deprecated API.
        const truncated = fullPDF.subarray(0, point.bytes);

        try {
          const result = await einvoice.extractFromPDF(truncated);
          results.push({
            truncationPoint: point.name,
            size: truncated.length,
            recovered: true,
            hasPartialData: !!result
          });
        } catch (error) {
          const message = describeError(error);
          results.push({
            truncationPoint: point.name,
            size: truncated.length,
            recovered: false,
            error: message,
            isCorruptionError: message.includes('corrupt') || message.includes('truncated')
          });
        }
      }

      return results;
    }
  );

  truncatedPDFs.forEach(result => {
    t.ok(!result.recovered || result.isCorruptionError,
      `Truncated PDF at ${result.truncationPoint} should fail or be detected as corrupt`);
  });

  // Test 4: PDF with zero-byte attachment
  const zeroByteAttachment = await performanceTracker.measureAsync(
    'pdf-with-zero-byte-attachment',
    async () => {
      // Create a PDF with an embedded file of zero bytes
      const pdfWithEmptyAttachment = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Names 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /EmbeddedFiles 3 0 R >>\nendobj\n' +
        '3 0 obj\n<< /Names [(empty.xml) 4 0 R] >>\nendobj\n' +
        '4 0 obj\n<< /Type /Filespec /F (empty.xml) /EF << /F 5 0 R >> >>\nendobj\n' +
        '5 0 obj\n<< /Type /EmbeddedFile /Length 0 >>\nstream\n\nendstream\nendobj\n' +
        'xref\n0 6\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000062 00000 n\n' +
        '0000000103 00000 n\n' +
        '0000000151 00000 n\n' +
        '0000000229 00000 n\n' +
        'trailer\n<< /Size 6 /Root 1 0 R >>\n' +
        'startxref\n307\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(pdfWithEmptyAttachment);
        return {
          processed: true,
          hasAttachments: (result?.attachments?.length ?? 0) > 0,
          attachmentCount: result?.attachments?.length || 0,
          firstAttachmentSize: result?.attachments?.[0]?.size || 0,
          firstAttachmentName: result?.attachments?.[0]?.name || null
        };
      } catch (error) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(zeroByteAttachment.processed, 'PDF with zero-byte attachment was processed');
  if (zeroByteAttachment.hasAttachments) {
    t.equal(zeroByteAttachment.firstAttachmentSize, 0, 'Attachment size is zero');
  }

  // Test 5: PDF with only metadata
  const metadataOnlyPDF = await performanceTracker.measureAsync(
    'pdf-with-only-metadata',
    async () => {
      const pdfWithMetadata = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Metadata 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Metadata /Subtype /XML /Length 100 >>\n' +
        'stream\n' +
        '\n' +
        'endstream\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000068 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n259\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(pdfWithMetadata);
        return {
          processed: true,
          hasMetadata: !!result?.metadata,
          hasXML: !!result?.xml,
          hasContent: !!result?.content,
          isEmpty: !result?.xml && !result?.attachments?.length
        };
      } catch (error) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(metadataOnlyPDF.processed, 'PDF with only metadata was processed');
  t.ok(metadataOnlyPDF.isEmpty, 'PDF with only metadata has no invoice content');

  // Test 6: Compressed empty streams
  const compressedEmptyStreams = await performanceTracker.measureAsync(
    'compressed-empty-streams',
    async () => {
      const compressionMethods = [
        { name: 'flate', filter: '/FlateDecode' },
        { name: 'lzw', filter: '/LZWDecode' },
        { name: 'ascii85', filter: '/ASCII85Decode' },
        { name: 'asciihex', filter: '/ASCIIHexDecode' }
      ];

      const results = [];

      for (const method of compressionMethods) {
        const pdf = Buffer.from(
          '%PDF-1.4\n' +
          `1 0 obj\n<< /Length 0 /Filter ${method.filter} >>\n` +
          'stream\n\nendstream\nendobj\n' +
          'xref\n0 2\n' +
          '0000000000 65535 f\n' +
          '0000000009 00000 n\n' +
          'trailer\n<< /Size 2 >>\n' +
          'startxref\n100\n%%EOF'
        );

        try {
          // Only completion matters here; the resolved value is not inspected.
          await einvoice.processPDFStream(pdf);
          results.push({
            method: method.name,
            handled: true,
            decompressed: true
          });
        } catch (error) {
          results.push({
            method: method.name,
            handled: true,
            error: describeError(error)
          });
        }
      }

      return results;
    }
  );

  compressedEmptyStreams.forEach(result => {
    t.ok(result.handled, `Empty ${result.method} stream was handled`);
  });

  // Test 7: Zero-page PDF
  const zeroPagePDF = await performanceTracker.measureAsync(
    'zero-page-pdf',
    async () => {
      const zeroPagesPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000058 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n115\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(zeroPagesPDF);
        return {
          processed: true,
          pageCount: result?.pageCount || 0,
          hasContent: !!result?.content,
          canExtractXML: !!result?.xml
        };
      } catch (error) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(zeroPagePDF.processed || zeroPagePDF.error, 'Zero-page PDF was handled');
  if (zeroPagePDF.processed) {
    t.equal(zeroPagePDF.pageCount, 0, 'Page count is zero');
  }

  // Test 8: PDF with empty form fields
  const emptyFormFields = await performanceTracker.measureAsync(
    'pdf-with-empty-form-fields',
    async () => {
      const formPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /AcroForm 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Fields [] >>\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000065 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n100\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(formPDF);
        return {
          processed: true,
          hasForm: !!result?.form,
          formFieldCount: result?.form?.fields?.length || 0,
          hasData: !!result?.data
        };
      } catch (error) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(emptyFormFields.processed, 'PDF with empty form fields was processed');

  // Test 9: Recovery attempts on zero-byte files
  const recoveryAttempts = await performanceTracker.measureAsync(
    'recovery-attempts-zero-byte',
    async () => {
      const corruptScenarios = [
        {
          name: 'no-header',
          content: Buffer.from('This is not a PDF')
        },
        {
          name: 'binary-garbage',
          content: Buffer.from([0xFF, 0xFE, 0xFD, 0xFC, 0x00, 0x01, 0x02, 0x03])
        },
        {
          name: 'html-instead',
          content: Buffer.from('Not a PDF')
        },
        {
          name: 'partial-header',
          content: Buffer.from('%PDF-')
        },
        {
          name: 'wrong-version',
          content: Buffer.from('%PDF-99.9\n%%EOF')
        }
      ];

      const results = [];

      for (const scenario of corruptScenarios) {
        try {
          const result = await einvoice.extractFromPDF(scenario.content, {
            attemptRecovery: true
          });
          results.push({
            scenario: scenario.name,
            recovered: !!result,
            hasAnyData: !!result?.xml || !!result?.attachments?.length
          });
        } catch (error) {
          const message = describeError(error);
          results.push({
            scenario: scenario.name,
            recovered: false,
            errorMessage: message,
            recognized: message.includes('PDF') || message.includes('format')
          });
        }
      }

      return results;
    }
  );

  recoveryAttempts.forEach(result => {
    t.ok(!result.recovered, `Recovery should fail for ${result.scenario}`);
    t.ok(result.recognized, `Error should recognize invalid PDF format`);
  });

  // Test 10: Batch processing with zero-byte PDFs
  const batchWithZeroBytes = await performanceTracker.measureAsync(
    'batch-processing-zero-byte',
    async () => {
      const batch = [
        { name: 'normal', content: createValidPDF() },
        { name: 'zero-byte', content: Buffer.alloc(0) },
        { name: 'normal2', content: createValidPDF() },
        { name: 'header-only', content: Buffer.from('%PDF-1.4') },
        { name: 'normal3', content: createValidPDF() }
      ];

      const results = {
        total: batch.length,
        successful: 0,
        failed: 0,
        skipped: 0,
        // Typed explicitly so that `push` below is accepted under strict type-checking.
        errors: [] as Array<{ name: string; error: string }>
      };

      for (const item of batch) {
        try {
          const result = await einvoice.extractFromPDF(item.content);

          // A call that returns without XML or attachments is counted as skipped, not failed.
          if (result?.xml || result?.attachments?.length) {
            results.successful++;
          } else {
            results.skipped++;
          }
        } catch (error) {
          results.failed++;
          results.errors.push({
            name: item.name,
            error: describeError(error)
          });
        }
      }

      return results;
    }
  );

  t.equal(batchWithZeroBytes.total,
    batchWithZeroBytes.successful + batchWithZeroBytes.failed + batchWithZeroBytes.skipped,
    'All batch items were processed');
  t.ok(batchWithZeroBytes.failed > 0, 'Some zero-byte PDFs failed as expected');

  // Print performance summary
  performanceTracker.printSummary();
});

/**
 * Helper function to create a valid PDF with invoice attachment.
 *
 * Builds a fresh buffer on every call from a fixed literal describing a
 * catalog with one embedded file named `invoice.xml`.
 *
 * NOTE(review): the embedded stream declares `/Length 50` but the literal
 * body is `TEST\n`; confirm whether the original payload was longer.
 *
 * @returns a new Buffer holding the fixture bytes
 */
function createValidPDF(): Buffer {
  return Buffer.from(
    '%PDF-1.4\n' +
    '1 0 obj\n<< /Type /Catalog /Names 2 0 R >>\nendobj\n' +
    '2 0 obj\n<< /EmbeddedFiles 3 0 R >>\nendobj\n' +
    '3 0 obj\n<< /Names [(invoice.xml) 4 0 R] >>\nendobj\n' +
    '4 0 obj\n<< /Type /Filespec /F (invoice.xml) /EF << /F 5 0 R >> >>\nendobj\n' +
    '5 0 obj\n<< /Type /EmbeddedFile /Length 50 >>\nstream\n' +
    'TEST\n' +
    'endstream\nendobj\n' +
    'xref\n0 6\n' +
    '0000000000 65535 f\n' +
    '0000000009 00000 n\n' +
    '0000000062 00000 n\n' +
    '0000000103 00000 n\n' +
    '0000000151 00000 n\n' +
    '0000000229 00000 n\n' +
    'trailer\n<< /Size 6 /Root 1 0 R >>\n' +
    'startxref\n350\n%%EOF'
  );
}

// Run the test
tap.start();