// einvoice/test/suite/einvoice_edge-cases/test.edge-05.zero-byte-pdf.ts

import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PerformanceTracker } from '../performance.tracker.js';

const performanceTracker = new PerformanceTracker('EDGE-05: Zero-Byte PDFs');

/**
 * Turns whatever a `catch` clause received into a printable message.
 *
 * JavaScript allows throwing arbitrary values, so `error.message` may not
 * exist; calling string methods on it would throw from inside the handler
 * and abort the whole test instead of recording the failure.
 *
 * @param error - the value caught by a `catch` clause
 * @returns the `Error` message, or the stringified value for non-`Error` throws
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * EDGE-05: feeds empty, minimal, truncated and otherwise degenerate PDF
 * buffers to a single `EInvoice` instance and records, per scenario, whether
 * the call resolved or threw.
 *
 * Every scenario runs inside `performanceTracker.measureAsync` under its own
 * label; the assertions read the object returned from the measured callback
 * (presumably `measureAsync` passes the callback's result through — confirm
 * against PerformanceTracker).
 */
tap.test('EDGE-05: Zero-Byte PDFs - should handle zero-byte and minimal PDF files', async (t) => {
  const einvoice = new EInvoice();

  // Test 1: Truly zero-byte PDF
  const zeroByteFile = await performanceTracker.measureAsync(
    'truly-zero-byte-pdf',
    async () => {
      const zeroPDF = Buffer.alloc(0);

      try {
        const result = await einvoice.extractFromPDF(zeroPDF);

        return {
          handled: true,
          hasContent: !!result,
          hasXML: result?.xml !== undefined,
          hasAttachments: (result?.attachments?.length ?? 0) > 0,
          error: null,
          bufferSize: zeroPDF.length
        };
      } catch (error: unknown) {
        // A thrown error also counts as "handled": the call settled either way.
        return {
          handled: true,
          hasContent: false,
          error: describeError(error),
          errorType: error instanceof Error ? error.constructor.name : typeof error,
          bufferSize: zeroPDF.length
        };
      }
    }
  );

  // `handled` is true on both paths above, so this only documents that the call settled.
  t.ok(zeroByteFile.handled, 'Zero-byte PDF was handled');
  t.notOk(zeroByteFile.hasContent, 'Zero-byte PDF has no content');
  t.equal(zeroByteFile.bufferSize, 0, 'Buffer size is zero');

  // Test 2: Minimal PDF structure
  const minimalPDFStructure = await performanceTracker.measureAsync(
    'minimal-pdf-structure',
    async () => {
      const minimalPDFs = [
        {
          name: 'header-only',
          content: Buffer.from('%PDF-1.4')
        },
        {
          name: 'header-and-eof',
          content: Buffer.from('%PDF-1.4\n%%EOF')
        },
        {
          name: 'empty-catalog',
          content: Buffer.from(
            '%PDF-1.4\n' +
            '1 0 obj\n<< /Type /Catalog >>\nendobj\n' +
            'xref\n0 2\n' +
            '0000000000 65535 f\n' +
            '0000000009 00000 n\n' +
            'trailer\n<< /Size 2 /Root 1 0 R >>\n' +
            'startxref\n64\n%%EOF'
          )
        },
        {
          name: 'single-empty-page',
          content: Buffer.from(
            '%PDF-1.4\n' +
            '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
            '2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
            'xref\n0 3\n' +
            '0000000000 65535 f\n' +
            '0000000009 00000 n\n' +
            '0000000052 00000 n\n' +
            'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
            'startxref\n110\n%%EOF'
          )
        }
      ];

      // Fields beyond name/size/processed are set by only one of the two outcomes.
      const results: Array<{
        name: string;
        size: number;
        processed: boolean;
        hasXML?: boolean;
        hasAttachments?: boolean;
        hasMetadata?: boolean;
        error?: string;
      }> = [];

      for (const pdf of minimalPDFs) {
        try {
          const result = await einvoice.extractFromPDF(pdf.content);

          results.push({
            name: pdf.name,
            size: pdf.content.length,
            processed: true,
            hasXML: !!result?.xml,
            hasAttachments: (result?.attachments?.length ?? 0) > 0,
            hasMetadata: !!result?.metadata
          });
        } catch (error: unknown) {
          results.push({
            name: pdf.name,
            size: pdf.content.length,
            processed: false,
            error: describeError(error)
          });
        }
      }

      return results;
    }
  );

  minimalPDFStructure.forEach(result => {
    t.ok(result.processed || result.error, `Minimal PDF ${result.name} was processed`);
    t.notOk(result.hasXML, `Minimal PDF ${result.name} has no XML`);
  });

  // Test 3: Truncated PDF files
  const truncatedPDFs = await performanceTracker.measureAsync(
    'truncated-pdf-files',
    async () => {
      // Start with a valid PDF structure and truncate at different points
      const fullPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n' +
        '3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n' +
        'xref\n0 4\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000052 00000 n\n' +
        '0000000110 00000 n\n' +
        'trailer\n<< /Size 4 /Root 1 0 R >>\n' +
        'startxref\n196\n%%EOF'
      );

      const truncationPoints = [
        { name: 'after-header', bytes: 10 },
        { name: 'mid-object', bytes: 50 },
        { name: 'before-xref', bytes: 150 },
        { name: 'mid-xref', bytes: 250 },
        { name: 'before-eof', bytes: fullPDF.length - 5 }
      ];

      const results: Array<{
        truncationPoint: string;
        size: number;
        recovered: boolean;
        hasPartialData?: boolean;
        error?: string;
        isCorruptionError?: boolean;
      }> = [];

      for (const point of truncationPoints) {
        // subarray() is the non-deprecated equivalent of Buffer#slice (a view, not a copy).
        const truncated = fullPDF.subarray(0, point.bytes);

        try {
          const result = await einvoice.extractFromPDF(truncated);

          results.push({
            truncationPoint: point.name,
            size: truncated.length,
            recovered: true,
            hasPartialData: !!result
          });
        } catch (error: unknown) {
          const message = describeError(error);

          results.push({
            truncationPoint: point.name,
            size: truncated.length,
            recovered: false,
            error: message,
            isCorruptionError: message.includes('corrupt') || message.includes('truncated')
          });
        }
      }

      return results;
    }
  );

  truncatedPDFs.forEach(result => {
    t.ok(!result.recovered || result.isCorruptionError,
      `Truncated PDF at ${result.truncationPoint} should fail or be detected as corrupt`);
  });

  // Test 4: PDF with zero-byte attachment
  const zeroByteAttachment = await performanceTracker.measureAsync(
    'pdf-with-zero-byte-attachment',
    async () => {
      // Create a PDF with an embedded file of zero bytes
      const pdfWithEmptyAttachment = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Names 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /EmbeddedFiles 3 0 R >>\nendobj\n' +
        '3 0 obj\n<< /Names [(empty.xml) 4 0 R] >>\nendobj\n' +
        '4 0 obj\n<< /Type /Filespec /F (empty.xml) /EF << /F 5 0 R >> >>\nendobj\n' +
        '5 0 obj\n<< /Type /EmbeddedFile /Length 0 >>\nstream\n\nendstream\nendobj\n' +
        'xref\n0 6\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000062 00000 n\n' +
        '0000000103 00000 n\n' +
        '0000000151 00000 n\n' +
        '0000000229 00000 n\n' +
        'trailer\n<< /Size 6 /Root 1 0 R >>\n' +
        'startxref\n307\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(pdfWithEmptyAttachment);

        return {
          processed: true,
          hasAttachments: (result?.attachments?.length ?? 0) > 0,
          attachmentCount: result?.attachments?.length ?? 0,
          firstAttachmentSize: result?.attachments?.[0]?.size ?? 0,
          firstAttachmentName: result?.attachments?.[0]?.name ?? null
        };
      } catch (error: unknown) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(zeroByteAttachment.processed, 'PDF with zero-byte attachment was processed');
  // The size check only applies when the extractor actually reported an attachment.
  if (zeroByteAttachment.hasAttachments) {
    t.equal(zeroByteAttachment.firstAttachmentSize, 0, 'Attachment size is zero');
  }

  // Test 5: PDF with only metadata
  const metadataOnlyPDF = await performanceTracker.measureAsync(
    'pdf-with-only-metadata',
    async () => {
      const pdfWithMetadata = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Metadata 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Metadata /Subtype /XML /Length 100 >>\n' +
        'stream\n' +
        '<?xml version="1.0"?><x:xmpmeta xmlns:x="adobe:ns:meta/"><rdf:RDF></rdf:RDF></x:xmpmeta>\n' +
        'endstream\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000068 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n259\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(pdfWithMetadata);

        return {
          processed: true,
          hasMetadata: !!result?.metadata,
          hasXML: !!result?.xml,
          hasContent: !!result?.content,
          // "Empty" here means: neither invoice XML nor any attachment came back.
          isEmpty: !result?.xml && !result?.attachments?.length
        };
      } catch (error: unknown) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(metadataOnlyPDF.processed, 'PDF with only metadata was processed');
  t.ok(metadataOnlyPDF.isEmpty, 'PDF with only metadata has no invoice content');

  // Test 6: Compressed empty streams
  const compressedEmptyStreams = await performanceTracker.measureAsync(
    'compressed-empty-streams',
    async () => {
      const compressionMethods = [
        { name: 'flate', filter: '/FlateDecode' },
        { name: 'lzw', filter: '/LZWDecode' },
        { name: 'ascii85', filter: '/ASCII85Decode' },
        { name: 'asciihex', filter: '/ASCIIHexDecode' }
      ];

      const results: Array<{
        method: string;
        handled: boolean;
        decompressed?: boolean;
        error?: string;
      }> = [];

      for (const method of compressionMethods) {
        const pdf = Buffer.from(
          '%PDF-1.4\n' +
          `1 0 obj\n<< /Length 0 /Filter ${method.filter} >>\n` +
          'stream\n\nendstream\nendobj\n' +
          'xref\n0 2\n' +
          '0000000000 65535 f\n' +
          '0000000009 00000 n\n' +
          'trailer\n<< /Size 2 >>\n' +
          'startxref\n100\n%%EOF'
        );

        try {
          // Only completion matters here; the resolved value is not inspected.
          await einvoice.processPDFStream(pdf);

          results.push({
            method: method.name,
            handled: true,
            decompressed: true
          });
        } catch (error: unknown) {
          results.push({
            method: method.name,
            handled: true,
            error: describeError(error)
          });
        }
      }

      return results;
    }
  );

  compressedEmptyStreams.forEach(result => {
    t.ok(result.handled, `Empty ${result.method} stream was handled`);
  });

  // Test 7: Zero-page PDF
  const zeroPagePDF = await performanceTracker.measureAsync(
    'zero-page-pdf',
    async () => {
      const zeroPagesPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000058 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n115\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(zeroPagesPDF);

        return {
          processed: true,
          pageCount: result?.pageCount ?? 0,
          hasContent: !!result?.content,
          canExtractXML: !!result?.xml
        };
      } catch (error: unknown) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(zeroPagePDF.processed || zeroPagePDF.error, 'Zero-page PDF was handled');
  if (zeroPagePDF.processed) {
    t.equal(zeroPagePDF.pageCount, 0, 'Page count is zero');
  }

  // Test 8: PDF with empty form fields
  const emptyFormFields = await performanceTracker.measureAsync(
    'pdf-with-empty-form-fields',
    async () => {
      const formPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /AcroForm 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Fields [] >>\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000065 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n100\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(formPDF);

        return {
          processed: true,
          hasForm: !!result?.form,
          formFieldCount: result?.form?.fields?.length ?? 0,
          hasData: !!result?.data
        };
      } catch (error: unknown) {
        return {
          processed: false,
          error: describeError(error)
        };
      }
    }
  );

  t.ok(emptyFormFields.processed, 'PDF with empty form fields was processed');

  // Test 9: Recovery attempts on zero-byte files
  const recoveryAttempts = await performanceTracker.measureAsync(
    'recovery-attempts-zero-byte',
    async () => {
      const corruptScenarios = [
        {
          name: 'no-header',
          content: Buffer.from('This is not a PDF')
        },
        {
          name: 'binary-garbage',
          content: Buffer.from([0xFF, 0xFE, 0xFD, 0xFC, 0x00, 0x01, 0x02, 0x03])
        },
        {
          name: 'html-instead',
          content: Buffer.from('<html><body>Not a PDF</body></html>')
        },
        {
          name: 'partial-header',
          content: Buffer.from('%PDF-')
        },
        {
          name: 'wrong-version',
          content: Buffer.from('%PDF-99.9\n%%EOF')
        }
      ];

      // `recognized` and `errorMessage` exist only when the call threw.
      const results: Array<{
        scenario: string;
        recovered: boolean;
        hasAnyData?: boolean;
        errorMessage?: string;
        recognized?: boolean;
      }> = [];

      for (const scenario of corruptScenarios) {
        try {
          const result = await einvoice.extractFromPDF(scenario.content, {
            attemptRecovery: true
          });

          results.push({
            scenario: scenario.name,
            recovered: !!result,
            hasAnyData: !!result?.xml || !!result?.attachments?.length
          });
        } catch (error: unknown) {
          const message = describeError(error);

          results.push({
            scenario: scenario.name,
            recovered: false,
            errorMessage: message,
            recognized: message.includes('PDF') || message.includes('format')
          });
        }
      }

      return results;
    }
  );

  recoveryAttempts.forEach(result => {
    t.ok(!result.recovered, `Recovery should fail for ${result.scenario}`);
    t.ok(result.recognized, `Error should recognize invalid PDF format`);
  });

  // Test 10: Batch processing with zero-byte PDFs
  const batchWithZeroBytes = await performanceTracker.measureAsync(
    'batch-processing-zero-byte',
    async () => {
      const batch = [
        { name: 'normal', content: createValidPDF() },
        { name: 'zero-byte', content: Buffer.alloc(0) },
        { name: 'normal2', content: createValidPDF() },
        { name: 'header-only', content: Buffer.from('%PDF-1.4') },
        { name: 'normal3', content: createValidPDF() }
      ];

      // Annotated explicitly: a bare `errors: []` would be inferred as never[] under strict.
      const results: {
        total: number;
        successful: number;
        failed: number;
        skipped: number;
        errors: Array<{ name: string; error: string }>;
      } = {
        total: batch.length,
        successful: 0,
        failed: 0,
        skipped: 0,
        errors: []
      };

      for (const item of batch) {
        try {
          const result = await einvoice.extractFromPDF(item.content);

          // Resolved with data -> successful; resolved without data -> skipped.
          if (result?.xml || result?.attachments?.length) {
            results.successful++;
          } else {
            results.skipped++;
          }
        } catch (error: unknown) {
          results.failed++;
          results.errors.push({
            name: item.name,
            error: describeError(error)
          });
        }
      }

      return results;
    }
  );

  t.equal(batchWithZeroBytes.total,
    batchWithZeroBytes.successful + batchWithZeroBytes.failed + batchWithZeroBytes.skipped,
    'All batch items were processed');
  t.ok(batchWithZeroBytes.failed > 0, 'Some zero-byte PDFs failed as expected');

  // Print performance summary
  performanceTracker.printSummary();
});

/**
 * Builds a minimal PDF 1.4 document with one embedded file, `invoice.xml`.
 *
 * The stream `/Length`, every cross-reference offset and the `startxref`
 * value are computed from the bytes actually emitted, so the cross-reference
 * table points at the real object positions. Each xref entry is written in
 * the fixed 20-byte form (10-digit offset, 5-digit generation, keyword,
 * space + line feed).
 *
 * @returns a fresh Buffer holding the complete document
 */
function createValidPDF(): Buffer {
  const invoiceXml = '<?xml version="1.0"?><Invoice><ID>TEST</ID></Invoice>';

  // Object N is stored at index N - 1; object 5 is the embedded file stream.
  const objectBodies = [
    '<< /Type /Catalog /Names 2 0 R >>',
    '<< /EmbeddedFiles 3 0 R >>',
    '<< /Names [(invoice.xml) 4 0 R] >>',
    '<< /Type /Filespec /F (invoice.xml) /EF << /F 5 0 R >> >>',
    `<< /Type /EmbeddedFile /Length ${Buffer.byteLength(invoiceXml)} >>\n` +
      `stream\n${invoiceXml}\nendstream`
  ];

  let document = '%PDF-1.4\n';
  const objectOffsets: number[] = [];

  objectBodies.forEach((body, index) => {
    // Record the byte position before appending so the xref entry can point at it.
    objectOffsets.push(Buffer.byteLength(document));
    document += `${index + 1} 0 obj\n${body}\nendobj\n`;
  });

  const xrefOffset = Buffer.byteLength(document);
  // +1 for the mandatory free entry describing object 0.
  const entryCount = objectBodies.length + 1;

  document += `xref\n0 ${entryCount}\n`;
  document += '0000000000 65535 f \n';
  for (const offset of objectOffsets) {
    document += `${String(offset).padStart(10, '0')} 00000 n \n`;
  }
  document += `trailer\n<< /Size ${entryCount} /Root 1 0 R >>\n`;
  document += `startxref\n${xrefOffset}\n%%EOF`;

  return Buffer.from(document);
}

// Run the test: start the tap runner now that the EDGE-05 test above has been registered
tap.start();