// einvoice/test/suite/einvoice_edge-cases/test.edge-02.gigabyte-files.ts
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PerformanceTracker } from '../performance.tracker.js';
import * as fs from 'fs';
import * as path from 'path';
const performanceTracker = new PerformanceTracker('EDGE-02: Gigabyte-Size Invoices');
tap.test('EDGE-02: Gigabyte-Size Invoices - should handle extremely large invoice files', async (t) => {
const einvoice = new EInvoice();
// Test 1: Large number of line items
const manyLineItems = await performanceTracker.measureAsync(
'many-line-items',
async () => {
// Create invoice with 100,000 line items (simulated)
const lineItemCount = 100000;
const chunkSize = 1000;
const header = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>LARGE-001</ID>
<IssueDate>2024-01-01</IssueDate>
<InvoiceLines>`;
const footer = ` </InvoiceLines>
<TotalAmount>1000000.00</TotalAmount>
</Invoice>`;
// Simulate streaming parse
const startTime = Date.now();
const startMemory = process.memoryUsage();
// In a real implementation this would be a streaming parse; declare the mock
// outside the try block so the catch handler below can still report progress
const mockStream = {
header,
lineItemCount,
footer,
processed: 0
};
try {
// Process in chunks
while (mockStream.processed < lineItemCount) {
const batchSize = Math.min(chunkSize, lineItemCount - mockStream.processed);
// Simulate processing chunk
for (let i = 0; i < batchSize; i++) {
// A real implementation would append generateLineItem(mockStream.processed + i) to the stream here
}
mockStream.processed += batchSize;
// Check memory usage
const currentMemory = process.memoryUsage();
if (currentMemory.heapUsed - startMemory.heapUsed > 500 * 1024 * 1024) {
throw new Error('Memory limit exceeded');
}
}
const endTime = Date.now();
const endMemory = process.memoryUsage();
return {
success: true,
lineItems: lineItemCount,
timeTaken: endTime - startTime,
memoryUsed: endMemory.heapUsed - startMemory.heapUsed,
throughput: lineItemCount / ((endTime - startTime) / 1000)
};
} catch (error) {
return {
success: false,
error: error.message,
lineItems: mockStream.processed
};
}
}
);
t.ok(manyLineItems.success || manyLineItems.error, 'Large line item count was processed');
// Test 2: Large text content
const largeTextContent = await performanceTracker.measureAsync(
'large-text-content',
async () => {
// Create invoice with very large description fields
const descriptionSize = 10 * 1024 * 1024; // 10MB per description
const itemCount = 10;
const results = {
totalSize: 0,
processed: 0,
memoryPeaks: []
};
try {
for (let i = 0; i < itemCount; i++) {
const largeDescription = 'A'.repeat(descriptionSize);
const xml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice>
<ID>LARGE-TEXT-${i}</ID>
<Description>${largeDescription}</Description>
</Invoice>`;
const memBefore = process.memoryUsage().heapUsed;
// Process with streaming if available
const processed = await einvoice.parseWithStreaming(xml);
const memAfter = process.memoryUsage().heapUsed;
results.memoryPeaks.push(memAfter - memBefore);
results.totalSize += xml.length;
results.processed++;
// Force GC between items if available
if (global.gc) {
global.gc();
}
}
return {
success: true,
...results,
avgMemoryPerItem: results.memoryPeaks.reduce((a, b) => a + b, 0) / results.memoryPeaks.length
};
} catch (error) {
return {
success: false,
error: error.message,
...results
};
}
}
);
t.ok(largeTextContent.processed > 0, 'Large text content was processed');
// Test 3: Streaming vs loading entire file
const streamingComparison = await performanceTracker.measureAsync(
'streaming-vs-loading',
async () => {
const testSizes = [
{ size: 1 * 1024 * 1024, name: '1MB' },
{ size: 10 * 1024 * 1024, name: '10MB' },
{ size: 100 * 1024 * 1024, name: '100MB' }
];
const results = [];
for (const test of testSizes) {
// Generate test data
const testXML = generateLargeInvoice(test.size);
// Test full loading
let fullLoadResult;
try {
const startTime = Date.now();
const startMem = process.memoryUsage();
await einvoice.parseDocument(testXML);
const endTime = Date.now();
const endMem = process.memoryUsage();
fullLoadResult = {
method: 'full-load',
success: true,
time: endTime - startTime,
memory: endMem.heapUsed - startMem.heapUsed
};
} catch (error) {
fullLoadResult = {
method: 'full-load',
success: false,
error: error.message
};
}
// Test streaming
let streamResult;
try {
const startTime = Date.now();
const startMem = process.memoryUsage();
await einvoice.parseWithStreaming(testXML);
const endTime = Date.now();
const endMem = process.memoryUsage();
streamResult = {
method: 'streaming',
success: true,
time: endTime - startTime,
memory: endMem.heapUsed - startMem.heapUsed
};
} catch (error) {
streamResult = {
method: 'streaming',
success: false,
error: error.message
};
}
results.push({
size: test.name,
fullLoad: fullLoadResult,
streaming: streamResult,
memoryRatio: streamResult.memory && fullLoadResult.memory ?
streamResult.memory / fullLoadResult.memory : null
});
}
return results;
}
);
streamingComparison.forEach(result => {
if (result.streaming.success && result.fullLoad.success) {
t.ok(result.memoryRatio !== null && result.memoryRatio < 0.5,
`Streaming uses less memory for ${result.size}`);
}
});
// Test 4: Memory-mapped file processing
const memoryMappedProcessing = await performanceTracker.measureAsync(
'memory-mapped-processing',
async () => {
const testFile = path.join(process.cwd(), '.nogit', 'large-test.xml');
const fileSize = 500 * 1024 * 1024; // 500MB
try {
// Create large test file if it doesn't exist
if (!fs.existsSync(testFile)) {
const dir = path.dirname(testFile);
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
// Write file in chunks
const stream = fs.createWriteStream(testFile);
stream.write('<?xml version="1.0" encoding="UTF-8"?><Invoice><Items>');
const chunkSize = 1024 * 1024; // 1MB chunks
const chunk = '<Item>' + 'X'.repeat(chunkSize - 14) + '</Item>';
const chunks = Math.floor(fileSize / chunkSize);
for (let i = 0; i < chunks; i++) {
// Respect write backpressure so the 500MB payload is not buffered in memory
if (!stream.write(chunk)) {
await new Promise<void>(resolve => stream.once('drain', () => resolve()));
}
}
stream.write('</Items></Invoice>');
// end() invokes its callback on 'finish', ensuring the file is flushed before processing
await new Promise<void>(resolve => stream.end(() => resolve()));
}
// Process with memory mapping
const startTime = Date.now();
const startMem = process.memoryUsage();
const result = await einvoice.processLargeFile(testFile, {
useMemoryMapping: true,
chunkSize: 10 * 1024 * 1024 // 10MB chunks
});
const endTime = Date.now();
const endMem = process.memoryUsage();
// Clean up
if (fs.existsSync(testFile)) {
fs.unlinkSync(testFile);
}
return {
success: true,
fileSize,
timeTaken: endTime - startTime,
memoryUsed: endMem.heapUsed - startMem.heapUsed,
throughputMBps: (fileSize / (1024 * 1024)) / ((endTime - startTime) / 1000)
};
} catch (error) {
// Clean up on error
if (fs.existsSync(testFile)) {
fs.unlinkSync(testFile);
}
return {
success: false,
error: error.message
};
}
}
);
t.ok(memoryMappedProcessing.success || memoryMappedProcessing.error,
'Memory-mapped processing completed');
// Test 5: Concurrent large file processing
const concurrentLargeFiles = await performanceTracker.measureAsync(
'concurrent-large-files',
async () => {
const fileCount = 5;
const fileSize = 50 * 1024 * 1024; // 50MB each
const promises = [];
const startTime = Date.now();
const startMem = process.memoryUsage();
for (let i = 0; i < fileCount; i++) {
const xml = generateLargeInvoice(fileSize);
promises.push(
einvoice.parseWithStreaming(xml)
.then(() => ({ fileId: i, success: true }))
.catch(error => ({ fileId: i, success: false, error: error.message }))
);
}
const results = await Promise.all(promises);
const endTime = Date.now();
const endMem = process.memoryUsage();
const successful = results.filter(r => r.success).length;
return {
totalFiles: fileCount,
successful,
failed: fileCount - successful,
totalTime: endTime - startTime,
totalMemory: endMem.heapUsed - startMem.heapUsed,
avgTimePerFile: (endTime - startTime) / fileCount,
results
};
}
);
t.ok(concurrentLargeFiles.successful > 0, 'Some concurrent large files were processed');
// Test 6: Progressive loading with backpressure
const progressiveLoading = await performanceTracker.measureAsync(
'progressive-loading-backpressure',
async () => {
const totalSize = 200 * 1024 * 1024; // 200MB
const chunkSize = 10 * 1024 * 1024; // 10MB chunks
const results = {
chunksProcessed: 0,
backpressureEvents: 0,
memoryPeaks: [],
processingTimes: []
};
try {
for (let offset = 0; offset < totalSize; offset += chunkSize) {
const chunkData = generateInvoiceChunk(offset, Math.min(chunkSize, totalSize - offset));
const chunkStart = Date.now();
const memBefore = process.memoryUsage();
// Check for backpressure
if (memBefore.heapUsed > 300 * 1024 * 1024) {
results.backpressureEvents++;
// Wait for memory to reduce
if (global.gc) {
global.gc();
}
await new Promise(resolve => setTimeout(resolve, 100));
}
await einvoice.processChunk(chunkData, {
isFirst: offset === 0,
isLast: offset + chunkSize >= totalSize
});
const chunkEnd = Date.now();
const memAfter = process.memoryUsage();
results.chunksProcessed++;
results.processingTimes.push(chunkEnd - chunkStart);
results.memoryPeaks.push(memAfter.heapUsed);
}
return {
success: true,
...results,
avgProcessingTime: results.processingTimes.reduce((a, b) => a + b, 0) / results.processingTimes.length,
maxMemoryPeak: Math.max(...results.memoryPeaks)
};
} catch (error) {
return {
success: false,
error: error.message,
...results
};
}
}
);
t.ok(progressiveLoading.chunksProcessed > 0, 'Progressive loading processed chunks');
t.ok(progressiveLoading.backpressureEvents >= 0, 'Backpressure was handled');
// Test 7: Large attachment handling
const largeAttachments = await performanceTracker.measureAsync(
'large-attachment-handling',
async () => {
const attachmentSizes = [
{ size: 10 * 1024 * 1024, name: '10MB' },
{ size: 50 * 1024 * 1024, name: '50MB' },
{ size: 100 * 1024 * 1024, name: '100MB' }
];
const results = [];
for (const attachment of attachmentSizes) {
try {
// Create PDF with large attachment
const largePDF = createPDFWithAttachment(attachment.size);
const startTime = Date.now();
const startMem = process.memoryUsage();
const extracted = await einvoice.extractFromPDF(largePDF, {
streamAttachments: true
});
const endTime = Date.now();
const endMem = process.memoryUsage();
results.push({
size: attachment.name,
success: true,
hasAttachment: !!extracted?.attachments?.length,
timeTaken: endTime - startTime,
memoryUsed: endMem.heapUsed - startMem.heapUsed
});
} catch (error) {
results.push({
size: attachment.name,
success: false,
error: error.message
});
}
}
return results;
}
);
largeAttachments.forEach(result => {
t.ok(result.success || result.error, `${result.size} attachment was processed`);
});
// Test 8: Format conversion of large files
const largeFormatConversion = await performanceTracker.measureAsync(
'large-format-conversion',
async () => {
const testSizes = [10, 50]; // MB
const results = [];
for (const sizeMB of testSizes) {
const size = sizeMB * 1024 * 1024;
const largeUBL = generateLargeUBLInvoice(size);
try {
const startTime = Date.now();
const startMem = process.memoryUsage();
const converted = await einvoice.convertFormat(largeUBL, 'cii', {
streaming: true
});
const endTime = Date.now();
const endMem = process.memoryUsage();
results.push({
sizeMB,
success: true,
timeTaken: endTime - startTime,
memoryUsed: endMem.heapUsed - startMem.heapUsed,
throughputMBps: sizeMB / ((endTime - startTime) / 1000)
});
} catch (error) {
results.push({
sizeMB,
success: false,
error: error.message
});
}
}
return results;
}
);
largeFormatConversion.forEach(result => {
t.ok(result.success || result.error, `${result.sizeMB}MB conversion completed`);
});
// Test 9: Validation of gigabyte files
const gigabyteValidation = await performanceTracker.measureAsync(
'gigabyte-file-validation',
async () => {
// Simulate validation of 1GB file
const fileSize = 1024 * 1024 * 1024; // 1GB
const chunkSize = 50 * 1024 * 1024; // 50MB chunks
const validationResults = {
chunksValidated: 0,
errors: [],
warnings: [],
timeTaken: 0
};
const startTime = Date.now();
try {
const totalChunks = Math.ceil(fileSize / chunkSize);
for (let i = 0; i < totalChunks; i++) {
// Simulate chunk validation
const chunkValidation = await einvoice.validateChunk({
chunkIndex: i,
totalChunks,
size: Math.min(chunkSize, fileSize - i * chunkSize)
});
validationResults.chunksValidated++;
if (chunkValidation?.errors) {
validationResults.errors.push(...chunkValidation.errors);
}
if (chunkValidation?.warnings) {
validationResults.warnings.push(...chunkValidation.warnings);
}
// Simulate memory pressure
if (i % 5 === 0 && global.gc) {
global.gc();
}
}
validationResults.timeTaken = Date.now() - startTime;
return {
success: true,
...validationResults,
throughputMBps: (fileSize / (1024 * 1024)) / (validationResults.timeTaken / 1000)
};
} catch (error) {
return {
success: false,
error: error.message,
...validationResults
};
}
}
);
t.ok(gigabyteValidation.chunksValidated > 0, 'Gigabyte file validation progressed');
// Test 10: Recovery after large file processing
const largeFileRecovery = await performanceTracker.measureAsync(
'large-file-recovery',
async () => {
const results = {
largeFileProcessed: false,
memoryRecovered: false,
normalFileAfter: false
};
// Get baseline memory
if (global.gc) global.gc();
await new Promise(resolve => setTimeout(resolve, 100));
const baselineMemory = process.memoryUsage().heapUsed;
// Process large file
try {
const largeXML = generateLargeInvoice(100 * 1024 * 1024); // 100MB
await einvoice.parseDocument(largeXML);
results.largeFileProcessed = true;
} catch (error) {
// Expected for very large files
}
// Force cleanup
if (global.gc) global.gc();
await new Promise(resolve => setTimeout(resolve, 100));
const afterCleanupMemory = process.memoryUsage().heapUsed;
results.memoryRecovered = afterCleanupMemory < baselineMemory + 50 * 1024 * 1024; // Within 50MB
// Try normal file
try {
const normalXML = '<?xml version="1.0"?><Invoice><ID>NORMAL</ID></Invoice>';
await einvoice.parseDocument(normalXML);
results.normalFileAfter = true;
} catch (error) {
// Should not happen
}
return results;
}
);
t.ok(largeFileRecovery.memoryRecovered, 'Memory was recovered after large file');
t.ok(largeFileRecovery.normalFileAfter, 'Normal processing works after large file');
// Print performance summary
performanceTracker.printSummary();
});
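// The tests above repeat the same pattern of measuring elapsed time and heap delta
// around an operation. The helper below is a hedged sketch of factoring that pattern
// out; it is not called by the tests and its name and shape are illustrative only.
async function measureHeapAndTime<T>(operation: () => Promise<T>): Promise<{ result: T; timeMs: number; heapDelta: number }> {
const startHeap = process.memoryUsage().heapUsed;
const startTime = Date.now();
const result = await operation();
return {
result,
timeMs: Date.now() - startTime,
heapDelta: process.memoryUsage().heapUsed - startHeap
};
}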
// Helper function to generate large invoice
function generateLargeInvoice(targetSize: number): string {
let xml = '<?xml version="1.0" encoding="UTF-8"?><Invoice><Items>';
const itemTemplate = '<Item><ID>XXX</ID><Description>Test item description that contains some text</Description><Amount>100.00</Amount></Item>';
const itemSize = itemTemplate.length;
const itemCount = Math.floor(targetSize / itemSize);
for (let i = 0; i < itemCount; i++) {
xml += itemTemplate.replace('XXX', i.toString());
}
xml += '</Items></Invoice>';
return xml;
}
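// Hedged sketch: the same invoice produced lazily as an async iterable instead of one
// giant string, which is what the streaming paths in Tests 1 and 3 assume. Only the
// chunking idea is illustrated; how a streaming parser would consume it is assumed.
async function* generateLargeInvoiceChunks(targetSize: number): AsyncGenerator<string> {
yield '<?xml version="1.0" encoding="UTF-8"?><Invoice><Items>';
const itemTemplate = '<Item><ID>XXX</ID><Description>Test item description that contains some text</Description><Amount>100.00</Amount></Item>';
const itemCount = Math.floor(targetSize / itemTemplate.length);
for (let i = 0; i < itemCount; i++) {
yield itemTemplate.replace('XXX', i.toString());
}
yield '</Items></Invoice>';
}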
// Helper function to generate invoice chunk
function generateInvoiceChunk(offset: number, size: number): any {
return {
offset,
size,
data: Buffer.alloc(size, 'A')
};
}
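// Node.js has no built-in memory mapping, so the processLargeFile({ useMemoryMapping })
// call in Test 4 is assumed API. A common stand-in, sketched below, is a chunked read via
// fs.createReadStream with highWaterMark set to the desired chunk size.
async function readFileInChunks(
filePath: string,
chunkSize: number,
onChunk: (chunk: Buffer) => void | Promise<void>
): Promise<number> {
let bytesRead = 0;
const stream = fs.createReadStream(filePath, { highWaterMark: chunkSize });
// Readable streams are async iterable, so this loop naturally applies backpressure
for await (const chunk of stream) {
bytesRead += (chunk as Buffer).length;
await onChunk(chunk as Buffer);
}
return bytesRead;
}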
// Helper function to create PDF with attachment
function createPDFWithAttachment(attachmentSize: number): Buffer {
// Simplified mock - in reality would create actual PDF
return Buffer.alloc(attachmentSize + 1024, 'P');
}
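// Hedged sketch of a non-mock variant using pdf-lib (an assumed dependency, not imported
// above): create a minimal PDF and embed the payload as an attached file. The attach()
// options follow pdf-lib's documented API; treat them as assumptions if versions differ.
async function createRealPDFWithAttachment(attachmentSize: number): Promise<Uint8Array> {
const { PDFDocument } = await import('pdf-lib');
const pdfDoc = await PDFDocument.create();
pdfDoc.addPage();
await pdfDoc.attach(Buffer.alloc(attachmentSize, 'A'), 'factur-x.xml', {
mimeType: 'application/xml',
description: 'Large embedded e-invoice payload (test data)'
});
return pdfDoc.save();
}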
// Helper function to generate large UBL invoice
function generateLargeUBLInvoice(size: number): string {
let xml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<ID>LARGE-UBL-001</ID>
<IssueDate>2024-01-01</IssueDate>
<InvoiceLines>`;
const lineTemplate = `<InvoiceLine><ID>X</ID><InvoicedQuantity>1</InvoicedQuantity><LineExtensionAmount>100</LineExtensionAmount></InvoiceLine>`;
const lineSize = lineTemplate.length;
const lineCount = Math.floor(size / lineSize);
for (let i = 0; i < lineCount; i++) {
xml += lineTemplate.replace('X', i.toString());
}
xml += '</InvoiceLines></Invoice>';
return xml;
}
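// einvoice.validateChunk in Test 9 is assumed API. As a hedged illustration of the state a
// chunked validator must carry across chunk boundaries, the naive helper below tracks XML
// element depth (ignoring CDATA, comments and tags split across chunks); a non-zero depth
// after the last chunk indicates unbalanced markup.
function trackElementDepth(chunk: string, depthSoFar: number): number {
const opening = (chunk.match(/<[A-Za-z][^>]*>/g) || []).filter(tag => !tag.endsWith('/>')).length;
const closing = (chunk.match(/<\/[A-Za-z][^>]*>/g) || []).length;
return depthSoFar + opening - closing;
}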
// Run the test
tap.start();