// einvoice/test/suite/einvoice_corpus-validation/test.corp-08.failed-invoices.ts

import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { ValidationLevel } from '../../../ts/interfaces/common.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
import { PerformanceTracker } from '../../helpers/performance.tracker.js';
import * as path from 'path';
import * as fs from 'fs/promises';

/**
 * Test ID: CORP-08
 * Test Description: Failed Invoice Handling
 * Priority: High
 *
 * This test validates proper error handling and recovery when processing
 * invalid or malformed invoices from the corpus.
 */

tap.test('CORP-08: Failed Invoice Handling - should handle invalid invoices gracefully', async () => {
  // Corpus categories that are expected to contain invalid invoices.
  const failCategories = [
    'ZUGFERD_V1_FAIL',
    'ZUGFERD_V2_FAIL',
    'EN16931_INVALID'
  ];

  const failedFiles: Array<{ path: string; size: number; category: string }> = [];

  // Collect all failed invoice files; a category that cannot be loaded is skipped.
  for (const category of failCategories) {
    try {
      const files = await CorpusLoader.loadCategory(category);
      failedFiles.push(...files.map(f => ({ ...f, category })));
    } catch (e) {
      // Category might not exist
      console.log(`Category ${category} not found, skipping...`);
    }
  }

  // Hand-written invalid documents. `expectedError` is informational only:
  // a mismatch is logged as a note and never fails the test.
  const syntheticInvalids = [
    {
      name: 'empty.xml',
      content: '',
      expectedError: 'empty'
    },
    {
      name: 'not-xml.xml',
      content: 'This is not XML content',
      expectedError: 'parse'
    },
    {
      name: 'invalid-structure.xml',
      content: '<?xml version="1.0"?><Invoice><Invalid>Structure</Wrong></Invoice>',
      expectedError: 'structure'
    },
    {
      name: 'missing-required.xml',
      content: '<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"></Invoice>',
      expectedError: 'validation'
    },
    {
      // NOTE(review): this is passed as an ordinary JS string, so the parser may
      // never see actual invalid UTF-8 bytes - confirm this case tests what it claims.
      name: 'malformed-encoding.xml',
      content: '<?xml version="1.0" encoding="UTF-8"?><Invoice>Ä Invalid UTF-8 bytes</Invoice>',
      expectedError: 'encoding'
    }
  ];

  console.log(`Testing ${failedFiles.length} failed corpus files and ${syntheticInvalids.length} synthetic invalid files`);

  // Aggregated counters for the summary report and the final assertions.
  const results = {
    totalFiles: failedFiles.length + syntheticInvalids.length,
    handled: 0,
    unhandled: 0,
    errorTypes: new Map<string, number>(),
    errorMessages: new Map<string, number>(),
    recoveryAttempts: 0,
    partialRecoveries: 0
  };

  // Test corpus failed files
  console.log('\n--- Testing corpus failed files ---');
  if (failedFiles.length > 0) {
    for (const file of failedFiles) {
      try {
        const xmlBuffer = await CorpusLoader.loadFile(file.path);
        const xmlString = xmlBuffer.toString('utf-8');

        const invoice = new EInvoice();
        let error: any = null;
        // Tracks which step was running so a thrown error can be attributed to it.
        let stage = 'unknown';

        try {
          // Attempt to parse
          stage = 'parse';
          await invoice.fromXmlString(xmlString);

          // Attempt to validate
          stage = 'validate';
          const validationResult = await invoice.validate(ValidationLevel.BUSINESS);

          if (!validationResult.valid) {
            // A negative validation result is turned into an error object so both
            // failure paths (thrown / reported) are processed the same way below.
            error = new Error(validationResult.errors?.[0]?.message || 'Validation failed');
            error.type = 'validation';
            error.details = validationResult.errors;
          }
        } catch (e: any) {
          error = e;
          error.type = stage;
        }

        if (error) {
          results.handled++;

          // Categorize error
          const errorType = error.type || 'unknown';
          results.errorTypes.set(errorType, (results.errorTypes.get(errorType) || 0) + 1);

          // Track common error messages (first 50 characters). Thrown objects are
          // not guaranteed to carry a string `message`, so stringify defensively.
          const errorMsg = String(error.message ?? error).substring(0, 50);
          results.errorMessages.set(errorMsg, (results.errorMessages.get(errorMsg) || 0) + 1);

          console.log(`✓ ${path.basename(file.path)}: Error handled properly (${errorType})`);

          // Recovery is only attempted for failures raised during parsing.
          if (errorType === 'parse') {
            results.recoveryAttempts++;

            // Try recovery strategies
            const recovered = await attemptRecovery(xmlString, invoice);
            if (recovered) {
              results.partialRecoveries++;
              console.log(`  - Partial recovery successful`);
            }
          }
        } else {
          // File was expected to fail but didn't; it counts as neither handled
          // nor unhandled, which lowers the handling rate.
          console.log(`✗ ${path.basename(file.path)}: Expected to fail but succeeded`);
        }

      } catch (unexpectedError: any) {
        results.unhandled++;
        console.log(`✗ ${path.basename(file.path)}: Unhandled error - ${unexpectedError.message}`);
      }
    }
  } else {
    console.log('⚠ No failed files found in corpus - skipping test');
  }

  // Test synthetic invalid files
  console.log('\n--- Testing synthetic invalid files ---');
  for (const invalid of syntheticInvalids) {
    try {
      const invoice = new EInvoice();
      let errorOccurred = false;
      let errorType = '';

      try {
        await invoice.fromXmlString(invalid.content);

        // If parsing succeeded, try validation
        const validationResult = await invoice.validate();
        if (!validationResult.valid) {
          errorOccurred = true;
          errorType = 'validation';
        }
      } catch (error: any) {
        errorOccurred = true;
        errorType = determineErrorType(error);
      }

      if (errorOccurred) {
        // Count every detected failure as handled - both thrown errors and
        // negative validation results - consistent with the corpus loop above.
        results.handled++;
        results.errorTypes.set(errorType, (results.errorTypes.get(errorType) || 0) + 1);

        console.log(`✓ ${invalid.name}: Correctly failed with ${errorType} error`);

        if (errorType !== invalid.expectedError && invalid.expectedError !== 'any') {
          console.log(`  Note: Expected ${invalid.expectedError} but got ${errorType}`);
        }
      } else {
        console.log(`✗ ${invalid.name}: Should have failed but succeeded`);
      }

    } catch (unexpectedError: any) {
      results.unhandled++;
      console.log(`✗ ${invalid.name}: Unhandled error - ${unexpectedError.message}`);
    }
  }

  // Test error message quality (informational: results are only logged)
  console.log('\n--- Testing error message quality ---');
  const testCases = [
    {
      xml: '<Invoice/>',
      check: 'descriptive'
    },
    {
      xml: '<?xml version="1.0"?><Invoice xmlns="bad-namespace"/>',
      check: 'namespace'
    },
    {
      xml: '<?xml version="1.0"?><CrossIndustryInvoice><ExchangedDocument><ID></ID></ExchangedDocument></CrossIndustryInvoice>',
      check: 'required-field'
    }
  ];

  for (const testCase of testCases) {
    try {
      const invoice = new EInvoice();
      await invoice.fromXmlString(testCase.xml);
      const result = await invoice.validate();

      if (!result.valid && result.errors?.length) {
        const error = result.errors[0];

        // A "good" message has an error code and more than 20 characters of text.
        const hasErrorCode = !!error.code;
        const hasDescription = error.message.length > 20;

        if (hasErrorCode && hasDescription) {
          console.log(`✓ Good error message quality for ${testCase.check}`);
          console.log(`  Message: ${error.message.substring(0, 80)}...`);
        } else {
          console.log(`✗ Poor error message quality for ${testCase.check}`);
        }
      }
    } catch (error: any) {
      // Parse errors should also have good messages
      if (error.message && error.message.length > 20) {
        console.log(`✓ Parse error has descriptive message`);
      }
    }
  }

  // Test error recovery mechanisms (informational: results are only logged)
  console.log('\n--- Testing error recovery mechanisms ---');
  const recoverableErrors = [
    {
      name: 'missing-closing-tag',
      xml: '<?xml version="1.0"?><Invoice><ID>123</ID>',
      recovery: 'auto-close'
    },
    {
      name: 'encoding-issue',
      xml: '<?xml version="1.0" encoding="ISO-8859-1"?><Invoice><Name>Café</Name></Invoice>',
      recovery: 'encoding-fix'
    },
    {
      name: 'namespace-mismatch',
      xml: '<Invoice xmlns="wrong-namespace"><ID>123</ID></Invoice>',
      recovery: 'namespace-fix'
    }
  ];

  for (const testCase of recoverableErrors) {
    const invoice = new EInvoice();
    const recovered = await attemptRecovery(testCase.xml, invoice);

    if (recovered) {
      console.log(`✓ ${testCase.name}: Recovery successful using ${testCase.recovery}`);
    } else {
      console.log(`  ${testCase.name}: Recovery not implemented`);
    }
  }

  // Summary report
  console.log('\n=== Failed Invoice Handling Summary ===');
  console.log(`Total files tested: ${results.totalFiles}`);
  console.log(`Properly handled: ${results.handled} (${(results.handled/results.totalFiles*100).toFixed(1)}%)`);
  console.log(`Unhandled errors: ${results.unhandled}`);

  console.log('\nError Types Distribution:');
  results.errorTypes.forEach((count, type) => {
    console.log(`  ${type}: ${count} occurrences`);
  });

  // Five most frequent (truncated) corpus error messages, most common first.
  console.log('\nCommon Error Messages:');
  const sortedErrors = Array.from(results.errorMessages.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5);
  sortedErrors.forEach(([msg, count]) => {
    console.log(`  "${msg}...": ${count} times`);
  });

  console.log('\nRecovery Statistics:');
  console.log(`  Recovery attempts: ${results.recoveryAttempts}`);
  console.log(`  Partial recoveries: ${results.partialRecoveries}`);
  console.log(`  Recovery rate: ${results.recoveryAttempts > 0 ?
    (results.partialRecoveries/results.recoveryAttempts*100).toFixed(1) : 0}%`);

  // Success criteria. totalFiles is never zero because the synthetic cases
  // are always included, so the division below is safe.
  const handlingRate = results.handled / results.totalFiles;
  expect(handlingRate).toBeGreaterThan(0.75); // 75% of errors should be handled gracefully

  // Unexpected exceptions must stay below a quarter of all tested files.
  expect(results.unhandled).toBeLessThan(results.totalFiles * 0.25); // Less than 25% unhandled
});

/**
 * Classifies a thrown value into a coarse error category by keyword-matching
 * its message, case-insensitively.
 *
 * Categories are checked in a fixed order and the first match wins:
 * parse, encoding, validation, required-field, namespace, empty.
 * Note that the `valid` keyword also matches messages containing "invalid".
 *
 * @param error - The caught value. Usually an `Error`, but anything can be
 *   thrown in JavaScript; a thrown string is used as the message itself, and
 *   values without a string `message` are classified as `'unknown'`.
 * @returns The matched category name, or `'unknown'` when no keyword matches.
 */
function determineErrorType(error: unknown): string {
  // Extract a message without assuming the thrown value is an Error instance.
  let rawMessage: unknown;
  if (typeof error === 'string') {
    rawMessage = error;
  } else if (typeof error === 'object' && error !== null) {
    rawMessage = (error as { message?: unknown }).message;
  }
  const message = typeof rawMessage === 'string' ? rawMessage.toLowerCase() : '';

  if (message.includes('parse') || message.includes('syntax')) return 'parse';
  if (message.includes('encoding') || message.includes('utf')) return 'encoding';
  if (message.includes('valid')) return 'validation';
  if (message.includes('require') || message.includes('missing')) return 'required-field';
  if (message.includes('namespace')) return 'namespace';
  if (message.includes('empty')) return 'empty';

  return 'unknown';
}

/**
 * Returns a copy of `xml` with closing tags appended for every element that is
 * still open at the end of the document, innermost first.
 *
 * XML declarations / processing instructions, comments, CDATA sections,
 * `<!...>` declarations and self-closing elements are skipped.
 *
 * @returns The repaired document, or `null` when there is nothing to close or
 *   when a closing tag does not match the innermost open element (appending
 *   closers cannot repair that).
 */
function closeUnclosedTags(xml: string): string | null {
  // Alternatives, in order: comment | CDATA | PI | <!decl> | </close> (group 1)
  // | <open ...> with name in group 2 and an optional trailing "/" in group 3.
  const tokenPattern =
    /<!--[\s\S]*?-->|<!\[CDATA\[[\s\S]*?\]\]>|<\?[\s\S]*?\?>|<![^>]*>|<\/([^\s>]+)\s*>|<([^\s\/>!?]+)(?:"[^"]*"|'[^']*'|[^>"'])*?(\/?)>/g;

  const openElements: string[] = [];
  let token: RegExpExecArray | null;
  while ((token = tokenPattern.exec(xml)) !== null) {
    const closingName = token[1];
    const openingName = token[2];
    const isSelfClosing = token[3] === '/';

    if (closingName !== undefined) {
      // A mismatched (or surplus) closing tag is a structural error.
      if (openElements.pop() !== closingName) {
        return null;
      }
    } else if (openingName !== undefined && !isSelfClosing) {
      openElements.push(openingName);
    }
  }

  if (openElements.length === 0) {
    return null;
  }

  const closers = openElements
    .reverse()
    .map(name => `</${name}>`)
    .join('');
  return xml + closers;
}

/**
 * Tries a sequence of best-effort repairs on `xml` and reports whether any
 * repaired variant could be loaded via `invoice.fromXmlString`.
 *
 * Strategies, tried in order until one loads without throwing:
 *  1. Rewrite the declared encoding to UTF-8 (only when the document has an
 *     `encoding=` attribute and does not already mention `UTF-8`).
 *  2. Append closing tags for elements left open at the end of the document.
 *  3. Replace the first `xmlns="..."` with each known invoice namespace.
 *
 * Failures of individual strategies are deliberately swallowed.
 *
 * @param xml - The document that failed to load.
 * @param invoice - Instance used for the load attempts; it receives the
 *   content of the first variant that loads successfully.
 * @returns `true` if some repaired variant loaded, otherwise `false`.
 */
async function attemptRecovery(xml: string, invoice: EInvoice): Promise<boolean> {
  // 1. Try to fix encoding
  if (xml.includes('encoding=') && !xml.includes('UTF-8')) {
    try {
      const utf8Xml = xml.replace(/encoding="[^"]*"/, 'encoding="UTF-8"');
      await invoice.fromXmlString(utf8Xml);
      return true;
    } catch (e) {
      // Continue to next strategy
    }
  }

  // 2. Try to auto-close tags that were left open
  const autoClosedXml = closeUnclosedTags(xml);
  if (autoClosedXml !== null) {
    try {
      await invoice.fromXmlString(autoClosedXml);
      return true;
    } catch (e) {
      // Continue to next strategy
    }
  }

  // 3. Try namespace fixes
  if (xml.includes('xmlns=')) {
    // Common invoice namespaces (UBL Invoice, UN/CEFACT CrossIndustryInvoice)
    const namespaces = [
      'urn:oasis:names:specification:ubl:schema:xsd:Invoice-2',
      'urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100'
    ];

    for (const ns of namespaces) {
      const fixedXml = xml.replace(/xmlns="[^"]*"/, `xmlns="${ns}"`);
      try {
        await invoice.fromXmlString(fixedXml);
        return true;
      } catch (e) {
        // Try next namespace
      }
    }
  }

  return false;
}

// Hand control to the tap runner; the test registered above via tap.test()
// is presumably executed from here.
tap.start();