import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { PDFExtractor } from '../../../ts/formats/pdf/pdf.extractor.js';

/**
 * EDGE-09: feeds malformed, truncated and non-PDF buffers to
 * `PDFExtractor.extractXml` and `EInvoice.fromPdf`.
 *
 * Expectations encoded by the assertions below:
 * - `extractXml` resolves with a result object instead of throwing (Tests 1, 4, 5);
 * - `EInvoice.fromPdf` rejects with an error whose message mentions the
 *   PDF/XML problem rather than an internal TypeError (Tests 2, 3).
 */
tap.test('EDGE-09: Corrupted ZIP Containers - should handle corrupted ZIP/container files gracefully', async () => {
  console.log('Testing corrupted ZIP container handling...\n');

  // Test 1: Invalid PDF headers
  const testInvalidPdfHeaders = async () => {
    const corruptHeaders = [
      {
        name: 'wrong-magic-bytes',
        data: Buffer.from('NOTAPDF\x00\x00\x00\x00'),
        description: 'Invalid PDF signature'
      },
      {
        name: 'truncated-header',
        data: Buffer.from('PK\x03'),
        description: 'ZIP-like header (not PDF)'
      },
      {
        name: 'empty-file',
        data: Buffer.from(''),
        description: 'Empty file'
      }
    ];

    const results = [];

    for (const corrupt of corruptHeaders) {
      try {
        const extractor = new PDFExtractor();
        const result = await extractor.extractXml(corrupt.data);

        // Reaching this point means the extractor reported the problem via its
        // result object rather than by throwing.
        results.push({
          name: corrupt.name,
          handled: true,
          success: result.success,
          error: result.error?.message
        });
      } catch (error) {
        results.push({
          name: corrupt.name,
          handled: false,
          error: error.message
        });
      }
    }

    return results;
  };

  const invalidHeaderResults = await testInvalidPdfHeaders();
  console.log('Test 1 - Invalid PDF headers:');
  invalidHeaderResults.forEach(result => {
    console.log(` ${result.name}: ${result.handled ? 'Handled gracefully' : 'Threw exception'}`);
    if (result.error) {
      console.log(` Error: ${result.error.substring(0, 50)}...`);
    }
  });

  // All should be handled gracefully (no exceptions)
  expect(invalidHeaderResults.every(r => r.handled)).toEqual(true);

  // Test 2: Corrupted PDF structure
  const testCorruptedPdfStructure = async () => {
    const corruptedPdfs = [
      {
        name: 'pdf-header-only',
        data: Buffer.from('%PDF-1.4\n'),
        description: 'PDF header without content'
      },
      {
        name: 'incomplete-pdf',
        data: Buffer.from('%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj\n'),
        description: 'PDF without xref table'
      },
      {
        name: 'mixed-binary',
        data: Buffer.concat([
          Buffer.from('%PDF-1.4\n'),
          Buffer.from([0xFF, 0xFE, 0xFD, 0xFC]),
          Buffer.from('\nendobj\n')
        ]),
        description: 'PDF with binary garbage'
      }
    ];

    const results = [];

    for (const pdf of corruptedPdfs) {
      try {
        const einvoice = await EInvoice.fromPdf(pdf.data);
        results.push({
          name: pdf.name,
          loaded: true,
          hasFormat: einvoice.getFormat() !== 'unknown'
        });
      } catch (error) {
        // Normalize first: a thrown non-Error value has no `.message`, and
        // calling `.includes` on it would raise a second, misleading TypeError
        // from inside this handler.
        const message = String(error?.message ?? error);

        results.push({
          name: pdf.name,
          loaded: false,
          errorType: error?.constructor?.name,
          // "Graceful" = the message talks about PDF/XML and does not look like
          // an accidental property access on undefined.
          graceful: !message.includes('Cannot read') &&
            !message.includes('undefined') &&
            (message.includes('PDF') || message.includes('XML'))
        });
      }
    }

    return results;
  };

  const corruptedPdfResults = await testCorruptedPdfStructure();
  console.log('\nTest 2 - Corrupted PDF structure:');
  corruptedPdfResults.forEach(result => {
    console.log(` ${result.name}: ${result.loaded ? 'Loaded' : 'Failed'} ${result.graceful ? '[Graceful]' : ''}`);
  });

  // All should fail gracefully
  expect(corruptedPdfResults.every(r => !r.loaded && r.graceful)).toEqual(true);

  // Test 3: Non-PDF files masquerading as PDFs
  const testNonPdfFiles = async () => {
    // NOTE(review): the XML and HTML payloads are minimal stand-ins that match
    // their descriptions; confirm against the intended fixtures.
    const nonPdfFiles = [
      {
        name: 'xml-file',
        data: Buffer.from('<?xml version="1.0"?><Invoice><ID>TEST-001</ID></Invoice>'),
        description: 'Plain XML file'
      },
      {
        name: 'json-file',
        data: Buffer.from('{"invoice": {"id": "TEST-001", "amount": 100}}'),
        description: 'JSON file'
      },
      {
        name: 'html-file',
        // Kept on a single line: a single-quoted literal cannot contain raw
        // line breaks.
        data: Buffer.from('<html><body><h1>Invoice</h1></body></html>'),
        description: 'HTML file'
      }
    ];

    const results = [];

    for (const file of nonPdfFiles) {
      try {
        const einvoice = await EInvoice.fromPdf(file.data);
        results.push({
          name: file.name,
          processed: true,
          format: einvoice.getFormat()
        });
      } catch (error) {
        // Same normalization as in Test 2 so a non-Error rejection cannot
        // crash the handler itself.
        const message = String(error?.message ?? error);

        results.push({
          name: file.name,
          processed: false,
          errorClear: message.includes('PDF') ||
            message.includes('No XML found') ||
            message.includes('Invalid')
        });
      }
    }

    return results;
  };

  const nonPdfResults = await testNonPdfFiles();
  console.log('\nTest 3 - Non-PDF files:');
  nonPdfResults.forEach(result => {
    console.log(` ${result.name}: ${result.processed ? `Processed (${result.format})` : 'Rejected'} ${result.errorClear ? '[Clear error]' : ''}`);
  });

  // All should be rejected with clear errors
  expect(nonPdfResults.every(r => !r.processed && r.errorClear)).toEqual(true);

  // Test 4: Edge case sizes
  const testEdgeCaseSizes = async () => {
    const sizes = [
      { size: 0, name: 'empty' },
      { size: 1, name: '1-byte' },
      { size: 10, name: '10-bytes' },
      { size: 1024, name: '1KB' }
    ];

    const results = [];

    for (const { size, name } of sizes) {
      // Zero-filled buffer of the requested size.
      const data = Buffer.alloc(size);
      if (size > 0) {
        // Add partial PDF header if there's space
        const header = '%PDF-1.4';
        data.write(header.substring(0, Math.min(size, header.length)), 0);
      }

      try {
        const extractor = new PDFExtractor();
        const result = await extractor.extractXml(data);

        results.push({
          size: name,
          handled: true,
          hasError: !!result.error
        });
      } catch (error) {
        results.push({
          size: name,
          handled: false,
          error: error.message
        });
      }
    }

    return results;
  };

  const sizeResults = await testEdgeCaseSizes();
  console.log('\nTest 4 - Edge case sizes:');
  sizeResults.forEach(result => {
    console.log(` ${result.size}: ${result.handled ? 'Handled' : 'Exception'} ${result.hasError ? '[Expected error]' : ''}`);
  });

  // All should be handled without throwing
  expect(sizeResults.every(r => r.handled)).toEqual(true);

  // Test 5: Partial PDF with embedded XML (recovery test)
  const testPartialPdfRecovery = async () => {
    // Create a partial PDF that might contain XML
    // NOTE(review): the embedded XML is a minimal stand-in carrying the
    // PARTIAL-001 marker checked below; confirm against the intended fixture.
    const partialPdfWithXml = Buffer.concat([
      Buffer.from('%PDF-1.4\n'),
      Buffer.from('1 0 obj\n<<\n/Type /EmbeddedFile\n/Subtype /text#2Fxml\n>>\nstream\n'),
      Buffer.from('<?xml version="1.0" encoding="UTF-8"?>\n<Invoice>\n'),
      Buffer.from(' <Header>\n <ID>PARTIAL-001</ID>\n </Header>\n'),
      Buffer.from('</Invoice>\n'),
      Buffer.from('endstream\nendobj\n')
      // Intentionally incomplete - missing xref and trailer
    ]);

    try {
      const extractor = new PDFExtractor();
      const result = await extractor.extractXml(partialPdfWithXml);

      return {
        extracted: result.success,
        hasXml: !!result.xml,
        xmlValid: result.xml ? result.xml.includes('PARTIAL-001') : false,
        errorType: result.error?.type
      };
    } catch (error) {
      return {
        extracted: false,
        exception: true,
        error: error.message
      };
    }
  };

  const recoveryResult = await testPartialPdfRecovery();
  console.log('\nTest 5 - Partial PDF recovery:');
  console.log(` Extraction: ${recoveryResult.extracted ? 'Success' : 'Failed'}`);
  console.log(` Has XML: ${recoveryResult.hasXml || false}`);
  console.log(` Exception: ${recoveryResult.exception || false}`);

  // Should handle gracefully even if extraction fails
  expect(!recoveryResult.exception).toEqual(true);

  console.log('\n✓ All corrupted ZIP/PDF edge cases handled appropriately');
});

tap.start();