einvoice/test/suite/einvoice_edge-cases/test.edge-09.corrupted-zip.ts
Philipp Kunz 784a50bc7f fix(tests): Fixed ENC-01, ENC-02, and ENC-03 encoding tests
- Fixed UTF-8 encoding test (ENC-01) to accept multiple encoding declarations
- Fixed UTF-16 encoding test (ENC-02) by rewriting with correct API usage
- Fixed ISO-8859-1 encoding test (ENC-03) with proper address fields and methods
- All three encoding tests now pass successfully
- Updated edge-cases tests (EDGE-02 through EDGE-07) with new test structure
2025-05-28 13:05:59 +00:00

259 lines
8.2 KiB
TypeScript

import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { PDFExtractor } from '../../../ts/formats/pdf/pdf.extractor.js';
tap.test('EDGE-09: Corrupted ZIP Containers - should handle corrupted ZIP/container files gracefully', async () => {
console.log('Testing corrupted ZIP container handling...\n');
// Test 1: Invalid PDF headers
// Hands buffers that do not carry a usable PDF signature to
// PDFExtractor.extractXml and notes, per buffer, whether the call returned
// or threw. Resolves to one record per fixture, in fixture order:
//   returned -> { name, handled: true, success, error }
//   threw    -> { name, handled: false, error }
const testInvalidPdfHeaders = async () => {
  const fixtures = [
    {
      name: 'wrong-magic-bytes',
      description: 'Invalid PDF signature',
      data: Buffer.from('NOTAPDF\x00\x00\x00\x00')
    },
    {
      name: 'truncated-header',
      description: 'ZIP-like header (not PDF)',
      data: Buffer.from('PK\x03')
    },
    {
      name: 'empty-file',
      description: 'Empty file',
      data: Buffer.from('')
    }
  ];

  // Runs a single fixture; a thrown error becomes a record instead of propagating.
  const probe = async ({ name, data }) => {
    try {
      const outcome = await new PDFExtractor().extractXml(data);
      return {
        name,
        handled: true,
        success: outcome.success,
        error: outcome.error?.message
      };
    } catch (err) {
      return { name, handled: false, error: err.message };
    }
  };

  const records = [];
  for (const fixture of fixtures) {
    // Fixtures are processed strictly one after another, in declaration order.
    records.push(await probe(fixture));
  }
  return records;
};
// Run the invalid-header fixtures and print one status line per fixture.
const invalidHeaderResults = await testInvalidPdfHeaders();
console.log('Test 1 - Invalid PDF headers:');
for (const { name, handled, error } of invalidHeaderResults) {
  const verdict = handled ? 'Handled gracefully' : 'Threw exception';
  console.log(` ${name}: ${verdict}`);
  if (error) {
    // Only the first 50 characters of the message are shown.
    console.log(` Error: ${error.substring(0, 50)}...`);
  }
}
// All should be handled gracefully (no exceptions)
const everyHeaderHandled = !invalidHeaderResults.some((entry) => !entry.handled);
expect(everyHeaderHandled).toEqual(true);
// Test 2: Corrupted PDF structure
// Feeds structurally broken PDF buffers to EInvoice.fromPdf and records, per
// fixture, whether loading succeeded or how it failed.
//
// Resolves to one record per fixture, in fixture order:
//   loaded -> { name, loaded: true, hasFormat }
//   failed -> { name, loaded: false, errorType, graceful }
// `graceful` is true only when the failure message mentions 'PDF' or 'XML'
// and contains neither 'Cannot read' nor 'undefined'.
const testCorruptedPdfStructure = async () => {
  const corruptedPdfs = [
    {
      name: 'pdf-header-only',
      data: Buffer.from('%PDF-1.4\n'),
      description: 'PDF header without content'
    },
    {
      name: 'incomplete-pdf',
      data: Buffer.from('%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj\n'),
      description: 'PDF without xref table'
    },
    {
      name: 'mixed-binary',
      data: Buffer.concat([
        Buffer.from('%PDF-1.4\n'),
        Buffer.from([0xFF, 0xFE, 0xFD, 0xFC]),
        Buffer.from('\nendobj\n')
      ]),
      description: 'PDF with binary garbage'
    }
  ];

  // Classifies a thrown value without assuming it is an Error instance.
  // A thrown string/null/undefined has no usable `.message`; reading it
  // blindly would raise a TypeError inside the catch handler and abort the
  // whole test instead of recording a non-graceful failure.
  const describeFailure = (thrown) => {
    const message = typeof thrown?.message === 'string' ? thrown.message : String(thrown);
    const looksLikeCrash = message.includes('Cannot read') || message.includes('undefined');
    const mentionsDomain = message.includes('PDF') || message.includes('XML');
    return {
      errorType: thrown?.constructor?.name ?? typeof thrown,
      graceful: !looksLikeCrash && mentionsDomain
    };
  };

  const results = [];
  for (const pdf of corruptedPdfs) {
    try {
      const einvoice = await EInvoice.fromPdf(pdf.data);
      results.push({
        name: pdf.name,
        loaded: true,
        hasFormat: einvoice.getFormat() !== 'unknown'
      });
    } catch (error) {
      results.push({
        name: pdf.name,
        loaded: false,
        ...describeFailure(error)
      });
    }
  }
  return results;
};
// Run the corrupted-structure fixtures and print one status line per fixture.
const corruptedPdfResults = await testCorruptedPdfStructure();
console.log('\nTest 2 - Corrupted PDF structure:');
for (const entry of corruptedPdfResults) {
  const outcome = entry.loaded ? 'Loaded' : 'Failed';
  const badge = entry.graceful ? '[Graceful]' : '';
  console.log(` ${entry.name}: ${outcome} ${badge}`);
}
// All should fail gracefully
const anyLoadedOrUngraceful = corruptedPdfResults.some((entry) => entry.loaded || !entry.graceful);
expect(!anyLoadedOrUngraceful).toEqual(true);
// Test 3: Non-PDF files masquerading as PDFs
// Passes XML, JSON and HTML payloads to EInvoice.fromPdf and records, per
// payload, whether it was processed or rejected.
//
// Resolves to one record per payload, in declaration order:
//   processed -> { name, processed: true, format }
//   rejected  -> { name, processed: false, errorClear }
// `errorClear` is true when the failure message contains 'PDF',
// 'No XML found' or 'Invalid'.
const testNonPdfFiles = async () => {
  const nonPdfFiles = [
    {
      name: 'xml-file',
      data: Buffer.from('<?xml version="1.0"?><Invoice xmlns="test"><ID>TEST-001</ID></Invoice>'),
      description: 'Plain XML file'
    },
    {
      name: 'json-file',
      data: Buffer.from('{"invoice": {"id": "TEST-001", "amount": 100}}'),
      description: 'JSON file'
    },
    {
      name: 'html-file',
      data: Buffer.from('<!DOCTYPE html><html><body><h1>Invoice</h1></body></html>'),
      description: 'HTML file'
    }
  ];

  // Extracts a message from any thrown value. A non-Error throwable (string,
  // null, ...) has no usable `.message`; calling `.includes` on it would
  // raise a TypeError inside the catch handler and abort the whole test.
  const messageOf = (thrown) =>
    (typeof thrown?.message === 'string' ? thrown.message : String(thrown));

  const CLEAR_MARKERS = ['PDF', 'No XML found', 'Invalid'];

  const results = [];
  for (const file of nonPdfFiles) {
    try {
      const einvoice = await EInvoice.fromPdf(file.data);
      results.push({
        name: file.name,
        processed: true,
        format: einvoice.getFormat()
      });
    } catch (error) {
      const message = messageOf(error);
      results.push({
        name: file.name,
        processed: false,
        errorClear: CLEAR_MARKERS.some((marker) => message.includes(marker))
      });
    }
  }
  return results;
};
// Run the masquerading-file fixtures and print how each one was treated.
const nonPdfResults = await testNonPdfFiles();
console.log('\nTest 3 - Non-PDF files:');
for (const entry of nonPdfResults) {
  const status = entry.processed ? `Processed (${entry.format})` : 'Rejected';
  const clarity = entry.errorClear ? '[Clear error]' : '';
  console.log(` ${entry.name}: ${status} ${clarity}`);
}
// All should be rejected with clear errors
const anyAcceptedOrUnclear = nonPdfResults.some((entry) => entry.processed || !entry.errorClear);
expect(!anyAcceptedOrUnclear).toEqual(true);
// Test 4: Edge case sizes
// Builds zero-filled buffers of a few sizes, each beginning with as much of
// the '%PDF-1.4' header as fits, and records whether
// PDFExtractor.extractXml returns or throws for each one.
// Resolves to one record per size, in declaration order:
//   returned -> { size, handled: true, hasError }
//   threw    -> { size, handled: false, error }
const testEdgeCaseSizes = async () => {
  const cases = [
    ['empty', 0],
    ['1-byte', 1],
    ['10-bytes', 10],
    ['1KB', 1024]
  ];

  // Zero-filled buffer of `byteLength` bytes with the (possibly cut off)
  // PDF header written at offset 0. Nothing is written for a 0-byte buffer.
  const buildPayload = (byteLength) => {
    const payload = Buffer.alloc(byteLength);
    if (byteLength > 0) {
      // Add partial PDF header if there's space
      const header = '%PDF-1.4';
      const prefixLength = Math.min(byteLength, header.length);
      payload.write(header.substring(0, prefixLength), 0);
    }
    return payload;
  };

  const records = [];
  for (const [label, byteLength] of cases) {
    const payload = buildPayload(byteLength);
    let record;
    try {
      const outcome = await new PDFExtractor().extractXml(payload);
      record = { size: label, handled: true, hasError: Boolean(outcome.error) };
    } catch (err) {
      record = { size: label, handled: false, error: err.message };
    }
    records.push(record);
  }
  return records;
};
// Run the size fixtures and print one status line per buffer size.
const sizeResults = await testEdgeCaseSizes();
console.log('\nTest 4 - Edge case sizes:');
for (const entry of sizeResults) {
  const outcome = entry.handled ? 'Handled' : 'Exception';
  const note = entry.hasError ? '[Expected error]' : '';
  console.log(` ${entry.size}: ${outcome} ${note}`);
}
// All should be handled without throwing
const anySizeThrew = sizeResults.some((entry) => !entry.handled);
expect(!anySizeThrew).toEqual(true);
// Test 5: Partial PDF with embedded XML (recovery test)
// Assembles a PDF fragment that contains an embedded-file stream with a
// small CrossIndustryInvoice document but no xref table or trailer, then
// asks PDFExtractor.extractXml to process it.
// Resolves to:
//   returned -> { extracted, hasXml, xmlValid, errorType }
//   threw    -> { extracted: false, exception: true, error }
const testPartialPdfRecovery = async () => {
  // Create a partial PDF that might contain XML
  const segments = [
    '%PDF-1.4\n',
    '1 0 obj\n<<\n/Type /EmbeddedFile\n/Subtype /text#2Fxml\n>>\nstream\n',
    '<?xml version="1.0"?>\n<rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100">\n',
    ' <rsm:ExchangedDocument>\n <ram:ID>PARTIAL-001</ram:ID>\n </rsm:ExchangedDocument>\n',
    '</rsm:CrossIndustryInvoice>\n',
    'endstream\nendobj\n'
    // Intentionally incomplete - missing xref and trailer
  ];
  const partialPdfWithXml = Buffer.concat(segments.map((segment) => Buffer.from(segment)));

  let outcome;
  try {
    outcome = await new PDFExtractor().extractXml(partialPdfWithXml);
  } catch (err) {
    return { extracted: false, exception: true, error: err.message };
  }

  const { xml } = outcome;
  return {
    extracted: outcome.success,
    hasXml: Boolean(xml),
    xmlValid: xml ? xml.includes('PARTIAL-001') : false,
    errorType: outcome.error?.type
  };
};
// Run the partial-PDF scenario and print what the extractor reported.
const recoveryResult = await testPartialPdfRecovery();
const { extracted, hasXml, exception } = recoveryResult;
console.log('\nTest 5 - Partial PDF recovery:');
console.log(` Extraction: ${extracted ? 'Success' : 'Failed'}`);
console.log(` Has XML: ${hasXml || false}`);
console.log(` Exception: ${exception || false}`);
// Should handle gracefully even if extraction fails
const noExceptionEscaped = !exception;
expect(noExceptionEscaped).toEqual(true);
console.log('\n✓ All corrupted ZIP/PDF edge cases handled appropriately');
});
// Start the tap runner; the EDGE-09 test was registered via tap.test above.
tap.start();