import { tap, expect } from '@git.zone/tstest/tapbundle';
import { EInvoice } from '../../../ts/index.js';
import { PDFExtractor } from '../../../ts/formats/pdf/pdf.extractor.js';
tap.test('EDGE-09: Corrupted ZIP Containers - should handle corrupted ZIP/container files gracefully', async () => {
console.log('Testing corrupted ZIP container handling...\n');
// Test 1: Invalid PDF headers
/**
 * Runs `PDFExtractor.extractXml` against three buffers that are not PDFs
 * (wrong signature bytes, the three-byte `PK\x03` prefix, and an empty
 * buffer) and records, per buffer, whether the call returned or threw.
 *
 * @returns {Promise<Array<{name: string, handled: boolean, success?: *, error?: string}>>}
 *   One entry per fixture, in fixture order. `handled` is `true` when
 *   `extractXml` returned a result (its `success` flag and `error.message`
 *   are copied through) and `false` when it threw (the thrown value's
 *   message is stored in `error`).
 */
const testInvalidPdfHeaders = async () => {
  const corruptHeaders = [
    {
      name: 'wrong-magic-bytes',
      data: Buffer.from('NOTAPDF\x00\x00\x00\x00'),
      description: 'Invalid PDF signature',
    },
    {
      name: 'truncated-header',
      data: Buffer.from('PK\x03'),
      description: 'ZIP-like header (not PDF)',
    },
    {
      name: 'empty-file',
      data: Buffer.from(''),
      description: 'Empty file',
    },
  ];

  const results = [];
  for (const corrupt of corruptHeaders) {
    try {
      const extractor = new PDFExtractor();
      const result = await extractor.extractXml(corrupt.data);
      results.push({
        name: corrupt.name,
        handled: true,
        success: result.success,
        error: result.error?.message,
      });
    } catch (error) {
      // A `throw` can carry any value (string, null, ...), not only Error
      // objects. Reading `.message` blindly would drop a string's text and
      // would itself throw on null/undefined, so normalise to a string first.
      const message = typeof error?.message === 'string' ? error.message : String(error);
      results.push({
        name: corrupt.name,
        handled: false,
        error: message,
      });
    }
  }
  return results;
};
// Execute Test 1 and print one status line per fixture.
const invalidHeaderResults = await testInvalidPdfHeaders();
console.log('Test 1 - Invalid PDF headers:');
for (const { name, handled, error } of invalidHeaderResults) {
  const outcome = handled ? 'Handled gracefully' : 'Threw exception';
  console.log(` ${name}: ${outcome}`);
  if (!error) {
    continue;
  }
  // Only the first 50 characters of the message are printed.
  console.log(` Error: ${error.substring(0, 50)}...`);
}
// Every fixture must have been processed without an exception escaping.
expect(invalidHeaderResults.every(({ handled }) => handled)).toEqual(true);
// Test 2: Corrupted PDF structure
/**
 * Passes three structurally broken PDF buffers (header only, a catalog
 * object with nothing after it, header followed by raw 0xFF..0xFC bytes) to
 * `EInvoice.fromPdf` and records how each attempt ended.
 *
 * @returns {Promise<Array<object>>} One entry per fixture, in fixture order:
 *   - on success: `{ name, loaded: true, hasFormat }`, where `hasFormat` is
 *     whether `getFormat()` returned something other than `'unknown'`;
 *   - on failure: `{ name, loaded: false, errorType, graceful }`, where
 *     `graceful` is true when the message mentions `'PDF'` or `'XML'` and
 *     contains neither `'Cannot read'` nor `'undefined'`.
 */
const testCorruptedPdfStructure = async () => {
  const corruptedPdfs = [
    {
      name: 'pdf-header-only',
      data: Buffer.from('%PDF-1.4\n'),
      description: 'PDF header without content',
    },
    {
      name: 'incomplete-pdf',
      data: Buffer.from('%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj\n'),
      description: 'PDF without xref table',
    },
    {
      name: 'mixed-binary',
      data: Buffer.concat([
        Buffer.from('%PDF-1.4\n'),
        Buffer.from([0xFF, 0xFE, 0xFD, 0xFC]),
        Buffer.from('\nendobj\n'),
      ]),
      description: 'PDF with binary garbage',
    },
  ];

  const results = [];
  for (const pdf of corruptedPdfs) {
    try {
      const einvoice = await EInvoice.fromPdf(pdf.data);
      results.push({
        name: pdf.name,
        loaded: true,
        hasFormat: einvoice.getFormat() !== 'unknown',
      });
    } catch (error) {
      // The thrown value is not guaranteed to be an Error: a string or null
      // has no usable `.message`, and calling `.includes` on it would throw
      // from inside this handler and abort the whole test. Normalise first so
      // such failures are recorded (and judged) like any other.
      const message = typeof error?.message === 'string' ? error.message : String(error);
      const errorType = error?.constructor?.name ?? typeof error;

      const hasCrashMarker = message.includes('Cannot read') || message.includes('undefined');
      const mentionsDocument = message.includes('PDF') || message.includes('XML');

      results.push({
        name: pdf.name,
        loaded: false,
        errorType,
        graceful: !hasCrashMarker && mentionsDocument,
      });
    }
  }
  return results;
};
// Execute Test 2 and print how each corrupted PDF fared.
const corruptedPdfResults = await testCorruptedPdfStructure();
console.log('\nTest 2 - Corrupted PDF structure:');
for (const { name, loaded, graceful } of corruptedPdfResults) {
  const status = loaded ? 'Loaded' : 'Failed';
  const tag = graceful ? '[Graceful]' : '';
  console.log(` ${name}: ${status} ${tag}`);
}
// No fixture may load, and every failure must be flagged as graceful.
expect(corruptedPdfResults.every(({ loaded, graceful }) => !loaded && graceful)).toEqual(true);
// Test 3: Non-PDF files masquerading as PDFs
const testNonPdfFiles = async () => {
const nonPdfFiles = [
{
name: 'xml-file',
data: Buffer.from('