// einvoice/test/suite/einvoice_edge-cases/test.edge-09.corrupted-zip.ts
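/**
 * EDGE-09: Corrupted ZIP Containers
 *
 * Exercises EInvoice container handling against damaged archives: invalid ZIP
 * headers, truncated files, CRC mismatches, unsupported compression methods,
 * ZIP bombs, problematic filenames, corrupted Factur-X/ZUGFeRD PDFs, recovery
 * strategies, multi-part archives, and performance on large corrupted inputs.
 * The helper functions at the bottom of the file build intentionally
 * simplified mock containers for these scenarios.
 */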
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PerformanceTracker } from '../performance.tracker.js';
const performanceTracker = new PerformanceTracker('EDGE-09: Corrupted ZIP Containers');
tap.test('EDGE-09: Corrupted ZIP Containers - should handle corrupted ZIP/container files', async (t) => {
const einvoice = new EInvoice();
// Test 1: Invalid ZIP headers
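// A well-formed ZIP starts each entry with the local file header signature
// 'PK\x03\x04', lists all entries again in a central directory ('PK\x01\x02')
// and ends with an end-of-central-directory record ('PK\x05\x06'). Each case
// below breaks one of those structures.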
const invalidZipHeaders = await performanceTracker.measureAsync(
'invalid-zip-headers',
async () => {
const corruptHeaders = [
{
name: 'wrong-magic-bytes',
data: Buffer.from('NOTAZIP\x00\x00\x00\x00'),
description: 'Invalid ZIP signature'
},
{
name: 'partial-header',
data: Buffer.from('PK\x03'),
description: 'Incomplete ZIP header'
},
{
name: 'corrupted-local-header',
data: Buffer.concat([
Buffer.from('PK\x03\x04'), // Local file header signature
Buffer.from([0xFF, 0xFF, 0xFF, 0xFF]), // Corrupted version/flags
Buffer.alloc(20, 0) // Rest of header
]),
description: 'Corrupted local file header'
},
{
name: 'missing-central-directory',
data: Buffer.concat([
Buffer.from('PK\x03\x04'), // Local file header
Buffer.alloc(26, 0), // Header data
Buffer.from('PK\x07\x08'), // Data descriptor
Buffer.alloc(12, 0), // Descriptor data
// Missing central directory
]),
description: 'Missing central directory'
}
];
const results = [];
for (const corrupt of corruptHeaders) {
try {
const extracted = await einvoice.extractFromContainer(corrupt.data);
results.push({
type: corrupt.name,
recovered: !!extracted,
filesExtracted: extracted?.files?.length || 0,
error: null
});
} catch (error) {
results.push({
type: corrupt.name,
recovered: false,
error: error.message,
isZipError: error.message.toLowerCase().includes('zip') ||
error.message.toLowerCase().includes('archive')
});
}
}
return results;
}
);
invalidZipHeaders.forEach(result => {
t.ok(!result.recovered || result.isZipError,
`Invalid header ${result.type} should fail or be detected`);
});
// Test 2: Truncated ZIP files
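// The central directory and EOCD record sit at the end of a ZIP file, so
// truncating near the end removes the index while the entry data may survive,
// whereas truncating near the start destroys the data itself. Recovery
// expectations differ accordingly.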
const truncatedZipFiles = await performanceTracker.measureAsync(
'truncated-zip-files',
async () => {
// Create a valid ZIP structure and truncate at different points
const validZip = createValidZipStructure();
const truncationPoints = [
{ point: 10, name: 'header-truncated' },
{ point: 50, name: 'file-data-truncated' },
{ point: validZip.length - 50, name: 'directory-truncated' },
{ point: validZip.length - 10, name: 'eocd-truncated' },
{ point: validZip.length - 1, name: 'last-byte-missing' }
];
const results = [];
for (const truncation of truncationPoints) {
const truncated = validZip.slice(0, truncation.point);
try {
const recovery = await einvoice.recoverFromCorruptedZip(truncated, {
attemptPartialRecovery: true
});
results.push({
truncation: truncation.name,
size: truncated.length,
recovered: recovery?.success || false,
filesRecovered: recovery?.recoveredFiles || 0,
dataRecovered: recovery?.recoveredBytes || 0
});
} catch (error) {
results.push({
truncation: truncation.name,
size: truncated.length,
recovered: false,
error: error.message
});
}
}
return results;
}
);
truncatedZipFiles.forEach(result => {
t.ok(result.recovered === false || result.filesRecovered < 1,
`Truncated ZIP at ${result.truncation} should have limited recovery`);
});
// Test 3: CRC errors
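// Each entry stores a CRC-32 of its uncompressed data (at offset 14 of the
// local file header and again in the central directory), so bit flips in the
// payload or a deliberately wrong stored CRC should surface as checksum
// failures during extraction.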
const crcErrors = await performanceTracker.measureAsync(
'crc-checksum-errors',
async () => {
const scenarios = [
{
name: 'single-bit-flip',
corruption: (data: Buffer) => {
const copy = Buffer.from(data);
// Flip a bit in the compressed data
if (copy.length > 100) {
copy[100] ^= 0x01;
}
return copy;
}
},
{
name: 'data-corruption',
corruption: (data: Buffer) => {
const copy = Buffer.from(data);
// Corrupt a chunk of data
for (let i = 50; i < Math.min(100, copy.length); i++) {
copy[i] = 0xFF;
}
return copy;
}
},
{
name: 'wrong-crc-stored',
corruption: (data: Buffer) => {
const copy = Buffer.from(data);
// Find and corrupt CRC values
const crcOffset = findCRCOffset(copy);
if (crcOffset > 0) {
copy.writeUInt32LE(0xDEADBEEF, crcOffset);
}
return copy;
}
}
];
const results = [];
for (const scenario of scenarios) {
const validZip = createZipWithInvoice();
const corrupted = scenario.corruption(validZip);
try {
const extraction = await einvoice.extractFromContainer(corrupted, {
ignoreCRCErrors: false
});
results.push({
scenario: scenario.name,
extracted: true,
crcValidated: extraction?.crcValid || false,
dataIntegrity: extraction?.integrityCheck || 'unknown'
});
} catch (error) {
results.push({
scenario: scenario.name,
extracted: false,
error: error.message,
isCRCError: error.message.toLowerCase().includes('crc') ||
error.message.toLowerCase().includes('checksum')
});
}
}
return results;
}
);
crcErrors.forEach(result => {
t.ok(!result.extracted || !result.crcValidated || result.isCRCError,
`CRC error ${result.scenario} should be detected`);
});
// Test 4: Compression method issues
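// Only store (method 0) and deflate (method 8) are universally supported by
// ZIP readers; anything else (e.g. bzip2 = 12) or an unknown method id should
// be rejected rather than silently mis-decoded.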
const compressionMethodIssues = await performanceTracker.measureAsync(
'compression-method-issues',
async () => {
const compressionTests = [
{
name: 'unsupported-method',
method: 99, // Invalid compression method
description: 'Unknown compression algorithm'
},
{
name: 'store-but-compressed',
method: 0, // Store (no compression)
compressed: true,
description: 'Stored method but data is compressed'
},
{
name: 'deflate-corrupted',
method: 8, // Deflate
corrupted: true,
description: 'Deflate stream corrupted'
},
{
name: 'bzip2-in-zip',
method: 12, // Bzip2 (not standard in ZIP)
description: 'Non-standard compression method'
}
];
const results = [];
for (const test of compressionTests) {
const zipData = createZipWithCompressionMethod(test.method, test);
try {
const extracted = await einvoice.extractFromContainer(zipData);
results.push({
test: test.name,
method: test.method,
extracted: true,
filesFound: extracted?.files?.length || 0,
decompressed: extracted?.decompressed || false
});
} catch (error) {
results.push({
test: test.name,
method: test.method,
extracted: false,
error: error.message,
isCompressionError: error.message.includes('compress') ||
error.message.includes('method')
});
}
}
return results;
}
);
compressionMethodIssues.forEach(result => {
if (result.method === 0 || result.method === 8) {
t.ok(result.extracted || result.isCompressionError,
`Standard compression ${result.test} should be handled`);
} else {
t.notOk(result.extracted,
`Non-standard compression ${result.test} should fail`);
}
});
// Test 5: Nested/recursive ZIP bombs
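// Decompression bombs are contained by hard limits rather than detection
// alone: the extraction call below caps nesting depth, total expanded size and
// file count, and the test accepts either an early abort or a limited result.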
const nestedZipBombs = await performanceTracker.measureAsync(
'nested-zip-bombs',
async () => {
const bombTypes = [
{
name: 'deep-nesting',
depth: 10,
description: 'ZIP within ZIP, 10 levels deep'
},
{
name: 'exponential-expansion',
copies: 10,
description: 'Each level contains 10 copies'
},
{
name: 'circular-reference',
circular: true,
description: 'ZIP contains itself'
},
{
name: 'compression-ratio-bomb',
ratio: 1000,
description: 'Extreme compression ratio'
}
];
const results = [];
for (const bomb of bombTypes) {
const bombZip = createZipBomb(bomb);
const startTime = Date.now();
const startMemory = process.memoryUsage();
try {
const extraction = await einvoice.extractFromContainer(bombZip, {
maxDepth: 5,
maxExpandedSize: 100 * 1024 * 1024, // 100MB limit
maxFiles: 1000
});
const endTime = Date.now();
const endMemory = process.memoryUsage();
results.push({
type: bomb.name,
handled: true,
timeTaken: endTime - startTime,
memoryUsed: endMemory.heapUsed - startMemory.heapUsed,
depthReached: extraction?.maxDepth || 0,
stopped: extraction?.limitReached || false
});
} catch (error) {
results.push({
type: bomb.name,
handled: true,
prevented: true,
error: error.message,
isBombDetected: error.message.includes('bomb') ||
error.message.includes('depth') ||
error.message.includes('limit')
});
}
}
return results;
}
);
nestedZipBombs.forEach(result => {
t.ok(result.prevented || result.stopped,
`ZIP bomb ${result.type} should be prevented or limited`);
});
// Test 6: Character encoding in filenames
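// ZIP filenames are CP437 unless general-purpose flag bit 11 marks them as
// UTF-8; mixed or binary names and path-traversal entries ("zip slip") must
// never escape the extraction root.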
const filenameEncodingIssues = await performanceTracker.measureAsync(
'filename-encoding-issues',
async () => {
const encodingTests = [
{
name: 'utf8-bom-filename',
filename: '\uFEFFファイル.xml',
encoding: 'utf8'
},
{
name: 'cp437-extended',
filename: 'Ñoño_español.xml',
encoding: 'cp437'
},
{
name: 'mixed-encoding',
filename: 'Test_文件_файл.xml',
encoding: 'mixed'
},
{
name: 'null-bytes',
filename: 'file\x00.xml',
encoding: 'binary'
},
{
name: 'path-traversal',
filename: '../../../etc/passwd',
encoding: 'ascii'
}
];
const results = [];
for (const test of encodingTests) {
const zipData = createZipWithFilename(test.filename, test.encoding);
try {
const extracted = await einvoice.extractFromContainer(zipData);
const files = extracted?.files || [];
results.push({
test: test.name,
extracted: true,
fileCount: files.length,
filenamePreserved: files.some(f => f.name === test.filename),
filenameNormalized: files[0]?.name || null,
securityCheck: !files.some(f => f.name.includes('..'))
});
} catch (error) {
results.push({
test: test.name,
extracted: false,
error: error.message
});
}
}
return results;
}
);
filenameEncodingIssues.forEach(result => {
t.ok(result.securityCheck,
`Filename ${result.test} should pass security checks`);
});
// Test 7: Factur-X/ZUGFeRD specific corruptions
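// Factur-X/ZUGFeRD invoices embed the XML as a PDF/A-3 file attachment marked
// with an AFRelationship of Alternative and accompanying XMP metadata; the
// corruptions below remove or damage those pieces while keeping the PDF
// itself parseable.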
const facturXCorruptions = await performanceTracker.measureAsync(
'facturx-zugferd-corruptions',
async () => {
const corruptionTypes = [
{
name: 'missing-metadata',
description: 'PDF/A-3 without required metadata'
},
{
name: 'wrong-attachment-relationship',
description: 'XML not marked as Alternative'
},
{
name: 'multiple-xml-versions',
description: 'Both Factur-X and ZUGFeRD XML present'
},
{
name: 'corrupted-xml-stream',
description: 'XML attachment stream corrupted'
}
];
const results = [];
for (const corruption of corruptionTypes) {
const corruptedPDF = createCorruptedFacturX(corruption.name);
try {
const extraction = await einvoice.extractFromPDF(corruptedPDF);
results.push({
corruption: corruption.name,
extracted: !!extraction,
hasValidXML: extraction?.xml && isValidXML(extraction.xml),
hasMetadata: !!extraction?.metadata,
conformance: extraction?.conformanceLevel || 'unknown'
});
} catch (error) {
results.push({
corruption: corruption.name,
extracted: false,
error: error.message
});
}
}
return results;
}
);
facturXCorruptions.forEach(result => {
t.ok(result.extracted || result.error,
`Factur-X corruption ${result.corruption} was handled`);
});
// Test 8: Recovery strategies
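// When the central directory is gone, recovery falls back to scanning the raw
// bytes for 'PK\x03\x04' entry signatures, rebuilding the directory, inflating
// raw deflate streams, or simply pattern-matching for embedded XML.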
const recoveryStrategies = await performanceTracker.measureAsync(
'zip-recovery-strategies',
async () => {
const strategies = [
{
name: 'scan-for-headers',
description: 'Scan for local file headers'
},
{
name: 'reconstruct-central-dir',
description: 'Rebuild central directory'
},
{
name: 'raw-deflate-extraction',
description: 'Extract raw deflate streams'
},
{
name: 'pattern-matching',
description: 'Find XML by pattern matching'
}
];
const corruptedZip = createSeverelyCorruptedZip();
const results = [];
for (const strategy of strategies) {
try {
const recovered = await einvoice.attemptZipRecovery(corruptedZip, {
strategy: strategy.name
});
results.push({
strategy: strategy.name,
success: recovered?.success || false,
filesRecovered: recovered?.files?.length || 0,
xmlFound: recovered?.files?.some(f => f.name.endsWith('.xml')) || false,
confidence: recovered?.confidence || 0
});
} catch (error) {
results.push({
strategy: strategy.name,
success: false,
error: error.message
});
}
}
return results;
}
);
recoveryStrategies.forEach(result => {
t.ok(result.success || result.error,
`Recovery strategy ${result.strategy} was attempted`);
});
// Test 9: Multi-part archive issues
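// Split/spanned archives are only meaningful when every part is present, in
// order and in the same format; each case below violates one of those
// requirements and should fail to assemble or yield an incomplete archive.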
const multiPartArchiveIssues = await performanceTracker.measureAsync(
'multi-part-archive-issues',
async () => {
const multiPartTests = [
{
name: 'missing-parts',
parts: ['part1.zip', null, 'part3.zip'],
description: 'Missing middle part'
},
{
name: 'wrong-order',
parts: ['part3.zip', 'part1.zip', 'part2.zip'],
description: 'Parts in wrong order'
},
{
name: 'mixed-formats',
parts: ['part1.zip', 'part2.rar', 'part3.zip'],
description: 'Different archive formats'
},
{
name: 'size-mismatch',
parts: createMismatchedParts(),
description: 'Part sizes do not match'
}
];
const results = [];
for (const test of multiPartTests) {
try {
const assembled = await einvoice.assembleMultiPartArchive(test.parts);
const extracted = await einvoice.extractFromContainer(assembled);
results.push({
test: test.name,
assembled: true,
extracted: !!extracted,
complete: extracted?.isComplete || false
});
} catch (error) {
results.push({
test: test.name,
assembled: false,
error: error.message
});
}
}
return results;
}
);
multiPartArchiveIssues.forEach(result => {
t.ok(!result.assembled || !result.complete,
`Multi-part issue ${result.test} should cause problems`);
});
// Test 10: Performance with corrupted files
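// Extraction of each corrupted buffer is raced against a 10-second timeout so
// a pathological input cannot hang the suite; the assertion only requires that
// no size class times out.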
const corruptedPerformance = await performanceTracker.measureAsync(
'corrupted-file-performance',
async () => {
const sizes = [
{ size: 1024, name: '1KB' },
{ size: 1024 * 1024, name: '1MB' },
{ size: 10 * 1024 * 1024, name: '10MB' }
];
const results = [];
for (const sizeTest of sizes) {
// Create corrupted file of specific size
const corrupted = createCorruptedZipOfSize(sizeTest.size);
const startTime = Date.now();
const timeout = 10000; // 10 second timeout
try {
const extractPromise = einvoice.extractFromContainer(corrupted);
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout')), timeout)
);
await Promise.race([extractPromise, timeoutPromise]);
const timeTaken = Date.now() - startTime;
results.push({
size: sizeTest.name,
completed: true,
timeTaken,
timedOut: false
});
} catch (error) {
const timeTaken = Date.now() - startTime;
results.push({
size: sizeTest.name,
completed: false,
timeTaken,
timedOut: error.message === 'Timeout',
error: error.message
});
}
}
return results;
}
);
corruptedPerformance.forEach(result => {
t.ok(!result.timedOut,
`Corrupted file ${result.size} should not cause timeout`);
});
// Print performance summary
performanceTracker.printSummary();
});
// Helper functions
function createValidZipStructure(): Buffer {
// Simplified ZIP structure
const parts = [];
// Local file header
parts.push(Buffer.from('PK\x03\x04')); // Signature
parts.push(Buffer.alloc(26, 0)); // Header fields
parts.push(Buffer.from('test.xml')); // Filename
parts.push(Buffer.from('<Invoice><ID>123</ID></Invoice>')); // File data
// Central directory
parts.push(Buffer.from('PK\x01\x02')); // Signature
parts.push(Buffer.alloc(42, 0)); // Header fields
parts.push(Buffer.from('test.xml')); // Filename
// End of central directory
parts.push(Buffer.from('PK\x05\x06')); // Signature
parts.push(Buffer.alloc(18, 0)); // EOCD fields
return Buffer.concat(parts);
}
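/*
 * The helpers above are deliberately minimal and do not produce archives that
 * a strict ZIP reader would accept (header fields are zeroed, sizes and CRCs
 * are missing). If the corruption scenarios ever need a genuinely valid
 * baseline, a sketch like the following could be used instead. It is a
 * hypothetical helper (buildStoredZipEntry and crc32OfBuffer are not part of
 * this suite) that writes a single stored (uncompressed) entry with a correct
 * CRC-32, sizes, central directory and end-of-central-directory record.
 */
function crc32OfBuffer(buf: Buffer): number {
  // Bitwise CRC-32 (IEEE, reflected polynomial 0xEDB88320), table-free for brevity.
  let crc = 0xFFFFFFFF;
  for (let i = 0; i < buf.length; i++) {
    crc ^= buf[i];
    for (let k = 0; k < 8; k++) {
      crc = (crc >>> 1) ^ (0xEDB88320 & -(crc & 1));
    }
  }
  return (crc ^ 0xFFFFFFFF) >>> 0;
}
function buildStoredZipEntry(filename: string, content: Buffer): Buffer {
  const name = Buffer.from(filename, 'utf8');
  const crc = crc32OfBuffer(content);
  const local = Buffer.alloc(30);
  local.writeUInt32LE(0x04034b50, 0);         // local file header signature 'PK\x03\x04'
  local.writeUInt16LE(20, 4);                 // version needed to extract (2.0)
  local.writeUInt16LE(0x0800, 6);             // flags: bit 11 = UTF-8 filename
  local.writeUInt16LE(0, 8);                  // method 0 = store
  local.writeUInt32LE(crc, 14);               // CRC-32 of uncompressed data
  local.writeUInt32LE(content.length, 18);    // compressed size (= uncompressed for store)
  local.writeUInt32LE(content.length, 22);    // uncompressed size
  local.writeUInt16LE(name.length, 26);       // filename length
  const central = Buffer.alloc(46);
  central.writeUInt32LE(0x02014b50, 0);       // central directory header signature 'PK\x01\x02'
  central.writeUInt16LE(20, 4);               // version made by
  central.writeUInt16LE(20, 6);               // version needed to extract
  central.writeUInt16LE(0x0800, 8);           // flags
  central.writeUInt32LE(crc, 16);
  central.writeUInt32LE(content.length, 20);
  central.writeUInt32LE(content.length, 24);
  central.writeUInt16LE(name.length, 28);
  central.writeUInt32LE(0, 42);               // offset of the local header (file starts at 0)
  const localPart = Buffer.concat([local, name, content]);
  const centralPart = Buffer.concat([central, name]);
  const eocd = Buffer.alloc(22);
  eocd.writeUInt32LE(0x06054b50, 0);          // end of central directory signature 'PK\x05\x06'
  eocd.writeUInt16LE(1, 8);                   // entries on this disk
  eocd.writeUInt16LE(1, 10);                  // total entries
  eocd.writeUInt32LE(centralPart.length, 12); // central directory size
  eocd.writeUInt32LE(localPart.length, 16);   // central directory offset
  return Buffer.concat([localPart, centralPart, eocd]);
}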
function createZipWithInvoice(): Buffer {
// Create a simple ZIP with invoice XML
return createValidZipStructure();
}
function findCRCOffset(data: Buffer): number {
// Find CRC32 field in ZIP structure
const sig = Buffer.from('PK\x03\x04');
const idx = data.indexOf(sig);
if (idx >= 0) {
return idx + 14; // CRC32 offset in local file header
}
return -1;
}
function createZipWithCompressionMethod(method: number, options: any): Buffer {
const parts = [];
// Local file header with specific compression method
parts.push(Buffer.from('PK\x03\x04'));
const header = Buffer.alloc(26, 0);
// Offsets here are relative to the end of the 4-byte signature, so the
// compression method field (file offset 8) sits at offset 4 of this buffer.
header.writeUInt16LE(method, 4); // Compression method
header.writeUInt16LE('invoice.xml'.length, 22); // Filename length
parts.push(header);
parts.push(Buffer.from('invoice.xml'));
// Add compressed or uncompressed data based on method
if (options.corrupted) {
parts.push(Buffer.from([0xFF, 0xFE, 0xFD])); // Invalid deflate stream
} else if (method === 0) {
parts.push(Buffer.from('<Invoice/>'));
} else {
parts.push(Buffer.from([0x78, 0x9C])); // Deflate header
parts.push(Buffer.alloc(10, 0)); // Compressed data
}
return Buffer.concat(parts);
}
function createZipBomb(config: any): Buffer {
// Create various types of ZIP bombs
if (config.circular) {
// Create a ZIP that references itself
return Buffer.from('PK...[circular reference]...');
} else if (config.depth) {
// Create nested ZIPs
let zip = Buffer.from('<Invoice/>');
for (let i = 0; i < config.depth; i++) {
zip = wrapInZip(zip, `level${i}.zip`);
}
return zip;
}
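// Other bomb types (exponential expansion, extreme compression ratio) fall
// through to a bare signature below; they act as placeholders rather than
// real bombs.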
return Buffer.from('PK');
}
function wrapInZip(content: Buffer, filename: string): Buffer {
// Wrap content in a ZIP file
return Buffer.concat([
Buffer.from('PK\x03\x04'),
Buffer.alloc(26, 0),
Buffer.from(filename),
content
]);
}
function createZipWithFilename(filename: string, encoding: string): Buffer {
const parts = [];
parts.push(Buffer.from('PK\x03\x04'));
const header = Buffer.alloc(26, 0);
const filenameBuffer = Buffer.from(filename, encoding === 'binary' ? 'binary' : 'utf8');
// Filename length sits at file offset 26, i.e. offset 22 of this post-signature
// buffer (offset 24 would be the extra-field length).
header.writeUInt16LE(filenameBuffer.length, 22);
parts.push(header);
parts.push(filenameBuffer);
parts.push(Buffer.from('<Invoice/>'));
return Buffer.concat(parts);
}
function createCorruptedFacturX(type: string): Buffer {
// Create corrupted Factur-X/ZUGFeRD PDFs
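// Note: this simplified stub ignores the corruption type and returns the same
// minimal mock PDF for every case; the individual corruptions are only
// exercised if a fuller fixture is substituted here.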
const mockPDF = Buffer.from('%PDF-1.4\n...');
return mockPDF;
}
function createSeverelyCorruptedZip(): Buffer {
// Create a severely corrupted ZIP for recovery testing
const data = Buffer.alloc(1024);
data.fill(0xFF);
// Add some ZIP-like signatures at random positions
data.write('PK\x03\x04', 100);
data.write('<Invoice', 200);
data.write('</Invoice>', 300);
return data;
}
function createMismatchedParts(): Buffer[] {
return [
Buffer.alloc(1000, 1),
Buffer.alloc(500, 2),
Buffer.alloc(1500, 3)
];
}
function createCorruptedZipOfSize(size: number): Buffer {
const data = Buffer.alloc(size);
// Fill with random data
for (let i = 0; i < size; i += 4) {
data.writeUInt32LE(Math.floor(Math.random() * 0xFFFFFFFF), i); // keep the value an unsigned 32-bit integer
}
// Add ZIP signature at start
data.write('PK\x03\x04', 0);
return data;
}
function isValidXML(content: string): boolean {
try {
// Simple XML validation check
return content.includes('<?xml') && content.includes('>');
} catch {
return false;
}
}
// Run the test
tap.start();