This commit is contained in:
2025-05-28 10:15:48 +00:00
parent 32f8bc192a
commit 5928948cfd
4 changed files with 788 additions and 646 deletions

View File

@ -1,10 +1,7 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../../ts/plugins.ts';
import { EInvoice } from '../../../ts/classes.xinvoice.ts';
import { CorpusLoader } from '../../helpers/corpus.loader.ts';
import { PerformanceTracker } from '../../helpers/performance.tracker.ts';
const testTimeout = 300000; // 5 minutes timeout for PDF processing
import * as plugins from '../../../ts/plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../helpers/corpus.loader.js';
// PDF-09: Corrupted PDF Recovery
// Tests recovery mechanisms for corrupted, malformed, or partially damaged PDF files
@ -15,23 +12,23 @@ tap.test('PDF-09: Corrupted PDF Recovery - Truncated PDF Files', async (tools) =
try {
// Get a working PDF from corpus to create corrupted versions
const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1');
const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
if (validPdfs.length === 0) {
tools.log('⚠ No valid PDF files found for corruption testing');
console.log('⚠ No valid PDF files found for corruption testing');
return;
}
const basePdf = validPdfs[0];
const basePdfName = plugins.path.basename(basePdf);
tools.log(`Creating corrupted versions of: ${basePdfName}`);
console.log(`Creating corrupted versions of: ${basePdfName}`);
// Read the original PDF
const originalPdfBuffer = await plugins.fs.readFile(basePdf);
const originalSize = originalPdfBuffer.length;
tools.log(`Original PDF size: ${(originalSize / 1024).toFixed(1)}KB`);
console.log(`Original PDF size: ${(originalSize / 1024).toFixed(1)}KB`);
// Test different levels of truncation
const truncationTests = [
@ -47,44 +44,44 @@ tap.test('PDF-09: Corrupted PDF Recovery - Truncated PDF Files', async (tools) =
const truncatedBuffer = originalPdfBuffer.subarray(0, truncatedSize);
const truncatedPath = plugins.path.join(process.cwd(), '.nogit', `truncated-${truncationTest.name.toLowerCase().replace(/\s+/g, '-')}.pdf`);
await plugins.fs.ensureDir(plugins.path.dirname(truncatedPath));
await plugins.fs.mkdir(plugins.path.dirname(truncatedPath), { recursive: true });
await plugins.fs.writeFile(truncatedPath, truncatedBuffer);
tools.log(`Testing ${truncationTest.name} (${(truncatedSize / 1024).toFixed(1)}KB)...`);
console.log(`Testing ${truncationTest.name} (${(truncatedSize / 1024).toFixed(1)}KB)...`);
try {
const invoice = new EInvoice();
const extractionResult = await invoice.fromFile(truncatedPath);
if (extractionResult) {
tools.log(` ✓ Unexpected success - managed to extract from ${truncationTest.name}`);
console.log(` ✓ Unexpected success - managed to extract from ${truncationTest.name}`);
// Verify extracted content
const xmlContent = await invoice.toXmlString();
const xmlContent = await invoice.toXmlString('ubl');
if (xmlContent && xmlContent.length > 50) {
tools.log(` Extracted XML length: ${xmlContent.length} chars`);
console.log(` Extracted XML length: ${xmlContent.length} chars`);
}
} else {
tools.log(` ✓ Expected failure - no extraction from ${truncationTest.name}`);
console.log(` ✓ Expected failure - no extraction from ${truncationTest.name}`);
}
} catch (extractionError) {
// Expected for corrupted files
tools.log(` ✓ Expected error for ${truncationTest.name}: ${extractionError.message.substring(0, 100)}...`);
console.log(` ✓ Expected error for ${truncationTest.name}: ${extractionError.message.substring(0, 100)}...`);
expect(extractionError.message).toBeTruthy();
}
// Clean up
await plugins.fs.remove(truncatedPath);
await plugins.fs.unlink(truncatedPath);
}
} catch (error) {
tools.log(`Truncated PDF test failed: ${error.message}`);
console.log(`Truncated PDF test failed: ${error.message}`);
throw error;
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-corrupted-truncated', duration);
console.log(`Test completed in ${duration}ms`);
});
tap.test('PDF-09: Corrupted PDF Recovery - Header Corruption', async (tools) => {
@ -125,10 +122,10 @@ tap.test('PDF-09: Corrupted PDF Recovery - Header Corruption', async (tools) =>
];
for (const headerTest of headerCorruptionTests) {
tools.log(`Testing ${headerTest.name}...`);
console.log(`Testing ${headerTest.name}...`);
const corruptedPath = plugins.path.join(process.cwd(), '.nogit', `header-${headerTest.name.toLowerCase().replace(/\s+/g, '-')}.pdf`);
await plugins.fs.ensureDir(plugins.path.dirname(corruptedPath));
await plugins.fs.mkdir(plugins.path.dirname(corruptedPath), { recursive: true });
try {
// Create corrupted file
@ -139,26 +136,26 @@ tap.test('PDF-09: Corrupted PDF Recovery - Header Corruption', async (tools) =>
if (headerTest.expectedError) {
if (extractionResult) {
tools.log(` ⚠ Expected error for ${headerTest.name} but extraction succeeded`);
console.log(` ⚠ Expected error for ${headerTest.name} but extraction succeeded`);
} else {
tools.log(` ✓ Expected failure - no extraction from ${headerTest.name}`);
console.log(` ✓ Expected failure - no extraction from ${headerTest.name}`);
}
} else {
tools.log(`${headerTest.name}: Extraction succeeded as expected`);
console.log(`${headerTest.name}: Extraction succeeded as expected`);
}
} catch (extractionError) {
if (headerTest.expectedError) {
tools.log(` ✓ Expected error for ${headerTest.name}: ${extractionError.message.substring(0, 80)}...`);
console.log(` ✓ Expected error for ${headerTest.name}: ${extractionError.message.substring(0, 80)}...`);
expect(extractionError.message).toBeTruthy();
} else {
tools.log(` ✗ Unexpected error for ${headerTest.name}: ${extractionError.message}`);
console.log(` ✗ Unexpected error for ${headerTest.name}: ${extractionError.message}`);
throw extractionError;
}
} finally {
// Clean up
try {
await plugins.fs.remove(corruptedPath);
await plugins.fs.unlink(corruptedPath);
} catch (cleanupError) {
// Ignore cleanup errors
}
@ -166,24 +163,24 @@ tap.test('PDF-09: Corrupted PDF Recovery - Header Corruption', async (tools) =>
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-corrupted-header', duration);
console.log(`Test completed in ${duration}ms`);
});
tap.test('PDF-09: Corrupted PDF Recovery - Random Byte Corruption', async (tools) => {
const startTime = Date.now();
try {
const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1');
const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
if (validPdfs.length === 0) {
tools.log('⚠ No valid PDF files found for random corruption testing');
console.log('⚠ No valid PDF files found for random corruption testing');
return;
}
const basePdf = validPdfs[0];
const originalBuffer = await plugins.fs.readFile(basePdf);
tools.log(`Testing random byte corruption with: ${plugins.path.basename(basePdf)}`);
console.log(`Testing random byte corruption with: ${plugins.path.basename(basePdf)}`);
// Test different levels of random corruption
const corruptionLevels = [
@ -194,7 +191,7 @@ tap.test('PDF-09: Corrupted PDF Recovery - Random Byte Corruption', async (tools
];
for (const corruptionLevel of corruptionLevels) {
tools.log(`Testing ${corruptionLevel.name}...`);
console.log(`Testing ${corruptionLevel.name}...`);
// Create corrupted version
const corruptedBuffer = Buffer.from(originalBuffer);
@ -207,7 +204,7 @@ tap.test('PDF-09: Corrupted PDF Recovery - Random Byte Corruption', async (tools
}
const corruptedPath = plugins.path.join(process.cwd(), '.nogit', `random-${corruptionLevel.name.toLowerCase().replace(/\s+/g, '-')}.pdf`);
await plugins.fs.ensureDir(plugins.path.dirname(corruptedPath));
await plugins.fs.mkdir(plugins.path.dirname(corruptedPath), { recursive: true });
await plugins.fs.writeFile(corruptedPath, corruptedBuffer);
try {
@ -215,29 +212,29 @@ tap.test('PDF-09: Corrupted PDF Recovery - Random Byte Corruption', async (tools
const extractionResult = await invoice.fromFile(corruptedPath);
if (extractionResult) {
tools.log(` ✓ Resilient recovery from ${corruptionLevel.name}`);
console.log(` ✓ Resilient recovery from ${corruptionLevel.name}`);
// Verify extracted content quality
const xmlContent = await invoice.toXmlString();
const xmlContent = await invoice.toXmlString('ubl');
if (xmlContent && xmlContent.length > 100) {
tools.log(` Extracted ${xmlContent.length} chars of XML`);
console.log(` Extracted ${xmlContent.length} chars of XML`);
// Test if XML is well-formed
try {
// Simple XML validation
if (xmlContent.includes('<?xml') && xmlContent.includes('</')) {
tools.log(` ✓ Extracted XML appears well-formed`);
console.log(` ✓ Extracted XML appears well-formed`);
}
} catch (xmlError) {
tools.log(` ⚠ Extracted XML may be malformed: ${xmlError.message}`);
console.log(` ⚠ Extracted XML may be malformed: ${xmlError.message}`);
}
}
} else {
tools.log(` ⚠ No extraction possible from ${corruptionLevel.name}`);
console.log(` ⚠ No extraction possible from ${corruptionLevel.name}`);
}
} catch (extractionError) {
tools.log(` ⚠ Extraction failed for ${corruptionLevel.name}: ${extractionError.message.substring(0, 80)}...`);
console.log(` ⚠ Extraction failed for ${corruptionLevel.name}: ${extractionError.message.substring(0, 80)}...`);
// Check if error message is helpful
expect(extractionError.message).toBeTruthy();
@ -245,99 +242,99 @@ tap.test('PDF-09: Corrupted PDF Recovery - Random Byte Corruption', async (tools
}
// Clean up
await plugins.fs.remove(corruptedPath);
await plugins.fs.unlink(corruptedPath);
}
} catch (error) {
tools.log(`Random corruption test failed: ${error.message}`);
console.log(`Random corruption test failed: ${error.message}`);
throw error;
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-corrupted-random', duration);
console.log(`Test completed in ${duration}ms`);
});
/**
 * PDF-09 / Structural Damage.
 *
 * Takes the first PDF returned by CorpusLoader.getFiles('ZUGFERD_V1_CORRECT'),
 * applies one textual mutation per scenario (xref table, trailer, startxref,
 * object headers, EOF marker), writes the mutated copy to `.nogit/`, and feeds
 * it to EInvoice.fromFile. Both outcomes (recovery or a thrown error with a
 * non-empty message) are accepted; the test only fails if something outside
 * the per-scenario handling throws.
 */
tap.test('PDF-09: Corrupted PDF Recovery - Structural Damage', async (tools) => {
  const startTime = Date.now();

  try {
    const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');

    if (validPdfs.length === 0) {
      console.log('⚠ No valid PDF files found for structural damage testing');
      return;
    }

    const basePdf = validPdfs[0];
    // 'binary' (latin1) maps every byte to exactly one character, so the
    // regex-based mutations below round-trip when written back as 'binary'.
    const originalContent = await plugins.fs.readFile(basePdf, 'binary');

    console.log(`Testing structural damage with: ${plugins.path.basename(basePdf)}`);

    // Test different types of structural damage
    const structuralDamageTests = [
      {
        name: 'Missing xref table',
        damage: (content: string) => content.replace(/xref\s*\n[\s\S]*?trailer/g, 'damaged-xref')
      },
      {
        name: 'Corrupted trailer',
        damage: (content: string) => content.replace(/trailer\s*<<[\s\S]*?>>/g, 'damaged-trailer')
      },
      {
        name: 'Missing startxref',
        damage: (content: string) => content.replace(/startxref\s*\d+/g, 'damaged-startxref')
      },
      {
        name: 'Corrupted PDF objects',
        damage: (content: string) => content.replace(/\d+\s+\d+\s+obj/g, 'XX XX damaged')
      },
      {
        name: 'Missing EOF marker',
        damage: (content: string) => content.replace(/%%EOF\s*$/, 'CORRUPTED')
      }
    ];

    for (const damageTest of structuralDamageTests) {
      console.log(`Testing ${damageTest.name}...`);

      // Computed before the try so the finally clause can always reach it.
      const damagedPath = plugins.path.join(
        process.cwd(),
        '.nogit',
        `structural-${damageTest.name.toLowerCase().replace(/\s+/g, '-')}.pdf`
      );

      try {
        const damagedContent = damageTest.damage(originalContent);

        await plugins.fs.mkdir(plugins.path.dirname(damagedPath), { recursive: true });
        await plugins.fs.writeFile(damagedPath, damagedContent, 'binary');

        const invoice = new EInvoice();
        const extractionResult = await invoice.fromFile(damagedPath);

        if (extractionResult) {
          console.log(` ✓ Recovered from ${damageTest.name}`);

          // Test extracted content
          const xmlContent = await invoice.toXmlString('ubl');
          if (xmlContent && xmlContent.length > 50) {
            console.log(` Recovered XML content: ${xmlContent.length} chars`);
          }
        } else {
          console.log(` ⚠ No recovery possible from ${damageTest.name}`);
        }
      } catch (extractionError: unknown) {
        // A thrown error is an acceptable outcome for a damaged file; we only
        // require that it carries a non-empty message.
        const message =
          extractionError instanceof Error ? extractionError.message : String(extractionError);
        console.log(`${damageTest.name} extraction failed: ${message.substring(0, 80)}...`);
        expect(message).toBeTruthy();
      } finally {
        // Clean up on every path (success, failed extraction, failed write) so
        // damaged fixtures never accumulate in .nogit.
        try {
          await plugins.fs.unlink(damagedPath);
        } catch (cleanupError) {
          // Ignore cleanup errors (e.g. the file was never written)
        }
      }
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.log(`Structural damage test failed: ${message}`);
    throw error;
  }

  const duration = Date.now() - startTime;
  console.log(`Test completed in ${duration}ms`);
});
tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools) => {
@ -345,16 +342,16 @@ tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools)
// Test scenarios where the XML attachment itself is corrupted
try {
const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1');
const validPdfs = await CorpusLoader.getFiles('ZUGFERD_V1_CORRECT');
if (validPdfs.length === 0) {
tools.log('⚠ No valid PDF files found for attachment corruption testing');
console.log('⚠ No valid PDF files found for attachment corruption testing');
return;
}
const basePdf = validPdfs[0];
tools.log(`Testing attachment corruption scenarios with: ${plugins.path.basename(basePdf)}`);
console.log(`Testing attachment corruption scenarios with: ${plugins.path.basename(basePdf)}`);
// First, try to extract XML from the original file to understand the structure
let originalXml = null;
@ -363,11 +360,11 @@ tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools)
const originalResult = await originalInvoice.fromFile(basePdf);
if (originalResult) {
originalXml = await originalInvoice.toXmlString();
tools.log(`Original XML length: ${originalXml.length} chars`);
originalXml = await originalInvoice.toXmlString('ubl');
console.log(`Original XML length: ${originalXml.length} chars`);
}
} catch (originalError) {
tools.log(`Could not extract original XML: ${originalError.message}`);
console.log(`Could not extract original XML: ${originalError.message}`);
}
// Test various attachment corruption scenarios
@ -391,7 +388,7 @@ tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools)
];
for (const attachmentTest of attachmentTests) {
tools.log(`Testing ${attachmentTest.name}: ${attachmentTest.description}`);
console.log(`Testing ${attachmentTest.name}: ${attachmentTest.description}`);
try {
const invoice = new EInvoice();
@ -401,7 +398,7 @@ tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools)
if (extractionResult) {
// If we got any result, test the robustness of the extraction
const extractedXml = await invoice.toXmlString();
const extractedXml = await invoice.toXmlString('ubl');
if (extractedXml) {
// Test XML integrity
@ -412,24 +409,24 @@ tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools)
isBalanced: (extractedXml.match(/</g) || []).length === (extractedXml.match(/>/g) || []).length
};
tools.log(` XML Integrity Checks:`);
tools.log(` Has XML Declaration: ${integrityChecks.hasXmlDeclaration}`);
tools.log(` Has Root Element: ${integrityChecks.hasRootElement}`);
tools.log(` Has Closing Tags: ${integrityChecks.hasClosingTags}`);
tools.log(` Tags Balanced: ${integrityChecks.isBalanced}`);
console.log(` XML Integrity Checks:`);
console.log(` Has XML Declaration: ${integrityChecks.hasXmlDeclaration}`);
console.log(` Has Root Element: ${integrityChecks.hasRootElement}`);
console.log(` Has Closing Tags: ${integrityChecks.hasClosingTags}`);
console.log(` Tags Balanced: ${integrityChecks.isBalanced}`);
if (Object.values(integrityChecks).every(check => check === true)) {
tools.log(`${attachmentTest.name}: XML integrity maintained`);
console.log(`${attachmentTest.name}: XML integrity maintained`);
} else {
tools.log(`${attachmentTest.name}: XML integrity issues detected`);
console.log(`${attachmentTest.name}: XML integrity issues detected`);
}
}
} else {
tools.log(`${attachmentTest.name}: No XML extracted`);
console.log(`${attachmentTest.name}: No XML extracted`);
}
} catch (extractionError) {
tools.log(`${attachmentTest.name} extraction failed: ${extractionError.message.substring(0, 80)}...`);
console.log(`${attachmentTest.name} extraction failed: ${extractionError.message.substring(0, 80)}...`);
// Verify error contains useful information
expect(extractionError.message).toBeTruthy();
@ -439,18 +436,18 @@ tap.test('PDF-09: Corrupted PDF Recovery - Attachment Corruption', async (tools)
if (errorMessage.includes('corrupt') ||
errorMessage.includes('malformed') ||
errorMessage.includes('damaged')) {
tools.log(` ✓ Error message indicates corruption: helpful for debugging`);
console.log(` ✓ Error message indicates corruption: helpful for debugging`);
}
}
}
} catch (error) {
tools.log(`Attachment corruption test failed: ${error.message}`);
console.log(`Attachment corruption test failed: ${error.message}`);
throw error;
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-corrupted-attachment', duration);
console.log(`Test completed in ${duration}ms`);
});
tap.test('PDF-09: Corrupted PDF Recovery - Error Reporting Quality', async (tools) => {
@ -476,10 +473,10 @@ tap.test('PDF-09: Corrupted PDF Recovery - Error Reporting Quality', async (tool
];
for (const errorTest of errorReportingTests) {
tools.log(`Testing error reporting for: ${errorTest.name}`);
console.log(`Testing error reporting for: ${errorTest.name}`);
const corruptedPath = plugins.path.join(process.cwd(), '.nogit', `error-${errorTest.name.toLowerCase().replace(/\s+/g, '-')}.pdf`);
await plugins.fs.ensureDir(plugins.path.dirname(corruptedPath));
await plugins.fs.mkdir(plugins.path.dirname(corruptedPath), { recursive: true });
try {
// Create corrupted file
@ -493,10 +490,10 @@ tap.test('PDF-09: Corrupted PDF Recovery - Error Reporting Quality', async (tool
try {
await invoice.fromFile(corruptedPath);
tools.log(` ⚠ Expected error for ${errorTest.name} but operation succeeded`);
console.log(` ⚠ Expected error for ${errorTest.name} but operation succeeded`);
} catch (extractionError) {
tools.log(` ✓ Error caught for ${errorTest.name}`);
tools.log(` Error message: ${extractionError.message}`);
console.log(` ✓ Error caught for ${errorTest.name}`);
console.log(` Error message: ${extractionError.message}`);
// Analyze error message quality
const errorMessage = extractionError.message.toLowerCase();
@ -510,35 +507,35 @@ tap.test('PDF-09: Corrupted PDF Recovery - Error Reporting Quality', async (tool
errorMessage.includes('corrupt')
};
tools.log(` Message Quality Analysis:`);
tools.log(` Descriptive (>20 chars): ${messageQuality.isDescriptive}`);
tools.log(` Contains file info: ${messageQuality.containsFileInfo}`);
tools.log(` Contains error type: ${messageQuality.containsErrorType}`);
tools.log(` Is actionable: ${messageQuality.isActionable}`);
console.log(` Message Quality Analysis:`);
console.log(` Descriptive (>20 chars): ${messageQuality.isDescriptive}`);
console.log(` Contains file info: ${messageQuality.containsFileInfo}`);
console.log(` Contains error type: ${messageQuality.containsErrorType}`);
console.log(` Is actionable: ${messageQuality.isActionable}`);
// Error message should be helpful
expect(messageQuality.isDescriptive).toBeTrue();
if (messageQuality.containsFileInfo && messageQuality.isActionable) {
tools.log(` ✓ High quality error message`);
console.log(` ✓ High quality error message`);
} else {
tools.log(` ⚠ Error message could be more helpful`);
console.log(` ⚠ Error message could be more helpful`);
}
// Check error object properties
if (extractionError.code) {
tools.log(` Error code: ${extractionError.code}`);
console.log(` Error code: ${extractionError.code}`);
}
if (extractionError.path) {
tools.log(` Error path: ${extractionError.path}`);
console.log(` Error path: ${extractionError.path}`);
}
}
} finally {
// Clean up
try {
await plugins.fs.remove(corruptedPath);
await plugins.fs.unlink(corruptedPath);
} catch (cleanupError) {
// Ignore cleanup errors
}
@ -546,29 +543,13 @@ tap.test('PDF-09: Corrupted PDF Recovery - Error Reporting Quality', async (tool
}
const duration = Date.now() - startTime;
PerformanceTracker.recordMetric('pdf-corrupted-error-reporting', duration);
console.log(`Test completed in ${duration}ms`);
});
/**
 * PDF-09 / Performance Summary.
 *
 * Looks up the PerformanceTracker summary for each corrupted-PDF metric key,
 * one at a time and in a fixed order, and logs avg/min/max/p95 for every key
 * that has a summary. Ends with two fixed closing lines.
 */
tap.test('PDF-09: Performance Summary', async (tools) => {
  const trackedMetrics = [
    'pdf-corrupted-truncated',
    'pdf-corrupted-header',
    'pdf-corrupted-random',
    'pdf-corrupted-structural',
    'pdf-corrupted-attachment',
    'pdf-corrupted-error-reporting'
  ];

  tools.log(`\n=== Corrupted PDF Recovery Performance Summary ===`);

  for (const metricKey of trackedMetrics) {
    const stats = await PerformanceTracker.getSummary(metricKey);

    // Keys without a summary are skipped silently.
    if (!stats) {
      continue;
    }

    tools.log(`${metricKey}:`);
    tools.log(` avg=${stats.average}ms, min=${stats.min}ms, max=${stats.max}ms, p95=${stats.p95}ms`);
  }

  tools.log(`\nCorrupted PDF recovery testing completed.`);
  tools.log(`Note: Most corruption tests expect failures - this is normal and indicates proper error handling.`);
});
/**
 * PDF-09 / Test Summary.
 *
 * Prints a fixed three-line footer to the console; performs no assertions.
 */
tap.test('PDF-09: Test Summary', async (tools) => {
  const footerLines = [
    `\n=== Corrupted PDF Recovery Test Summary ===`,
    `\nCorrupted PDF recovery testing completed.`,
    `Note: Most corruption tests expect failures - this is normal and indicates proper error handling.`
  ];

  // One console.log call per line, in order.
  for (const line of footerLines) {
    console.log(line);
  }
});
// Final statement of the file: hands control to the tap harness.
// NOTE(review): presumably this runs the tap.test() cases registered above — confirm against the tapbundle docs.
tap.start();