einvoice/test/suite/einvoice_edge-cases/test.edge-05.zero-byte-pdf.ts
2025-05-26 04:04:51 +00:00

526 lines
16 KiB
TypeScript

import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { PerformanceTracker } from '../performance.tracker.js';
const performanceTracker = new PerformanceTracker('EDGE-05: Zero-Byte PDFs');
tap.test('EDGE-05: Zero-Byte PDFs - should handle zero-byte and minimal PDF files', async (t) => {
const einvoice = new EInvoice();
// Test 1: Truly zero-byte PDF
const zeroByteFile = await performanceTracker.measureAsync(
'truly-zero-byte-pdf',
async () => {
const zeroPDF = Buffer.alloc(0);
try {
const result = await einvoice.extractFromPDF(zeroPDF);
return {
handled: true,
hasContent: !!result,
hasXML: result?.xml !== undefined,
hasAttachments: result?.attachments?.length > 0,
error: null,
bufferSize: zeroPDF.length
};
} catch (error) {
return {
handled: true,
hasContent: false,
error: error.message,
errorType: error.constructor.name,
bufferSize: zeroPDF.length
};
}
}
);
t.ok(zeroByteFile.handled, 'Zero-byte PDF was handled');
t.notOk(zeroByteFile.hasContent, 'Zero-byte PDF has no content');
t.equal(zeroByteFile.bufferSize, 0, 'Buffer size is zero');
// Test 2: Minimal PDF structure
const minimalPDFStructure = await performanceTracker.measureAsync(
'minimal-pdf-structure',
async () => {
const minimalPDFs = [
{
name: 'header-only',
content: Buffer.from('%PDF-1.4')
},
{
name: 'header-and-eof',
content: Buffer.from('%PDF-1.4\n%%EOF')
},
{
name: 'empty-catalog',
content: Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog >>\nendobj\n' +
'xref\n0 2\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'trailer\n<< /Size 2 /Root 1 0 R >>\n' +
'startxref\n64\n%%EOF'
)
},
{
name: 'single-empty-page',
content: Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
'xref\n0 3\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000052 00000 n\n' +
'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
'startxref\n110\n%%EOF'
)
}
];
const results = [];
for (const pdf of minimalPDFs) {
try {
const result = await einvoice.extractFromPDF(pdf.content);
results.push({
name: pdf.name,
size: pdf.content.length,
processed: true,
hasXML: !!result?.xml,
hasAttachments: result?.attachments?.length > 0,
hasMetadata: !!result?.metadata
});
} catch (error) {
results.push({
name: pdf.name,
size: pdf.content.length,
processed: false,
error: error.message
});
}
}
return results;
}
);
minimalPDFStructure.forEach(result => {
t.ok(result.processed || result.error, `Minimal PDF ${result.name} was processed`);
t.notOk(result.hasXML, `Minimal PDF ${result.name} has no XML`);
});
// Test 3: Truncated PDF files
const truncatedPDFs = await performanceTracker.measureAsync(
'truncated-pdf-files',
async () => {
// Start with a valid PDF structure and truncate at different points
const fullPDF = Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n' +
'3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n' +
'xref\n0 4\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000052 00000 n\n' +
'0000000110 00000 n\n' +
'trailer\n<< /Size 4 /Root 1 0 R >>\n' +
'startxref\n196\n%%EOF'
);
const truncationPoints = [
{ name: 'after-header', bytes: 10 },
{ name: 'mid-object', bytes: 50 },
{ name: 'before-xref', bytes: 150 },
{ name: 'mid-xref', bytes: 250 },
{ name: 'before-eof', bytes: fullPDF.length - 5 }
];
const results = [];
for (const point of truncationPoints) {
const truncated = fullPDF.slice(0, point.bytes);
try {
const result = await einvoice.extractFromPDF(truncated);
results.push({
truncationPoint: point.name,
size: truncated.length,
recovered: true,
hasPartialData: !!result
});
} catch (error) {
results.push({
truncationPoint: point.name,
size: truncated.length,
recovered: false,
error: error.message,
isCorruptionError: error.message.includes('corrupt') || error.message.includes('truncated')
});
}
}
return results;
}
);
truncatedPDFs.forEach(result => {
t.ok(!result.recovered || result.isCorruptionError,
`Truncated PDF at ${result.truncationPoint} should fail or be detected as corrupt`);
});
// Test 4: PDF with zero-byte attachment
const zeroByteAttachment = await performanceTracker.measureAsync(
'pdf-with-zero-byte-attachment',
async () => {
// Create a PDF with an embedded file of zero bytes
const pdfWithEmptyAttachment = Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /Names 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /EmbeddedFiles 3 0 R >>\nendobj\n' +
'3 0 obj\n<< /Names [(empty.xml) 4 0 R] >>\nendobj\n' +
'4 0 obj\n<< /Type /Filespec /F (empty.xml) /EF << /F 5 0 R >> >>\nendobj\n' +
'5 0 obj\n<< /Type /EmbeddedFile /Length 0 >>\nstream\n\nendstream\nendobj\n' +
'xref\n0 6\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000062 00000 n\n' +
'0000000103 00000 n\n' +
'0000000151 00000 n\n' +
'0000000229 00000 n\n' +
'trailer\n<< /Size 6 /Root 1 0 R >>\n' +
'startxref\n307\n%%EOF'
);
try {
const result = await einvoice.extractFromPDF(pdfWithEmptyAttachment);
return {
processed: true,
hasAttachments: result?.attachments?.length > 0,
attachmentCount: result?.attachments?.length || 0,
firstAttachmentSize: result?.attachments?.[0]?.size || 0,
firstAttachmentName: result?.attachments?.[0]?.name || null
};
} catch (error) {
return {
processed: false,
error: error.message
};
}
}
);
t.ok(zeroByteAttachment.processed, 'PDF with zero-byte attachment was processed');
if (zeroByteAttachment.hasAttachments) {
t.equal(zeroByteAttachment.firstAttachmentSize, 0, 'Attachment size is zero');
}
// Test 5: PDF with only metadata
const metadataOnlyPDF = await performanceTracker.measureAsync(
'pdf-with-only-metadata',
async () => {
const pdfWithMetadata = Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /Metadata 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /Type /Metadata /Subtype /XML /Length 100 >>\n' +
'stream\n' +
'<?xml version="1.0"?><x:xmpmeta xmlns:x="adobe:ns:meta/"><rdf:RDF></rdf:RDF></x:xmpmeta>\n' +
'endstream\nendobj\n' +
'xref\n0 3\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000068 00000 n\n' +
'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
'startxref\n259\n%%EOF'
);
try {
const result = await einvoice.extractFromPDF(pdfWithMetadata);
return {
processed: true,
hasMetadata: !!result?.metadata,
hasXML: !!result?.xml,
hasContent: !!result?.content,
isEmpty: !result?.xml && !result?.attachments?.length
};
} catch (error) {
return {
processed: false,
error: error.message
};
}
}
);
t.ok(metadataOnlyPDF.processed, 'PDF with only metadata was processed');
t.ok(metadataOnlyPDF.isEmpty, 'PDF with only metadata has no invoice content');
// Test 6: Compressed empty streams
const compressedEmptyStreams = await performanceTracker.measureAsync(
'compressed-empty-streams',
async () => {
const compressionMethods = [
{ name: 'flate', filter: '/FlateDecode' },
{ name: 'lzw', filter: '/LZWDecode' },
{ name: 'ascii85', filter: '/ASCII85Decode' },
{ name: 'asciihex', filter: '/ASCIIHexDecode' }
];
const results = [];
for (const method of compressionMethods) {
const pdf = Buffer.from(
'%PDF-1.4\n' +
`1 0 obj\n<< /Length 0 /Filter ${method.filter} >>\n` +
'stream\n\nendstream\nendobj\n' +
'xref\n0 2\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'trailer\n<< /Size 2 >>\n' +
'startxref\n100\n%%EOF'
);
try {
const result = await einvoice.processPDFStream(pdf);
results.push({
method: method.name,
handled: true,
decompressed: true
});
} catch (error) {
results.push({
method: method.name,
handled: true,
error: error.message
});
}
}
return results;
}
);
compressedEmptyStreams.forEach(result => {
t.ok(result.handled, `Empty ${result.method} stream was handled`);
});
// Test 7: Zero-page PDF
const zeroPagePDF = await performanceTracker.measureAsync(
'zero-page-pdf',
async () => {
const zeroPagesPDF = Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
'xref\n0 3\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000058 00000 n\n' +
'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
'startxref\n115\n%%EOF'
);
try {
const result = await einvoice.extractFromPDF(zeroPagesPDF);
return {
processed: true,
pageCount: result?.pageCount || 0,
hasContent: !!result?.content,
canExtractXML: !!result?.xml
};
} catch (error) {
return {
processed: false,
error: error.message
};
}
}
);
t.ok(zeroPagePDF.processed || zeroPagePDF.error, 'Zero-page PDF was handled');
if (zeroPagePDF.processed) {
t.equal(zeroPagePDF.pageCount, 0, 'Page count is zero');
}
// Test 8: PDF with empty form fields
const emptyFormFields = await performanceTracker.measureAsync(
'pdf-with-empty-form-fields',
async () => {
const formPDF = Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /AcroForm 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /Fields [] >>\nendobj\n' +
'xref\n0 3\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000065 00000 n\n' +
'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
'startxref\n100\n%%EOF'
);
try {
const result = await einvoice.extractFromPDF(formPDF);
return {
processed: true,
hasForm: !!result?.form,
formFieldCount: result?.form?.fields?.length || 0,
hasData: !!result?.data
};
} catch (error) {
return {
processed: false,
error: error.message
};
}
}
);
t.ok(emptyFormFields.processed, 'PDF with empty form fields was processed');
// Test 9: Recovery attempts on zero-byte files
const recoveryAttempts = await performanceTracker.measureAsync(
'recovery-attempts-zero-byte',
async () => {
const corruptScenarios = [
{
name: 'no-header',
content: Buffer.from('This is not a PDF')
},
{
name: 'binary-garbage',
content: Buffer.from([0xFF, 0xFE, 0xFD, 0xFC, 0x00, 0x01, 0x02, 0x03])
},
{
name: 'html-instead',
content: Buffer.from('<html><body>Not a PDF</body></html>')
},
{
name: 'partial-header',
content: Buffer.from('%PDF-')
},
{
name: 'wrong-version',
content: Buffer.from('%PDF-99.9\n%%EOF')
}
];
const results = [];
for (const scenario of corruptScenarios) {
try {
const result = await einvoice.extractFromPDF(scenario.content, {
attemptRecovery: true
});
results.push({
scenario: scenario.name,
recovered: !!result,
hasAnyData: !!result?.xml || !!result?.attachments?.length
});
} catch (error) {
results.push({
scenario: scenario.name,
recovered: false,
errorMessage: error.message,
recognized: error.message.includes('PDF') || error.message.includes('format')
});
}
}
return results;
}
);
recoveryAttempts.forEach(result => {
t.ok(!result.recovered, `Recovery should fail for ${result.scenario}`);
t.ok(result.recognized, `Error should recognize invalid PDF format`);
});
// Test 10: Batch processing with zero-byte PDFs
const batchWithZeroBytes = await performanceTracker.measureAsync(
'batch-processing-zero-byte',
async () => {
const batch = [
{ name: 'normal', content: createValidPDF() },
{ name: 'zero-byte', content: Buffer.alloc(0) },
{ name: 'normal2', content: createValidPDF() },
{ name: 'header-only', content: Buffer.from('%PDF-1.4') },
{ name: 'normal3', content: createValidPDF() }
];
const results = {
total: batch.length,
successful: 0,
failed: 0,
skipped: 0,
errors: []
};
for (const item of batch) {
try {
const result = await einvoice.extractFromPDF(item.content);
if (result?.xml || result?.attachments?.length) {
results.successful++;
} else {
results.skipped++;
}
} catch (error) {
results.failed++;
results.errors.push({
name: item.name,
error: error.message
});
}
}
return results;
}
);
t.equal(batchWithZeroBytes.total,
batchWithZeroBytes.successful + batchWithZeroBytes.failed + batchWithZeroBytes.skipped,
'All batch items were processed');
t.ok(batchWithZeroBytes.failed > 0, 'Some zero-byte PDFs failed as expected');
// Print performance summary
performanceTracker.printSummary();
});
// Helper function to create a valid PDF with invoice attachment
function createValidPDF(): Buffer {
return Buffer.from(
'%PDF-1.4\n' +
'1 0 obj\n<< /Type /Catalog /Names 2 0 R >>\nendobj\n' +
'2 0 obj\n<< /EmbeddedFiles 3 0 R >>\nendobj\n' +
'3 0 obj\n<< /Names [(invoice.xml) 4 0 R] >>\nendobj\n' +
'4 0 obj\n<< /Type /Filespec /F (invoice.xml) /EF << /F 5 0 R >> >>\nendobj\n' +
'5 0 obj\n<< /Type /EmbeddedFile /Length 50 >>\nstream\n' +
'<?xml version="1.0"?><Invoice><ID>TEST</ID></Invoice>\n' +
'endstream\nendobj\n' +
'xref\n0 6\n' +
'0000000000 65535 f\n' +
'0000000009 00000 n\n' +
'0000000062 00000 n\n' +
'0000000103 00000 n\n' +
'0000000151 00000 n\n' +
'0000000229 00000 n\n' +
'trailer\n<< /Size 6 /Root 1 0 R >>\n' +
'startxref\n350\n%%EOF'
);
}
// Run the test
tap.start();