// EDGE-05: Zero-Byte PDFs edge-case test suite (TypeScript source; listing metadata: 526 lines, 16 KiB).
import { tap } from '@git.zone/tstest/tapbundle';
|
|
import * as plugins from '../plugins.js';
|
|
import { EInvoice } from '../../../ts/index.js';
|
|
import { PerformanceTracker } from '../performance.tracker.js';
|
|
|
|
const performanceTracker = new PerformanceTracker('EDGE-05: Zero-Byte PDFs');

/**
 * Returns a printable message for any caught value.
 *
 * A `catch` clause can receive anything (string, null, plain object), so the
 * `message` property is only used when it is actually a string; otherwise the
 * value itself is stringified.
 *
 * @param error - the value received by a `catch` clause
 * @returns the value's string `message`, or `String(error)` as a fallback
 */
function errorMessageOf(error: unknown): string {
  const message = (error as { message?: unknown } | null | undefined)?.message;
  return typeof message === 'string' ? message : String(error);
}

/**
 * Returns the constructor name of a caught value, or its `typeof` when the
 * value has no constructor (for example `null` or `undefined`).
 *
 * @param error - the value received by a `catch` clause
 * @returns a short type label for reporting
 */
function errorTypeOf(error: unknown): string {
  const constructorName = (error as { constructor?: { name?: string } } | null | undefined)
    ?.constructor?.name;
  return constructorName ?? typeof error;
}

/*
 * EDGE-05 feeds empty, minimal, truncated and otherwise degenerate PDF buffers
 * into a single EInvoice instance. Each scenario runs inside
 * performanceTracker.measureAsync() and returns a plain summary object (or an
 * array of them) that the assertions below inspect. Exceptions thrown by the
 * EInvoice calls are caught inside each scenario and recorded in the summary.
 */
tap.test('EDGE-05: Zero-Byte PDFs - should handle zero-byte and minimal PDF files', async (t) => {
  const einvoice = new EInvoice();

  // Test 1: Truly zero-byte PDF
  const zeroByteFile = await performanceTracker.measureAsync(
    'truly-zero-byte-pdf',
    async () => {
      const zeroPDF = Buffer.alloc(0);

      try {
        const result = await einvoice.extractFromPDF(zeroPDF);

        return {
          handled: true,
          hasContent: !!result,
          hasXML: result?.xml !== undefined,
          hasAttachments: (result?.attachments?.length ?? 0) > 0,
          error: null,
          bufferSize: zeroPDF.length
        };
      } catch (error) {
        return {
          handled: true,
          hasContent: false,
          error: errorMessageOf(error),
          errorType: errorTypeOf(error),
          bufferSize: zeroPDF.length
        };
      }
    }
  );

  // NOTE(review): `handled` is true in both branches above, so this first
  // assertion cannot fail; the two that follow carry the actual checks.
  t.ok(zeroByteFile.handled, 'Zero-byte PDF was handled');
  t.notOk(zeroByteFile.hasContent, 'Zero-byte PDF has no content');
  t.equal(zeroByteFile.bufferSize, 0, 'Buffer size is zero');

  // Test 2: Minimal PDF structure
  const minimalPDFStructure = await performanceTracker.measureAsync(
    'minimal-pdf-structure',
    async () => {
      const minimalPDFs = [
        {
          name: 'header-only',
          content: Buffer.from('%PDF-1.4')
        },
        {
          name: 'header-and-eof',
          content: Buffer.from('%PDF-1.4\n%%EOF')
        },
        {
          name: 'empty-catalog',
          content: Buffer.from(
            '%PDF-1.4\n' +
            '1 0 obj\n<< /Type /Catalog >>\nendobj\n' +
            'xref\n0 2\n' +
            '0000000000 65535 f\n' +
            '0000000009 00000 n\n' +
            'trailer\n<< /Size 2 /Root 1 0 R >>\n' +
            'startxref\n64\n%%EOF'
          )
        },
        {
          name: 'single-empty-page',
          content: Buffer.from(
            '%PDF-1.4\n' +
            '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
            '2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
            'xref\n0 3\n' +
            '0000000000 65535 f\n' +
            '0000000009 00000 n\n' +
            '0000000052 00000 n\n' +
            'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
            'startxref\n110\n%%EOF'
          )
        }
      ];

      const results = [];

      for (const pdf of minimalPDFs) {
        try {
          const result = await einvoice.extractFromPDF(pdf.content);

          results.push({
            name: pdf.name,
            size: pdf.content.length,
            processed: true,
            hasXML: !!result?.xml,
            hasAttachments: (result?.attachments?.length ?? 0) > 0,
            hasMetadata: !!result?.metadata
          });
        } catch (error) {
          results.push({
            name: pdf.name,
            size: pdf.content.length,
            processed: false,
            error: errorMessageOf(error)
          });
        }
      }

      return results;
    }
  );

  minimalPDFStructure.forEach((result) => {
    // Either outcome is accepted here: a clean result or a recorded error.
    t.ok(result.processed || result.error, `Minimal PDF ${result.name} was processed`);
    t.notOk(result.hasXML, `Minimal PDF ${result.name} has no XML`);
  });

  // Test 3: Truncated PDF files
  const truncatedPDFs = await performanceTracker.measureAsync(
    'truncated-pdf-files',
    async () => {
      // Start with a valid PDF structure and truncate at different points
      const fullPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Pages /Count 1 /Kids [3 0 R] >>\nendobj\n' +
        '3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n' +
        'xref\n0 4\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000052 00000 n\n' +
        '0000000110 00000 n\n' +
        'trailer\n<< /Size 4 /Root 1 0 R >>\n' +
        'startxref\n196\n%%EOF'
      );

      const truncationPoints = [
        { name: 'after-header', bytes: 10 },
        { name: 'mid-object', bytes: 50 },
        { name: 'before-xref', bytes: 150 },
        { name: 'mid-xref', bytes: 250 },
        { name: 'before-eof', bytes: fullPDF.length - 5 }
      ];

      const results = [];

      for (const point of truncationPoints) {
        // subarray() yields the same view as the deprecated Buffer#slice().
        const truncated = fullPDF.subarray(0, point.bytes);

        try {
          const result = await einvoice.extractFromPDF(truncated);

          results.push({
            truncationPoint: point.name,
            size: truncated.length,
            recovered: true,
            hasPartialData: !!result
          });
        } catch (error) {
          const message = errorMessageOf(error);

          results.push({
            truncationPoint: point.name,
            size: truncated.length,
            recovered: false,
            error: message,
            isCorruptionError: message.includes('corrupt') || message.includes('truncated')
          });
        }
      }

      return results;
    }
  );

  truncatedPDFs.forEach((result) => {
    // `isCorruptionError` is only set on the failure path, so in practice this
    // passes exactly when extraction threw.
    t.ok(!result.recovered || result.isCorruptionError,
      `Truncated PDF at ${result.truncationPoint} should fail or be detected as corrupt`);
  });

  // Test 4: PDF with zero-byte attachment
  const zeroByteAttachment = await performanceTracker.measureAsync(
    'pdf-with-zero-byte-attachment',
    async () => {
      // Create a PDF with an embedded file of zero bytes
      const pdfWithEmptyAttachment = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Names 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /EmbeddedFiles 3 0 R >>\nendobj\n' +
        '3 0 obj\n<< /Names [(empty.xml) 4 0 R] >>\nendobj\n' +
        '4 0 obj\n<< /Type /Filespec /F (empty.xml) /EF << /F 5 0 R >> >>\nendobj\n' +
        '5 0 obj\n<< /Type /EmbeddedFile /Length 0 >>\nstream\n\nendstream\nendobj\n' +
        'xref\n0 6\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000062 00000 n\n' +
        '0000000103 00000 n\n' +
        '0000000151 00000 n\n' +
        '0000000229 00000 n\n' +
        'trailer\n<< /Size 6 /Root 1 0 R >>\n' +
        'startxref\n307\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(pdfWithEmptyAttachment);
        const attachments = result?.attachments;

        return {
          processed: true,
          hasAttachments: (attachments?.length ?? 0) > 0,
          attachmentCount: attachments?.length ?? 0,
          firstAttachmentSize: attachments?.[0]?.size ?? 0,
          firstAttachmentName: attachments?.[0]?.name ?? null
        };
      } catch (error) {
        return {
          processed: false,
          error: errorMessageOf(error)
        };
      }
    }
  );

  t.ok(zeroByteAttachment.processed, 'PDF with zero-byte attachment was processed');
  if (zeroByteAttachment.hasAttachments) {
    // NOTE(review): a missing `size` is reported as 0 above, so this also
    // passes when the attachment carries no size information at all.
    t.equal(zeroByteAttachment.firstAttachmentSize, 0, 'Attachment size is zero');
  }

  // Test 5: PDF with only metadata
  const metadataOnlyPDF = await performanceTracker.measureAsync(
    'pdf-with-only-metadata',
    async () => {
      const pdfWithMetadata = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Metadata 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Metadata /Subtype /XML /Length 100 >>\n' +
        'stream\n' +
        '<?xml version="1.0"?><x:xmpmeta xmlns:x="adobe:ns:meta/"><rdf:RDF></rdf:RDF></x:xmpmeta>\n' +
        'endstream\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000068 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n259\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(pdfWithMetadata);

        return {
          processed: true,
          hasMetadata: !!result?.metadata,
          hasXML: !!result?.xml,
          hasContent: !!result?.content,
          // "Empty" means neither invoice XML nor any attachment was found.
          isEmpty: !result?.xml && !result?.attachments?.length
        };
      } catch (error) {
        return {
          processed: false,
          error: errorMessageOf(error)
        };
      }
    }
  );

  t.ok(metadataOnlyPDF.processed, 'PDF with only metadata was processed');
  t.ok(metadataOnlyPDF.isEmpty, 'PDF with only metadata has no invoice content');

  // Test 6: Compressed empty streams
  const compressedEmptyStreams = await performanceTracker.measureAsync(
    'compressed-empty-streams',
    async () => {
      const compressionMethods = [
        { name: 'flate', filter: '/FlateDecode' },
        { name: 'lzw', filter: '/LZWDecode' },
        { name: 'ascii85', filter: '/ASCII85Decode' },
        { name: 'asciihex', filter: '/ASCIIHexDecode' }
      ];

      const results = [];

      for (const method of compressionMethods) {
        const pdf = Buffer.from(
          '%PDF-1.4\n' +
          `1 0 obj\n<< /Length 0 /Filter ${method.filter} >>\n` +
          'stream\n\nendstream\nendobj\n' +
          'xref\n0 2\n' +
          '0000000000 65535 f\n' +
          '0000000009 00000 n\n' +
          'trailer\n<< /Size 2 >>\n' +
          'startxref\n100\n%%EOF'
        );

        try {
          // Only completion matters here; the return value is not inspected.
          await einvoice.processPDFStream(pdf);

          results.push({
            method: method.name,
            handled: true,
            decompressed: true
          });
        } catch (error) {
          results.push({
            method: method.name,
            handled: true,
            error: errorMessageOf(error)
          });
        }
      }

      return results;
    }
  );

  compressedEmptyStreams.forEach((result) => {
    // NOTE(review): `handled` is true on both the success and the error path,
    // so this assertion cannot fail.
    t.ok(result.handled, `Empty ${result.method} stream was handled`);
  });

  // Test 7: Zero-page PDF
  const zeroPagePDF = await performanceTracker.measureAsync(
    'zero-page-pdf',
    async () => {
      const zeroPagesPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000058 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n115\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(zeroPagesPDF);

        return {
          processed: true,
          pageCount: result?.pageCount ?? 0,
          hasContent: !!result?.content,
          canExtractXML: !!result?.xml
        };
      } catch (error) {
        return {
          processed: false,
          error: errorMessageOf(error)
        };
      }
    }
  );

  t.ok(zeroPagePDF.processed || zeroPagePDF.error, 'Zero-page PDF was handled');
  if (zeroPagePDF.processed) {
    t.equal(zeroPagePDF.pageCount, 0, 'Page count is zero');
  }

  // Test 8: PDF with empty form fields
  const emptyFormFields = await performanceTracker.measureAsync(
    'pdf-with-empty-form-fields',
    async () => {
      const formPDF = Buffer.from(
        '%PDF-1.4\n' +
        '1 0 obj\n<< /Type /Catalog /AcroForm 2 0 R >>\nendobj\n' +
        '2 0 obj\n<< /Fields [] >>\nendobj\n' +
        'xref\n0 3\n' +
        '0000000000 65535 f\n' +
        '0000000009 00000 n\n' +
        '0000000065 00000 n\n' +
        'trailer\n<< /Size 3 /Root 1 0 R >>\n' +
        'startxref\n100\n%%EOF'
      );

      try {
        const result = await einvoice.extractFromPDF(formPDF);

        return {
          processed: true,
          hasForm: !!result?.form,
          formFieldCount: result?.form?.fields?.length ?? 0,
          hasData: !!result?.data
        };
      } catch (error) {
        return {
          processed: false,
          error: errorMessageOf(error)
        };
      }
    }
  );

  t.ok(emptyFormFields.processed, 'PDF with empty form fields was processed');

  // Test 9: Recovery attempts on zero-byte files
  const recoveryAttempts = await performanceTracker.measureAsync(
    'recovery-attempts-zero-byte',
    async () => {
      const corruptScenarios = [
        {
          name: 'no-header',
          content: Buffer.from('This is not a PDF')
        },
        {
          name: 'binary-garbage',
          content: Buffer.from([0xFF, 0xFE, 0xFD, 0xFC, 0x00, 0x01, 0x02, 0x03])
        },
        {
          name: 'html-instead',
          content: Buffer.from('<html><body>Not a PDF</body></html>')
        },
        {
          name: 'partial-header',
          content: Buffer.from('%PDF-')
        },
        {
          name: 'wrong-version',
          content: Buffer.from('%PDF-99.9\n%%EOF')
        }
      ];

      const results = [];

      for (const scenario of corruptScenarios) {
        try {
          const result = await einvoice.extractFromPDF(scenario.content, {
            attemptRecovery: true
          });

          results.push({
            scenario: scenario.name,
            recovered: !!result,
            hasAnyData: !!result?.xml || !!result?.attachments?.length
          });
        } catch (error) {
          const message = errorMessageOf(error);

          results.push({
            scenario: scenario.name,
            recovered: false,
            errorMessage: message,
            recognized: message.includes('PDF') || message.includes('format')
          });
        }
      }

      return results;
    }
  );

  recoveryAttempts.forEach((result) => {
    t.ok(!result.recovered, `Recovery should fail for ${result.scenario}`);
    // `recognized` exists only when extraction threw, so a call that returns
    // without throwing fails this check as well.
    t.ok(result.recognized, `Error should recognize invalid PDF format`);
  });

  // Test 10: Batch processing with zero-byte PDFs
  const batchWithZeroBytes = await performanceTracker.measureAsync(
    'batch-processing-zero-byte',
    async () => {
      const batch = [
        { name: 'normal', content: createValidPDF() },
        { name: 'zero-byte', content: Buffer.alloc(0) },
        { name: 'normal2', content: createValidPDF() },
        { name: 'header-only', content: Buffer.from('%PDF-1.4') },
        { name: 'normal3', content: createValidPDF() }
      ];

      const results = {
        total: batch.length,
        successful: 0,
        failed: 0,
        skipped: 0,
        // Typed explicitly: a bare `[]` here is inferred as never[] under strict settings.
        errors: [] as Array<{ name: string; error: string }>
      };

      for (const item of batch) {
        try {
          const result = await einvoice.extractFromPDF(item.content);

          // Items that yield neither XML nor attachments count as skipped.
          if (result?.xml || result?.attachments?.length) {
            results.successful++;
          } else {
            results.skipped++;
          }
        } catch (error) {
          results.failed++;
          results.errors.push({
            name: item.name,
            error: errorMessageOf(error)
          });
        }
      }

      return results;
    }
  );

  t.equal(batchWithZeroBytes.total,
    batchWithZeroBytes.successful + batchWithZeroBytes.failed + batchWithZeroBytes.skipped,
    'All batch items were processed');
  t.ok(batchWithZeroBytes.failed > 0, 'Some zero-byte PDFs failed as expected');

  // Print performance summary
  performanceTracker.printSummary();
});
|
|
|
|
/**
 * Helper function to create a small PDF with an embedded `invoice.xml`.
 *
 * The document consists of five indirect objects: a catalog, a names
 * dictionary, an embedded-files name tree, a file specification and the
 * embedded file stream holding the invoice XML. The stream `/Length`, the
 * cross-reference offsets and the `startxref` value are computed from the
 * generated bytes, so they always agree with the actual content.
 *
 * NOTE(review): the catalog has no `/Pages` entry, so strict PDF parsers may
 * still reject this fixture — confirm against the parser used by EInvoice.
 *
 * @returns a Buffer containing the generated PDF bytes
 */
function createValidPDF(): Buffer {
  const invoiceXml = '<?xml version="1.0"?><Invoice><ID>TEST</ID></Invoice>';

  // Bodies of objects 1..5, in object-number order.
  const objectBodies = [
    '<< /Type /Catalog /Names 2 0 R >>',
    '<< /EmbeddedFiles 3 0 R >>',
    '<< /Names [(invoice.xml) 4 0 R] >>',
    '<< /Type /Filespec /F (invoice.xml) /EF << /F 5 0 R >> >>',
    `<< /Type /EmbeddedFile /Length ${Buffer.byteLength(invoiceXml)} >>\n` +
      `stream\n${invoiceXml}\nendstream`
  ];

  let document = '%PDF-1.4\n';
  const objectOffsets = [];

  for (const [index, body] of objectBodies.entries()) {
    // Record where this object starts before appending it.
    objectOffsets.push(Buffer.byteLength(document));
    document += `${index + 1} 0 obj\n${body}\nendobj\n`;
  }

  const xrefOffset = Buffer.byteLength(document);
  const objectCount = objectBodies.length + 1; // +1 for the free entry 0

  // Each cross-reference entry is exactly 20 bytes, including a two-byte
  // end-of-line (space + LF).
  const xrefEntries = [
    '0000000000 65535 f \n',
    ...objectOffsets.map((offset) => `${String(offset).padStart(10, '0')} 00000 n \n`)
  ];

  document +=
    `xref\n0 ${objectCount}\n` +
    xrefEntries.join('') +
    `trailer\n<< /Size ${objectCount} /Root 1 0 R >>\n` +
    `startxref\n${xrefOffset}\n%%EOF`;

  return Buffer.from(document);
}
|
|
|
|
// Run the test
tap.start();