This commit is contained in:
2025-04-03 16:41:10 +00:00
parent 21650f1181
commit a932d68f86
34 changed files with 1265 additions and 2987 deletions

View File

@ -1,4 +1,4 @@
import { PDFDocument } from 'pdf-lib';
import { PDFDocument, AFRelationship } from 'pdf-lib';
import type { IPdf } from '../../interfaces/common.js';
/**
@ -31,8 +31,11 @@ export class PDFEmbedder {
// Use pdf-lib's .attach() to embed the XML
pdfDoc.attach(xmlBuffer, filename, {
mimeType: 'application/xml',
mimeType: 'text/xml',
description: description,
creationDate: new Date(),
modificationDate: new Date(),
afRelationship: AFRelationship.Alternative,
});
// Save the modified PDF

View File

@ -79,16 +79,29 @@ export class PDFExtractor {
}
// Decompress and decode the XML content
const xmlCompressedBytes = xmlFile.getContents().buffer;
const xmlBytes = pako.inflate(xmlCompressedBytes);
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
try {
const xmlCompressedBytes = xmlFile.getContents().buffer;
const xmlBytes = pako.inflate(xmlCompressedBytes);
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
} catch (decompressError) {
// Try without decompression
console.log('Decompression failed, trying without decompression...');
try {
const xmlBytes = xmlFile.getContents();
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
} catch (decodeError) {
console.error('Error decoding XML content:', decodeError);
return null;
}
}
} catch (error) {
console.error('Error extracting or parsing embedded XML from PDF:', error);
throw error;
return null;
}
}
}