This commit is contained in:
2025-04-03 16:41:10 +00:00
parent 21650f1181
commit a932d68f86
34 changed files with 1265 additions and 2987 deletions

View File

@ -79,16 +79,29 @@ export class PDFExtractor {
}
// Decompress and decode the XML content
const xmlCompressedBytes = xmlFile.getContents().buffer;
const xmlBytes = pako.inflate(xmlCompressedBytes);
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
try {
const xmlCompressedBytes = xmlFile.getContents().buffer;
const xmlBytes = pako.inflate(xmlCompressedBytes);
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
} catch (decompressError) {
// Try without decompression
console.log('Decompression failed, trying without decompression...');
try {
const xmlBytes = xmlFile.getContents();
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
} catch (decodeError) {
console.error('Error decoding XML content:', decodeError);
return null;
}
}
} catch (error) {
console.error('Error extracting or parsing embedded XML from PDF:', error);
throw error;
return null;
}
}
}