working

2025-04-03 16:41:10 +00:00
parent 21650f1181
commit a932d68f86
34 changed files with 1265 additions and 2987 deletions
--- a/ts/formats/pdf/pdf.embedder.ts
+++ b/ts/formats/pdf/pdf.embedder.ts
@@ -1,4 +1,4 @@
-import { PDFDocument } from 'pdf-lib';
+import { PDFDocument, AFRelationship } from 'pdf-lib';
 import type { IPdf } from '../../interfaces/common.js';

 /**
@@ -31,8 +31,11 @@ export class PDFEmbedder {

      // Use pdf-lib's .attach() to embed the XML
      pdfDoc.attach(xmlBuffer, filename, {
-        mimeType: 'application/xml',
+        mimeType: 'text/xml',
        description: description,
+        creationDate: new Date(),
+        modificationDate: new Date(),
+        afRelationship: AFRelationship.Alternative,
      });

      // Save the modified PDF
--- a/ts/formats/pdf/pdf.extractor.ts
+++ b/ts/formats/pdf/pdf.extractor.ts
@@ -79,16 +79,29 @@ export class PDFExtractor {
      }

      // Decompress and decode the XML content
-      const xmlCompressedBytes = xmlFile.getContents().buffer;
-      const xmlBytes = pako.inflate(xmlCompressedBytes);
-      const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
+      try {
+        const xmlCompressedBytes = xmlFile.getContents().buffer;
+        const xmlBytes = pako.inflate(xmlCompressedBytes);
+        const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);

-      console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
-      
-      return xmlContent;
+        console.log(`Successfully extracted XML from PDF file. File name: ${xmlFileName}`);
+        return xmlContent;
+      } catch (decompressError) {
+        // Try without decompression
+        console.log('Decompression failed, trying without decompression...');
+        try {
+          const xmlBytes = xmlFile.getContents();
+          const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
+          console.log(`Successfully extracted uncompressed XML from PDF file. File name: ${xmlFileName}`);
+          return xmlContent;
+        } catch (decodeError) {
+          console.error('Error decoding XML content:', decodeError);
+          return null;
+        }
+      }
    } catch (error) {
      console.error('Error extracting or parsing embedded XML from PDF:', error);
-      throw error;
+      return null;
    }
  }
 }