fix(core): Refactor module imports to use the centralized plugins module and update relative paths across the codebase. Also remove the obsolete test file (test/test.other-formats-corpus.ts) and update file metadata in test outputs.
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from '../../../plugins.js';
|
||||
import { BaseXMLExtractor } from './base.extractor.js';
|
||||
|
||||
/**
|
||||
@ -15,48 +15,48 @@ export class AssociatedFilesExtractor extends BaseXMLExtractor {
|
||||
public async extractXml(pdfBuffer: Uint8Array | Buffer): Promise<string | null> {
|
||||
try {
|
||||
const pdfDoc = await PDFDocument.load(pdfBuffer);
|
||||
|
||||
|
||||
// Try to find associated files via the AF entry in the catalog
|
||||
const afArray = pdfDoc.catalog.lookup(PDFName.of('AF'));
|
||||
if (!(afArray instanceof PDFArray)) {
|
||||
console.warn('No AF (Associated Files) entry found in PDF catalog');
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
// Process each associated file
|
||||
for (let i = 0; i < afArray.size(); i++) {
|
||||
const fileSpec = afArray.lookup(i);
|
||||
if (!(fileSpec instanceof PDFDict)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Get the file name
|
||||
const fileNameObj = fileSpec.lookup(PDFName.of('F')) || fileSpec.lookup(PDFName.of('UF'));
|
||||
if (!(fileNameObj instanceof PDFString)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
const fileName = fileNameObj.decodeText();
|
||||
|
||||
|
||||
// Check if it's a known invoice XML file name
|
||||
const isKnownFileName = this.knownFileNames.some(
|
||||
knownName => fileName.toLowerCase() === knownName.toLowerCase()
|
||||
);
|
||||
|
||||
|
||||
// Check if it's any XML file or has invoice-related keywords
|
||||
const isXmlFile = fileName.toLowerCase().endsWith('.xml') ||
|
||||
const isXmlFile = fileName.toLowerCase().endsWith('.xml') ||
|
||||
fileName.toLowerCase().includes('zugferd') ||
|
||||
fileName.toLowerCase().includes('factur-x') ||
|
||||
fileName.toLowerCase().includes('xrechnung') ||
|
||||
fileName.toLowerCase().includes('invoice');
|
||||
|
||||
|
||||
if (isKnownFileName || isXmlFile) {
|
||||
// Get the embedded file dictionary
|
||||
const efDict = fileSpec.lookup(PDFName.of('EF'));
|
||||
if (!(efDict instanceof PDFDict)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Get the file stream
|
||||
const fileStream = efDict.lookup(PDFName.of('F'));
|
||||
if (fileStream instanceof PDFRawStream) {
|
||||
@ -67,7 +67,7 @@ export class AssociatedFilesExtractor extends BaseXMLExtractor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
console.warn('No valid XML found in associated files');
|
||||
return null;
|
||||
} catch (error) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from 'pdf-lib';
|
||||
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from '../../../plugins.js';
|
||||
import { BaseXMLExtractor } from './base.extractor.js';
|
||||
|
||||
/**
|
||||
@ -47,19 +47,19 @@ export class StandardXMLExtractor extends BaseXMLExtractor {
|
||||
|
||||
// Get the filename as string
|
||||
const fileName = fileNameObj.decodeText();
|
||||
|
||||
|
||||
// Check if it's a known invoice XML file name
|
||||
const isKnownFileName = this.knownFileNames.some(
|
||||
knownName => fileName.toLowerCase() === knownName.toLowerCase()
|
||||
);
|
||||
|
||||
|
||||
// Check if it's any XML file or has invoice-related keywords
|
||||
const isXmlFile = fileName.toLowerCase().endsWith('.xml') ||
|
||||
const isXmlFile = fileName.toLowerCase().endsWith('.xml') ||
|
||||
fileName.toLowerCase().includes('zugferd') ||
|
||||
fileName.toLowerCase().includes('factur-x') ||
|
||||
fileName.toLowerCase().includes('xrechnung') ||
|
||||
fileName.toLowerCase().includes('invoice');
|
||||
|
||||
|
||||
if (isKnownFileName || isXmlFile) {
|
||||
const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
|
||||
if (!(efDictObj instanceof PDFDict)) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { PDFDocument, AFRelationship } from 'pdf-lib';
|
||||
import { PDFDocument, AFRelationship } from '../../plugins.js';
|
||||
import type { IPdf } from '../../interfaces/common.js';
|
||||
|
||||
/**
|
||||
|
Reference in New Issue
Block a user