import * as plugins from './plugins.js';
import * as interfaces from './interfaces.js';
import {
  PDFDocument,
  PDFDict,
  PDFName,
  PDFRawStream,
  PDFArray,
  PDFString,
  PDFHexString,
} from 'pdf-lib';
import { ZugferdXmlEncoder } from './classes.encoder.js';
import { ZUGFeRDXmlDecoder } from './classes.decoder.js';

/**
 * Embeds an invoice XML document into a PDF and reads it back out again.
 *
 * Typical write flow: `addPdfBuffer()` + `addXmlString()` + `addLetterData()`,
 * then `getXInvoice()`. Typical read flow: `addPdfBuffer()`, then
 * `getXmlData()` or `getParsedXmlData()`.
 *
 * NOTE(review): the copy of this file that was reviewed had lost several
 * `<...>` fragments (generic arguments, string/regex literals beginning with
 * `<`). The spots that had to be reconstructed are marked with NOTE(review)
 * below - please confirm them against version control.
 */
export class XInvoice {
  private xmlString?: string;
  private letterData?: plugins.tsclass.business.ILetter;
  private pdfUint8Array?: Uint8Array;

  private encoderInstance = new ZugferdXmlEncoder();
  // Created lazily: the decoder needs XML content to be constructed.
  private decoderInstance?: ZUGFeRDXmlDecoder;

  constructor() {
    // Decoder will be initialized when we have XML data
  }

  /**
   * Stores a copy of the PDF that XML will be embedded into / extracted from.
   *
   * @param pdfBuffer - raw PDF bytes
   */
  public async addPdfBuffer(pdfBuffer: Uint8Array | Buffer): Promise<void> {
    this.pdfUint8Array = Uint8Array.from(pdfBuffer);
  }

  /**
   * Stores the invoice XML and (re)creates the decoder for it.
   *
   * NOTE(review): the body of this method was truncated in the reviewed copy
   * (only the "check if it starts with" comment survived). The validation and
   * the error message below are a reconstruction - confirm.
   *
   * @param xmlString - complete XML document, expected to start with an XML declaration
   * @throws Error when the string is empty or lacks an XML declaration
   */
  public async addXmlString(xmlString: string): Promise<void> {
    // Basic XML validation - just check if it starts with an XML declaration
    if (!xmlString || !xmlString.trim().startsWith('<?xml')) {
      throw new Error('Invalid XML: Missing XML declaration');
    }
    this.xmlString = xmlString;
    this.decoderInstance = new ZUGFeRDXmlDecoder(xmlString);
  }

  /**
   * Stores the letter data associated with the invoice.
   *
   * NOTE(review): the method name was lost in the reviewed copy; only the
   * assignment survived. `addLetterData` follows the naming of the sibling
   * `add*` methods - confirm.
   *
   * @param letterData - structured letter/invoice data
   */
  public async addLetterData(letterData: plugins.tsclass.business.ILetter): Promise<void> {
    this.letterData = letterData;
  }

  /**
   * Attaches the stored XML to the stored PDF as `invoice.xml` and replaces
   * the stored PDF bytes with the result.
   *
   * @throws Error when no PDF buffer has been added
   * @throws Error when the XML string or the letter data is missing
   */
  public async getXInvoice(): Promise<void> {
    // lets check requirements
    if (!this.pdfUint8Array) {
      throw new Error('No PDF buffer provided!');
    }
    if (!this.xmlString || !this.letterData) {
      // TODO: check if document already has xml
      throw new Error('No XML string or letter data provided!');
    }

    const pdfBytes = this.pdfUint8Array;
    const xml = this.xmlString;

    try {
      const pdfDoc = await PDFDocument.load(pdfBytes);

      // Convert the XML string to a Uint8Array
      const xmlBuffer = new TextEncoder().encode(xml);

      // Use pdf-lib's .attach() to embed the XML
      pdfDoc.attach(xmlBuffer, 'invoice.xml', {
        mimeType: 'application/xml',
        description: 'XRechnung XML Invoice',
      });

      // Save back into this.pdfUint8Array
      this.pdfUint8Array = await pdfDoc.save();
      console.log(`PDF Buffer updated with new XML attachment!`);
    } catch (error) {
      console.error('Error embedding XML into PDF:', error);
      throw error;
    }
  }

  /**
   * Reads the XML embedded in the stored PDF and returns it as a string.
   * The extracted XML is also stored on the instance and the decoder is
   * re-created for it.
   *
   * @returns the embedded XML document
   * @throws Error when no PDF buffer has been added, when the PDF has no
   *   embedded-files name tree, or when no `.xml` attachment is found
   */
  public async getXmlData(): Promise<string> {
    if (!this.pdfUint8Array) {
      throw new Error('No PDF buffer provided! Use addPdfBuffer() first.');
    }

    try {
      const pdfDoc = await PDFDocument.load(this.pdfUint8Array);
      const { stream, fileName } = this.findEmbeddedXmlStream(pdfDoc);

      // Pass the Uint8Array view itself: its `.buffer` is the whole backing
      // ArrayBuffer and may contain bytes outside of this stream.
      const rawBytes = stream.getContents();
      // Streams without a /Filter entry are stored uncompressed.
      const xmlBytes = stream.dict.has(PDFName.of('Filter'))
        ? plugins.pako.inflate(rawBytes)
        : rawBytes;
      const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);

      // Keep the stored XML and the decoder in sync with what was extracted.
      this.xmlString = xmlContent;
      this.decoderInstance = new ZUGFeRDXmlDecoder(xmlContent);

      const format = this.identifyXmlFormat(xmlContent);
      console.log(`Successfully extracted ${format} XML from PDF file. File name: ${fileName}`);

      return xmlContent;
    } catch (error) {
      console.error('Error extracting or parsing embedded XML from PDF:', error);
      throw error;
    }
  }

  /**
   * Walks Catalog -> Names -> EmbeddedFiles -> Names and returns the first
   * attachment whose file name contains `.xml`.
   */
  private findEmbeddedXmlStream(pdfDoc: PDFDocument): { stream: PDFRawStream; fileName: string } {
    const namesDict = pdfDoc.catalog.lookup(PDFName.of('Names'));
    if (!(namesDict instanceof PDFDict)) {
      throw new Error('No Names dictionary found in PDF! This PDF does not contain embedded files.');
    }

    const embeddedFilesDict = namesDict.lookup(PDFName.of('EmbeddedFiles'));
    if (!(embeddedFilesDict instanceof PDFDict)) {
      throw new Error('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
    }

    const fileSpecs = embeddedFilesDict.lookup(PDFName.of('Names'));
    if (!(fileSpecs instanceof PDFArray)) {
      throw new Error('No files specified in EmbeddedFiles dictionary!');
    }

    // The array is a flat list of [name, fileSpec, name, fileSpec, ...] pairs.
    for (let i = 0; i + 1 < fileSpecs.size(); i += 2) {
      const nameObj = fileSpecs.lookup(i);
      const specObj = fileSpecs.lookup(i + 1);

      // pdf-lib's own attach() writes the key as a hex string, other
      // producers use a literal string - accept both.
      if (!(nameObj instanceof PDFString || nameObj instanceof PDFHexString)) {
        continue;
      }
      if (!(specObj instanceof PDFDict)) {
        continue;
      }

      const fileName = nameObj.decodeText();
      // Simple check - an improved version would check the MIME type
      if (!fileName.toLowerCase().includes('.xml')) {
        continue;
      }

      const efDict = specObj.lookup(PDFName.of('EF'));
      if (!(efDict instanceof PDFDict)) {
        continue;
      }

      const maybeStream = efDict.lookup(PDFName.of('F'));
      if (maybeStream instanceof PDFRawStream) {
        return { stream: maybeStream, fileName };
      }
    }

    throw new Error('No embedded XML file found in the PDF!');
  }

  /**
   * Identifies the invoice format of an XML document by substring checks.
   *
   * NOTE(review): everything after the CII check was truncated in the
   * reviewed copy. The UBL / FatturaPA markers and the `'Unknown'` default
   * are a reconstruction; only the return values `'UBL'` and `'FatturaPA'`
   * are confirmed by the switch in parseXmlToInvoice - confirm the rest.
   *
   * @returns `'ZUGFeRD/CII'`, `'UBL'`, `'FatturaPA'` or `'Unknown'`
   */
  private identifyXmlFormat(xmlContent: string): string {
    // Check for ZUGFeRD/CII
    if (
      xmlContent.includes('CrossIndustryInvoice') ||
      xmlContent.includes('rsm:') ||
      xmlContent.includes('ram:')
    ) {
      return 'ZUGFeRD/CII';
    }

    // Check for UBL
    if (
      xmlContent.includes('<Invoice') ||
      xmlContent.includes('ubl:Invoice') ||
      xmlContent.includes('oasis:names:specification:ubl')
    ) {
      return 'UBL';
    }

    // Check for FatturaPA
    if (xmlContent.includes('FatturaElettronica') || xmlContent.includes('fatturapa.gov.it')) {
      return 'FatturaPA';
    }

    return 'Unknown';
  }

  /**
   * Returns the invoice as a structured object, using the stored XML string
   * or, when none is stored, the XML embedded in the stored PDF.
   *
   * NOTE(review): the method name and doc comment were lost in the reviewed
   * copy; `getParsedXmlData` is a reconstruction - confirm.
   *
   * @throws Error when neither an XML string nor a PDF buffer is available
   */
  public async getParsedXmlData(): Promise<interfaces.IXInvoice> {
    if (!this.xmlString && !this.pdfUint8Array) {
      throw new Error('No XML string or PDF buffer provided!');
    }

    const localXmlString = this.xmlString || (await this.getXmlData());
    return this.parseXmlToInvoice(localXmlString);
  }

  /**
   * Parses XML content into a structured IXInvoice object.
   * Dispatches on the detected format (ZUGFeRD/CII, UBL, FatturaPA) and
   * falls back to the generic placeholder parser for anything else.
   *
   * @throws Error when `xmlContent` is empty or parsing fails
   */
  private parseXmlToInvoice(xmlContent: string): interfaces.IXInvoice {
    if (!xmlContent) {
      throw new Error('No XML content provided for parsing');
    }

    try {
      // Always bind the decoder to the content that is being parsed
      this.decoderInstance = new ZUGFeRDXmlDecoder(xmlContent);

      const format = this.identifyXmlFormat(xmlContent);

      switch (format) {
        case 'ZUGFeRD/CII':
          return this.parseCIIFormat(xmlContent);
        case 'UBL':
          return this.parseUBLFormat(xmlContent);
        case 'FatturaPA':
          return this.parseFatturaPAFormat(xmlContent);
        default:
          // If format unrecognized, try generic parsing
          return this.parseGenericXml(xmlContent);
      }
    } catch (error) {
      console.error('Error parsing XML to invoice structure:', error);
      // The caught value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to parse XML: ${reason}`);
    }
  }

  /**
   * Returns the trimmed text content of the first element matching `tagName`,
   * or `''` when nothing matches.
   *
   * `tagName` is interpolated into a regular expression, so callers may pass
   * a pattern such as `'cac:Parent.*?cbc:Name'`. The dotAll flag lets such a
   * `.*?` span line breaks in pretty-printed XML.
   */
  private extractXmlValueByRegex(xmlContent: string, tagName: string): string {
    const regex = new RegExp(`<${tagName}[^>]*>([^<]+)`, 'is');
    return xmlContent.match(regex)?.[1]?.trim() ?? '';
  }

  /**
   * Returns the text of the first `ram:Name` inside the given CII party
   * element (e.g. `ram:SellerTradeParty`), or `''` when absent.
   */
  private extractCiiPartyName(xmlContent: string, partyTag: string): string {
    // Restrict the search to the party element so a party without a name
    // cannot pick up the name of the party that follows it.
    const partyRegex = new RegExp(`<${partyTag}\\b[^>]*>(.*?)</${partyTag}>`, 's');
    const partyXml = xmlContent.match(partyRegex)?.[1];
    if (!partyXml) {
      return '';
    }
    return partyXml.match(/<ram:Name\b[^>]*>([^<]+)<\/ram:Name>/)?.[1]?.trim() ?? '';
  }

  /**
   * Parses CII/ZUGFeRD format XML.
   * Demo implementation: only invoice number, issue date and the two party
   * names are extracted; everything else is placeholder data.
   *
   * NOTE(review): the opening tags inside the original regex literals were
   * lost in the reviewed copy; the element names used here follow the CII
   * schema - confirm.
   */
  private parseCIIFormat(xmlContent: string): interfaces.IXInvoice {
    try {
      // The invoice number is the ram:ID of rsm:ExchangedDocument. The first
      // ram:ID in a CII file usually belongs to the guideline context, so it
      // is only used as a fallback. `\b` keeps "ExchangedDocumentContext"
      // from matching.
      const documentIdMatch =
        xmlContent.match(/<rsm:ExchangedDocument\b[^>]*>.*?<ram:ID\b[^>]*>([^<]+)<\/ram:ID>/s) ??
        xmlContent.match(/<ram:ID\b[^>]*>([^<]+)<\/ram:ID>/);
      const invoiceNumber = documentIdMatch?.[1]?.trim() || 'Unknown';

      // The date is returned exactly as written in the document.
      const dateMatch = xmlContent.match(
        /<udt:DateTimeString[^>]*>([^<]+)<\/udt:DateTimeString>/,
      );
      const dateIssued = dateMatch?.[1]?.trim() || this.todayIsoDate();

      const sellerName =
        this.extractCiiPartyName(xmlContent, 'ram:SellerTradeParty') || 'Unknown Seller';
      const buyerName =
        this.extractCiiPartyName(xmlContent, 'ram:BuyerTradeParty') || 'Unknown Buyer';

      return this.buildPlaceholderInvoice({ invoiceNumber, dateIssued, sellerName, buyerName });
    } catch (error) {
      console.error('Error parsing CII format:', error);
      return this.parseGenericXml(xmlContent); // Fallback
    }
  }

  /**
   * Parses UBL format XML.
   * Simplified: only invoice number, issue date and the two party names are
   * extracted; everything else is placeholder data.
   */
  private parseUBLFormat(xmlContent: string): interfaces.IXInvoice {
    try {
      const invoiceNumber = this.extractXmlValueByRegex(xmlContent, 'cbc:ID');
      const dateIssued = this.extractXmlValueByRegex(xmlContent, 'cbc:IssueDate');
      const sellerName = this.extractXmlValueByRegex(
        xmlContent,
        'cac:AccountingSupplierParty.*?cbc:Name',
      );
      const buyerName = this.extractXmlValueByRegex(
        xmlContent,
        'cac:AccountingCustomerParty.*?cbc:Name',
      );

      // `||` on purpose: the extractor signals "not found" with ''.
      return this.buildPlaceholderInvoice({
        invoiceNumber: invoiceNumber || 'Unknown',
        dateIssued: dateIssued || this.todayIsoDate(),
        sellerName: sellerName || 'Unknown Seller',
        buyerName: buyerName || 'Unknown Buyer',
      });
    } catch (error) {
      console.error('Error parsing UBL format:', error);
      return this.parseGenericXml(xmlContent);
    }
  }

  /**
   * Parses fatturaPA format XML.
   * No fatturaPA-specific parsing exists yet; delegates to the generic parser.
   */
  private parseFatturaPAFormat(xmlContent: string): interfaces.IXInvoice {
    return this.parseGenericXml(xmlContent);
  }

  /**
   * Generic fallback parser. Currently extracts nothing from the XML and
   * returns a placeholder structure dated today.
   */
  private parseGenericXml(xmlContent: string): interfaces.IXInvoice {
    return this.buildPlaceholderInvoice({
      invoiceNumber: '(Unknown format - invoice number not extracted)',
      dateIssued: this.todayIsoDate(),
      sellerName: 'Unknown Seller (format not recognized)',
      buyerName: 'Unknown Buyer (format not recognized)',
      itemDescription: 'Unknown items (invoice format not recognized)',
    });
  }

  /** Today's date (UTC) as `YYYY-MM-DD`. */
  private todayIsoDate(): string {
    return new Date().toISOString().slice(0, 10);
  }

  /**
   * Builds an invoice whose address, contact, item and total fields are
   * placeholders; only the given fields carry real data.
   */
  private buildPlaceholderInvoice(fields: {
    invoiceNumber: string;
    dateIssued: string;
    sellerName: string;
    buyerName: string;
    itemDescription?: string;
  }): interfaces.IXInvoice {
    // A fresh object per party so that callers never share nested references.
    const placeholderParty = (name: string) => ({
      Name: name,
      Address: {
        Street: 'Unknown',
        City: 'Unknown',
        PostalCode: 'Unknown',
        Country: 'Unknown',
      },
      Contact: {
        Email: 'unknown@example.com',
        Phone: 'Unknown',
      },
    });

    return {
      InvoiceNumber: fields.invoiceNumber,
      DateIssued: fields.dateIssued,
      Seller: placeholderParty(fields.sellerName),
      Buyer: placeholderParty(fields.buyerName),
      Items: [
        {
          Description: fields.itemDescription ?? 'Unknown Item',
          Quantity: 1,
          UnitPrice: 0,
          TotalPrice: 0,
        },
      ],
      TotalAmount: 0,
    };
  }
}