457 lines
14 KiB
TypeScript
457 lines
14 KiB
TypeScript
import * as plugins from './plugins.js';
import * as interfaces from './interfaces.js';
import {
  PDFDocument,
  PDFDict,
  PDFName,
  PDFRawStream,
  PDFArray,
  PDFString,
  PDFHexString,
} from 'pdf-lib';
import { FacturXEncoder } from './formats/facturx.encoder.js';
import { DecoderFactory } from './formats/decoder.factory.js';
import { BaseDecoder } from './formats/base.decoder.js';

/**
 * Holds a PDF, an XML invoice string and letter data, and can
 *  - attach the XML to the PDF (`getXInvoice`),
 *  - read an embedded XML attachment back out of the PDF (`getXmlData`),
 *  - produce a simplified, regex-based `IXInvoice` view of the XML
 *    (`getParsedXmlData`). Only invoice number, issue date and party names
 *    are extracted; every other field is a placeholder.
 */
export class XInvoice {
  /** XML invoice, set by addXmlString() or extracted by getXmlData(). */
  private xmlString?: string;

  /** Letter data supplied through addLetterData(). */
  private letterData?: plugins.tsclass.business.ILetter;

  /** Current PDF bytes; replaced by getXInvoice() after the XML is attached. */
  private pdfUint8Array?: Uint8Array;

  private encoderInstance = new FacturXEncoder();

  /** Decoder matching the current xmlString; created once XML is available. */
  private decoderInstance?: BaseDecoder;

  constructor() {
    // Decoder will be initialized when we have XML data
  }

  /**
   * Stores a private copy of the given PDF bytes.
   * @param pdfBuffer PDF file content.
   */
  public async addPdfBuffer(pdfBuffer: Uint8Array | Buffer): Promise<void> {
    this.pdfUint8Array = Uint8Array.from(pdfBuffer);
  }

  /**
   * Stores the XML invoice and creates a decoder for it.
   * @param xmlString XML document; must start (after whitespace) with `<?xml`.
   * @throws Error when the XML declaration is missing.
   */
  public async addXmlString(xmlString: string): Promise<void> {
    // Basic XML validation - just check if it starts with <?xml
    if (!xmlString || !xmlString.trim().startsWith('<?xml')) {
      throw new Error('Invalid XML: Missing XML declaration');
    }

    this.xmlString = xmlString;
    this.decoderInstance = DecoderFactory.createDecoder(xmlString);
  }

  /**
   * Stores the letter data.
   * @param letterData Structured letter/invoice data.
   */
  public async addLetterData(letterData: plugins.tsclass.business.ILetter): Promise<void> {
    this.letterData = letterData;
  }

  /**
   * Attaches the stored XML to the stored PDF as `invoice.xml` and keeps the
   * resulting PDF bytes in this instance.
   * @throws Error when the PDF, the XML string or the letter data is missing,
   *         or when pdf-lib fails to load/modify/save the document.
   */
  public async getXInvoice(): Promise<void> {
    const pdfBytes = this.pdfUint8Array;
    const xml = this.xmlString;

    if (!pdfBytes) {
      throw new Error('No PDF buffer provided!');
    }
    // NOTE(review): both XML and letter data are required here although only
    // the XML is embedded below - confirm whether `&&` was intended.
    if (!xml || !this.letterData) {
      // TODO: check if document already has xml
      throw new Error('No XML string or letter data provided!');
    }

    try {
      const pdfDoc = await PDFDocument.load(pdfBytes);
      const xmlBuffer = new TextEncoder().encode(xml);

      // attach() returns a promise; awaiting it routes any rejection into
      // the catch below instead of leaving it unhandled.
      await pdfDoc.attach(xmlBuffer, plugins.path.basename('invoice.xml'), {
        mimeType: 'application/xml',
        description: 'XRechnung XML Invoice',
      });

      this.pdfUint8Array = await pdfDoc.save();
      console.log(`PDF Buffer updated with new XML attachment!`);
    } catch (error) {
      console.error('Error embedding XML into PDF:', error);
      throw error;
    }
  }

  /**
   * Reads the first embedded file whose name contains `.xml` from the stored
   * PDF, stores it as the current XML string and returns it.
   * @returns The embedded XML decoded as UTF-8.
   * @throws Error when no PDF was provided, the PDF has no embedded-files
   *         name tree, or no XML attachment is found.
   */
  public async getXmlData(): Promise<string> {
    const pdfBytes = this.pdfUint8Array;
    if (!pdfBytes) {
      throw new Error('No PDF buffer provided! Use addPdfBuffer() first.');
    }

    try {
      const pdfDoc = await PDFDocument.load(pdfBytes);

      const embedded = this.findEmbeddedXmlFile(pdfDoc);
      if (!embedded) {
        throw new Error('No embedded XML file found in the PDF!');
      }

      const xmlBytes = this.decodeEmbeddedStream(embedded.stream);
      const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);

      // The stored XML is replaced, so the decoder is re-created as well to
      // keep both in sync (a decoder from an earlier XML would be stale).
      this.xmlString = xmlContent;
      this.decoderInstance = DecoderFactory.createDecoder(xmlContent);

      const format = this.identifyXmlFormat(xmlContent);
      console.log(
        `Successfully extracted ${format} XML from PDF file. File name: ${embedded.fileName}`,
      );

      return xmlContent;
    } catch (error) {
      console.error('Error extracting or parsing embedded XML from PDF:', error);
      throw error;
    }
  }

  /**
   * Walks Catalog -> Names -> EmbeddedFiles -> Names and returns the first
   * entry whose file name contains `.xml` and whose file spec has an EF/F stream.
   * Only the flat `Names` array is inspected; `Kids` subtrees are not traversed.
   * @throws Error when one of the dictionaries/arrays on that path is missing.
   */
  private findEmbeddedXmlFile(
    pdfDoc: PDFDocument,
  ): { stream: PDFRawStream; fileName: string } | undefined {
    const namesDict = pdfDoc.catalog.lookup(PDFName.of('Names'));
    if (!(namesDict instanceof PDFDict)) {
      throw new Error('No Names dictionary found in PDF! This PDF does not contain embedded files.');
    }

    const embeddedFilesDict = namesDict.lookup(PDFName.of('EmbeddedFiles'));
    if (!(embeddedFilesDict instanceof PDFDict)) {
      throw new Error('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
    }

    const entries = embeddedFilesDict.lookup(PDFName.of('Names'));
    if (!(entries instanceof PDFArray)) {
      throw new Error('No files specified in EmbeddedFiles dictionary!');
    }

    // The array alternates [name, fileSpec, name, fileSpec, ...].
    for (let i = 0; i + 1 < entries.size(); i += 2) {
      const nameObj = entries.lookup(i);
      const fileSpec = entries.lookup(i + 1);

      // Names may be literal or hex strings; pdf-lib's attach() writes hex strings.
      if (!(nameObj instanceof PDFString || nameObj instanceof PDFHexString)) {
        continue;
      }
      if (!(fileSpec instanceof PDFDict)) {
        continue;
      }

      // decodeText() yields the actual name; toString() would return the
      // PDF syntax form (parenthesised or hex-encoded).
      const fileName = nameObj.decodeText();

      // Simple check - improved would check MIME type
      if (!fileName.toLowerCase().includes('.xml')) {
        continue;
      }

      const efDict = fileSpec.lookup(PDFName.of('EF'));
      if (!(efDict instanceof PDFDict)) {
        continue;
      }

      const stream = efDict.lookup(PDFName.of('F'));
      if (stream instanceof PDFRawStream) {
        return { stream, fileName };
      }
    }

    return undefined;
  }

  /**
   * Returns the decoded bytes of an embedded file stream: the raw contents
   * when the stream has no /Filter entry, otherwise the inflated contents.
   */
  private decodeEmbeddedStream(stream: PDFRawStream): Uint8Array {
    // Pass the Uint8Array view itself, not `.buffer`, so only the stream's
    // own bytes are handed to the inflater.
    const rawBytes = stream.getContents();
    const filter = stream.dict.lookup(PDFName.of('Filter'));
    if (filter === undefined) {
      return rawBytes;
    }
    return plugins.pako.inflate(rawBytes);
  }

  /**
   * Identifies the invoice format by substring markers.
   * @returns 'ZUGFeRD/CII', 'UBL', 'FatturaPA' or 'Unknown' (checked in that order).
   */
  private identifyXmlFormat(xmlContent: string): string {
    const signatures: ReadonlyArray<readonly [string, readonly string[]]> = [
      ['ZUGFeRD/CII', ['CrossIndustryInvoice', 'rsm:', 'ram:']],
      ['UBL', ['<Invoice', 'ubl:Invoice', 'oasis:names:specification:ubl']],
      ['FatturaPA', ['FatturaElettronica', 'fatturapa.gov.it']],
    ];

    for (const [format, markers] of signatures) {
      if (markers.some((marker) => xmlContent.includes(marker))) {
        return format;
      }
    }
    return 'Unknown';
  }

  /**
   * Returns the simplified parsed invoice. Uses the stored XML string when
   * present; otherwise extracts the XML from the stored PDF first.
   * @throws Error when neither an XML string nor a PDF buffer is available.
   */
  public async getParsedXmlData(): Promise<interfaces.IXInvoice> {
    if (!this.xmlString && !this.pdfUint8Array) {
      throw new Error('No XML string or PDF buffer provided!');
    }

    const xmlContent = this.xmlString || (await this.getXmlData());
    return this.parseXmlToInvoice(xmlContent);
  }

  /**
   * Parses XML content into an IXInvoice, dispatching on the detected format.
   * @throws Error (`Failed to parse XML: ...`) when the content is empty or
   *         decoder creation / parsing throws.
   */
  private parseXmlToInvoice(xmlContent: string): interfaces.IXInvoice {
    if (!xmlContent) {
      throw new Error('No XML content provided for parsing');
    }

    try {
      this.decoderInstance = DecoderFactory.createDecoder(xmlContent);

      switch (this.identifyXmlFormat(xmlContent)) {
        case 'ZUGFeRD/CII':
          return this.parseCIIFormat(xmlContent);
        case 'UBL':
          return this.parseUBLFormat(xmlContent);
        case 'FatturaPA':
          return this.parseFatturaPAFormat(xmlContent);
        default:
          // If format unrecognized, try generic parsing
          return this.parseGenericXml(xmlContent);
      }
    } catch (error) {
      console.error('Error parsing XML to invoice structure:', error);
      // A thrown value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to parse XML: ${reason}`);
    }
  }

  /** Escapes regex metacharacters so a tag name is matched literally. */
  private static escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Returns the trimmed text of the first `<tagName ...>text</tagName>`
   * element (case-insensitive, text only - no child elements), or '' if none.
   */
  private extractXmlValueByRegex(xmlContent: string, tagName: string): string {
    const tag = XInvoice.escapeRegExp(tagName);
    const regex = new RegExp(`<${tag}(?:\\s[^>]*)?>([^<]+)</${tag}\\s*>`, 'i');
    const match = regex.exec(xmlContent);
    return match?.[1]?.trim() ?? '';
  }

  /**
   * Returns the inner markup of the first `<tagName ...>...</tagName>`
   * element (case-insensitive, may span lines), or '' if none.
   */
  private extractXmlSection(xmlContent: string, tagName: string): string {
    const tag = XInvoice.escapeRegExp(tagName);
    const regex = new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)</${tag}\\s*>`, 'i');
    const match = regex.exec(xmlContent);
    return match?.[1] ?? '';
  }

  /** Today's date as `YYYY-MM-DD` (UTC), used when no issue date is found. */
  private todayIsoDate(): string {
    return new Date().toISOString().slice(0, 10);
  }

  /** Builds a party whose address and contact are placeholders. */
  private createPlaceholderParty(name: string) {
    return {
      Name: name,
      Address: {
        Street: 'Unknown',
        City: 'Unknown',
        PostalCode: 'Unknown',
        Country: 'Unknown',
      },
      Contact: {
        Email: 'unknown@example.com',
        Phone: 'Unknown',
      },
    };
  }

  /** Builds the minimal invoice structure shared by all parsers. */
  private createPlaceholderInvoice(fields: {
    invoiceNumber: string;
    dateIssued: string;
    sellerName: string;
    buyerName: string;
    itemDescription: string;
  }): interfaces.IXInvoice {
    return {
      InvoiceNumber: fields.invoiceNumber,
      DateIssued: fields.dateIssued,
      Seller: this.createPlaceholderParty(fields.sellerName),
      Buyer: this.createPlaceholderParty(fields.buyerName),
      Items: [
        {
          Description: fields.itemDescription,
          Quantity: 1,
          UnitPrice: 0,
          TotalPrice: 0,
        },
      ],
      TotalAmount: 0,
    };
  }

  /**
   * Parses CII/ZUGFeRD XML (demo-level: number, date and party names only).
   * The date is returned exactly as written in `udt:DateTimeString`.
   */
  private parseCIIFormat(xmlContent: string): interfaces.IXInvoice {
    try {
      // Search inside rsm:ExchangedDocument so that a ram:ID appearing
      // earlier (e.g. the guideline ID in rsm:ExchangedDocumentContext) is
      // not mistaken for the invoice number. Falls back to the whole
      // document when that element is not found.
      const documentSection =
        this.extractXmlSection(xmlContent, 'rsm:ExchangedDocument') || xmlContent;
      // Restricting the name lookup to each party element prevents a seller
      // without a name from picking up the buyer's name.
      const sellerSection = this.extractXmlSection(xmlContent, 'ram:SellerTradeParty');
      const buyerSection = this.extractXmlSection(xmlContent, 'ram:BuyerTradeParty');

      return this.createPlaceholderInvoice({
        invoiceNumber: this.extractXmlValueByRegex(documentSection, 'ram:ID') || 'Unknown',
        dateIssued:
          this.extractXmlValueByRegex(documentSection, 'udt:DateTimeString') ||
          this.todayIsoDate(),
        sellerName: this.extractXmlValueByRegex(sellerSection, 'ram:Name') || 'Unknown Seller',
        buyerName: this.extractXmlValueByRegex(buyerSection, 'ram:Name') || 'Unknown Buyer',
        itemDescription: 'Unknown Item',
      });
    } catch (error) {
      console.error('Error parsing CII format:', error);
      return this.parseGenericXml(xmlContent); // Fallback
    }
  }

  /**
   * Parses UBL XML (simplified: number, issue date and party names only).
   */
  private parseUBLFormat(xmlContent: string): interfaces.IXInvoice {
    try {
      // Party names are looked up inside their party element; a combined
      // "Party.*?Name" pseudo tag can never match as a real element name.
      const supplierSection = this.extractXmlSection(xmlContent, 'cac:AccountingSupplierParty');
      const customerSection = this.extractXmlSection(xmlContent, 'cac:AccountingCustomerParty');

      return this.createPlaceholderInvoice({
        invoiceNumber: this.extractXmlValueByRegex(xmlContent, 'cbc:ID') || 'Unknown',
        dateIssued:
          this.extractXmlValueByRegex(xmlContent, 'cbc:IssueDate') || this.todayIsoDate(),
        sellerName: this.extractXmlValueByRegex(supplierSection, 'cbc:Name') || 'Unknown Seller',
        buyerName: this.extractXmlValueByRegex(customerSection, 'cbc:Name') || 'Unknown Buyer',
        itemDescription: 'Unknown Item',
      });
    } catch (error) {
      console.error('Error parsing UBL format:', error);
      return this.parseGenericXml(xmlContent);
    }
  }

  /**
   * Parses fatturaPA XML. No format-specific parsing exists yet; delegates
   * to the generic placeholder parser.
   */
  private parseFatturaPAFormat(xmlContent: string): interfaces.IXInvoice {
    return this.parseGenericXml(xmlContent);
  }

  /**
   * Fallback for unrecognized formats: returns a placeholder invoice and
   * does not inspect the XML.
   */
  private parseGenericXml(_xmlContent: string): interfaces.IXInvoice {
    return this.createPlaceholderInvoice({
      invoiceNumber: '(Unknown format - invoice number not extracted)',
      dateIssued: this.todayIsoDate(),
      sellerName: 'Unknown Seller (format not recognized)',
      buyerName: 'Unknown Buyer (format not recognized)',
      itemDescription: 'Unknown items (invoice format not recognized)',
    });
  }
}