xinvoice/ts/classes.xinvoice.ts
2025-03-17 16:30:23 +00:00

457 lines
14 KiB
TypeScript

import * as plugins from './plugins.js';
import * as interfaces from './interfaces.js';
import {
PDFDocument,
PDFDict,
PDFName,
PDFRawStream,
PDFArray,
PDFString,
} from 'pdf-lib';
import { FacturXEncoder } from './formats/facturx.encoder.js';
import { DecoderFactory } from './formats/decoder.factory.js';
import { BaseDecoder } from './formats/base.decoder.js';
export class XInvoice {
private xmlString: string;
private letterData: plugins.tsclass.business.ILetter;
private pdfUint8Array: Uint8Array;
private encoderInstance = new FacturXEncoder();
private decoderInstance: BaseDecoder;
constructor() {
// Decoder will be initialized when we have XML data
}
public async addPdfBuffer(pdfBuffer: Uint8Array | Buffer): Promise<void> {
this.pdfUint8Array = Uint8Array.from(pdfBuffer);
}
public async addXmlString(xmlString: string): Promise<void> {
// Basic XML validation - just check if it starts with <?xml
if (!xmlString || !xmlString.trim().startsWith('<?xml')) {
throw new Error('Invalid XML: Missing XML declaration');
}
// Store the XML string
this.xmlString = xmlString;
// Initialize the decoder with the XML string using the factory
this.decoderInstance = DecoderFactory.createDecoder(xmlString);
}
public async addLetterData(letterData: plugins.tsclass.business.ILetter): Promise<void> {
this.letterData = letterData;
}
public async getXInvoice(): Promise<void> {
// lets check requirements
if (!this.pdfUint8Array) {
throw new Error('No PDF buffer provided!');
}
if (!this.xmlString || !this.letterData) {
// TODO: check if document already has xml
throw new Error('No XML string or letter data provided!');
}
try {
const pdfDoc = await PDFDocument.load(this.pdfUint8Array);
// Convert the XML string to a Uint8Array
const xmlBuffer = new TextEncoder().encode(this.xmlString);
// Use pdf-lib's .attach() to embed the XML
pdfDoc.attach(xmlBuffer, plugins.path.basename('invoice.xml'), {
mimeType: 'application/xml',
description: 'XRechnung XML Invoice',
});
// Save back into this.pdfUint8Array
const modifiedPdfBytes = await pdfDoc.save();
this.pdfUint8Array = modifiedPdfBytes;
console.log(`PDF Buffer updated with new XML attachment!`);
} catch (error) {
console.error('Error embedding XML into PDF:', error);
throw error;
}
}
/**
* Reads the XML embedded in a PDF and returns it as a string.
* Validates that it's a properly formatted XInvoice/ZUGFeRD document.
*/
public async getXmlData(): Promise<string> {
if (!this.pdfUint8Array) {
throw new Error('No PDF buffer provided! Use addPdfBuffer() first.');
}
try {
const pdfDoc = await PDFDocument.load(this.pdfUint8Array);
// Get the document's metadata dictionary
const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names'));
if (!(namesDictObj instanceof PDFDict)) {
throw new Error('No Names dictionary found in PDF! This PDF does not contain embedded files.');
}
const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles'));
if (!(embeddedFilesDictObj instanceof PDFDict)) {
throw new Error('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
}
const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names'));
if (!(filesSpecObj instanceof PDFArray)) {
throw new Error('No files specified in EmbeddedFiles dictionary!');
}
// Try to find an XML file in the embedded files
let xmlFile: PDFRawStream | undefined;
let xmlFileName: string | undefined;
for (let i = 0; i < filesSpecObj.size(); i += 2) {
const fileNameObj = filesSpecObj.lookup(i);
const fileSpecObj = filesSpecObj.lookup(i + 1);
if (!(fileNameObj instanceof PDFString)) {
continue;
}
if (!(fileSpecObj instanceof PDFDict)) {
continue;
}
// Get the filename as string - using string access since value() might not be available in all contexts
const fileName = fileNameObj.toString();
// Check if it's an XML file (simple check - improved would check MIME type)
if (fileName.toLowerCase().includes('.xml')) {
const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
if (!(efDictObj instanceof PDFDict)) {
continue;
}
const maybeStream = efDictObj.lookup(PDFName.of('F'));
if (maybeStream instanceof PDFRawStream) {
// Found an XML file - save it
xmlFile = maybeStream;
xmlFileName = fileName;
break;
}
}
}
// If no XML file was found, throw an error
if (!xmlFile) {
throw new Error('No embedded XML file found in the PDF!');
}
// Decompress and decode the XML content
const xmlCompressedBytes = xmlFile.getContents().buffer;
const xmlBytes = plugins.pako.inflate(xmlCompressedBytes);
const xmlContent = new TextDecoder('utf-8').decode(xmlBytes);
// Store this XML string
this.xmlString = xmlContent;
// Initialize the decoder with the XML string if needed
if (!this.decoderInstance) {
this.decoderInstance = DecoderFactory.createDecoder(xmlContent);
}
// Validate the XML format
const format = this.identifyXmlFormat(xmlContent);
// Log information about the extracted XML
console.log(`Successfully extracted ${format} XML from PDF file. File name: ${xmlFileName}`);
return xmlContent;
} catch (error) {
console.error('Error extracting or parsing embedded XML from PDF:', error);
throw error;
}
}
/**
* Validates the format of an XML document and returns the identified format
*/
private identifyXmlFormat(xmlContent: string): string {
// Simple detection based on string content
// Check for ZUGFeRD/CII
if (xmlContent.includes('CrossIndustryInvoice') ||
xmlContent.includes('rsm:') ||
xmlContent.includes('ram:')) {
return 'ZUGFeRD/CII';
}
// Check for UBL
if (xmlContent.includes('<Invoice') ||
xmlContent.includes('ubl:Invoice') ||
xmlContent.includes('oasis:names:specification:ubl')) {
return 'UBL';
}
// Check for FatturaPA
if (xmlContent.includes('FatturaElettronica') ||
xmlContent.includes('fatturapa.gov.it')) {
return 'FatturaPA';
}
// For unknown formats, return generic
return 'Unknown';
}
public async getParsedXmlData(): Promise<interfaces.IXInvoice> {
if (!this.xmlString && !this.pdfUint8Array) {
throw new Error('No XML string or PDF buffer provided!');
}
let localXmlString = this.xmlString;
if (!localXmlString) {
localXmlString = await this.getXmlData();
}
return this.parseXmlToInvoice(localXmlString);
}
/**
* Parses XML content into a structured IXInvoice object
* Supports different XML invoice formats (ZUGFeRD, UBL, CII)
*/
private parseXmlToInvoice(xmlContent: string): interfaces.IXInvoice {
if (!xmlContent) {
throw new Error('No XML content provided for parsing');
}
try {
// Initialize the decoder with XML content if not already done
this.decoderInstance = DecoderFactory.createDecoder(xmlContent);
// First, attempt to identify the XML format
const format = this.identifyXmlFormat(xmlContent);
// Parse XML based on detected format
switch (format) {
case 'ZUGFeRD/CII':
return this.parseCIIFormat(xmlContent);
case 'UBL':
return this.parseUBLFormat(xmlContent);
case 'FatturaPA':
return this.parseFatturaPAFormat(xmlContent);
default:
// If format unrecognized, try generic parsing
return this.parseGenericXml(xmlContent);
}
} catch (error) {
console.error('Error parsing XML to invoice structure:', error);
throw new Error(`Failed to parse XML: ${error.message}`);
}
}
/**
* Helper to extract XML values using regex
*/
private extractXmlValueByRegex(xmlContent: string, tagName: string): string {
const regex = new RegExp(`<${tagName}[^>]*>([^<]+)</${tagName}>`, 'i');
const match = xmlContent.match(regex);
return match ? match[1].trim() : '';
}
/**
* Parses CII/ZUGFeRD format XML
*/
private parseCIIFormat(xmlContent: string): interfaces.IXInvoice {
// For demo implementation, just extract basic information using string operations
try {
// Extract invoice number - basic pattern matching
let invoiceNumber = 'Unknown';
const invoiceNumberMatch = xmlContent.match(/<ram:ID>([^<]+)<\/ram:ID>/);
if (invoiceNumberMatch && invoiceNumberMatch[1]) {
invoiceNumber = invoiceNumberMatch[1].trim();
}
// Extract date - basic pattern matching
let dateIssued = new Date().toISOString().split('T')[0];
const dateMatch = xmlContent.match(/<udt:DateTimeString[^>]*>([^<]+)<\/udt:DateTimeString>/);
if (dateMatch && dateMatch[1]) {
dateIssued = dateMatch[1].trim();
}
// Extract seller name - basic pattern matching
let sellerName = 'Unknown Seller';
const sellerMatch = xmlContent.match(/<ram:SellerTradeParty>.*?<ram:Name>([^<]+)<\/ram:Name>/s);
if (sellerMatch && sellerMatch[1]) {
sellerName = sellerMatch[1].trim();
}
// Extract buyer name - basic pattern matching
let buyerName = 'Unknown Buyer';
const buyerMatch = xmlContent.match(/<ram:BuyerTradeParty>.*?<ram:Name>([^<]+)<\/ram:Name>/s);
if (buyerMatch && buyerMatch[1]) {
buyerName = buyerMatch[1].trim();
}
// For this demo implementation, create a minimal invoice structure
return {
InvoiceNumber: invoiceNumber,
DateIssued: dateIssued,
Seller: {
Name: sellerName,
Address: {
Street: 'Unknown',
City: 'Unknown',
PostalCode: 'Unknown',
Country: 'Unknown',
},
Contact: {
Email: 'unknown@example.com',
Phone: 'Unknown',
},
},
Buyer: {
Name: buyerName,
Address: {
Street: 'Unknown',
City: 'Unknown',
PostalCode: 'Unknown',
Country: 'Unknown',
},
Contact: {
Email: 'unknown@example.com',
Phone: 'Unknown',
},
},
Items: [
{
Description: 'Unknown Item',
Quantity: 1,
UnitPrice: 0,
TotalPrice: 0,
},
],
TotalAmount: 0,
};
} catch (error) {
console.error('Error parsing CII format:', error);
return this.parseGenericXml(xmlContent); // Fallback
}
}
/**
* Parses UBL format XML
*/
private parseUBLFormat(xmlContent: string): interfaces.IXInvoice {
// Simplified UBL parsing - just extract basic fields
try {
const invoiceNumber = this.extractXmlValueByRegex(xmlContent, 'cbc:ID');
const dateIssued = this.extractXmlValueByRegex(xmlContent, 'cbc:IssueDate');
const sellerName = this.extractXmlValueByRegex(xmlContent, 'cac:AccountingSupplierParty.*?cbc:Name');
const buyerName = this.extractXmlValueByRegex(xmlContent, 'cac:AccountingCustomerParty.*?cbc:Name');
return {
InvoiceNumber: invoiceNumber || 'Unknown',
DateIssued: dateIssued || new Date().toISOString().split('T')[0],
Seller: {
Name: sellerName || 'Unknown Seller',
Address: {
Street: 'Unknown',
City: 'Unknown',
PostalCode: 'Unknown',
Country: 'Unknown',
},
Contact: {
Email: 'unknown@example.com',
Phone: 'Unknown',
},
},
Buyer: {
Name: buyerName || 'Unknown Buyer',
Address: {
Street: 'Unknown',
City: 'Unknown',
PostalCode: 'Unknown',
Country: 'Unknown',
},
Contact: {
Email: 'unknown@example.com',
Phone: 'Unknown',
},
},
Items: [
{
Description: 'Unknown Item',
Quantity: 1,
UnitPrice: 0,
TotalPrice: 0,
},
],
TotalAmount: 0,
};
} catch (error) {
console.error('Error parsing UBL format:', error);
return this.parseGenericXml(xmlContent);
}
}
/**
* Parses fatturaPA format XML
*/
private parseFatturaPAFormat(xmlContent: string): interfaces.IXInvoice {
// In a full implementation, this would have fatturaPA-specific parsing
// For now, using a simplified generic parser
return this.parseGenericXml(xmlContent);
}
/**
* Generic XML parser that attempts to extract invoice data
* from any XML structure
*/
private parseGenericXml(xmlContent: string): interfaces.IXInvoice {
// For now, returning a placeholder structure
// This would be replaced with more intelligent parsing
return {
InvoiceNumber: '(Unknown format - invoice number not extracted)',
DateIssued: new Date().toISOString().split('T')[0],
Seller: {
Name: 'Unknown Seller (format not recognized)',
Address: {
Street: 'Unknown',
City: 'Unknown',
PostalCode: 'Unknown',
Country: 'Unknown',
},
Contact: {
Email: 'unknown@example.com',
Phone: 'Unknown',
},
},
Buyer: {
Name: 'Unknown Buyer (format not recognized)',
Address: {
Street: 'Unknown',
City: 'Unknown',
PostalCode: 'Unknown',
Country: 'Unknown',
},
Contact: {
Email: 'unknown@example.com',
Phone: 'Unknown',
},
},
Items: [
{
Description: 'Unknown items (invoice format not recognized)',
Quantity: 1,
UnitPrice: 0,
TotalPrice: 0,
},
],
TotalAmount: 0,
};
}
}