xinvoice/test/test.focused-corpus.ts
2025-04-03 17:21:36 +00:00

280 lines
11 KiB
TypeScript

import { tap, expect } from '@push.rocks/tapbundle';
import { XInvoice } from '../ts/classes.xinvoice.js';
import { InvoiceFormat } from '../ts/interfaces/common.js';
import * as fs from 'fs/promises';
import * as path from 'path';
// Smoke-run a handful of corpus files per format; results are reported on the console
tap.test('XInvoice should handle a focused subset of corpus files', async () => {
  const maxFilesPerGroup = 5;
  const fromCwd = (relativePath: string): string => path.join(process.cwd(), relativePath);

  // Collect at most `maxFilesPerGroup` files per corpus folder (one lookup after the other)
  const ciiFiles = await findFiles(fromCwd('test/assets/corpus/XML-Rechnung/CII'), '.xml', maxFilesPerGroup);
  const ublFiles = await findFiles(fromCwd('test/assets/corpus/XML-Rechnung/UBL'), '.xml', maxFilesPerGroup);
  const zugferdV2Files = await findFiles(
    fromCwd('test/assets/corpus/ZUGFeRDv2/correct/intarsys/EN16931'),
    '.pdf',
    maxFilesPerGroup,
  );

  // One entry per corpus group: display label, files found, and how a single file is exercised
  const groups: Array<{ label: string; files: string[]; run: (file: string) => Promise<void> }> = [
    { label: 'CII', files: ciiFiles, run: (file) => testXmlFile(file, InvoiceFormat.CII) },
    { label: 'UBL', files: ublFiles, run: (file) => testXmlFile(file, InvoiceFormat.UBL) },
    { label: 'ZUGFeRD v2', files: zugferdV2Files, run: (file) => testPdfFile(file) },
  ];

  // Report how many files each group contributes
  for (const { label, files } of groups) {
    console.log(`Found ${files.length} ${label} files for focused testing`);
  }

  // Process the groups in order, one file at a time
  for (const { label, files, run } of groups) {
    console.log(`\nTesting ${label} files:`);
    for (const file of files) {
      console.log(`\nTesting file: ${path.basename(file)}`);
      await run(file);
    }
  }

  // Make sure the output directory exists
  await fs.mkdir(path.join(process.cwd(), 'test', 'output', 'focused'), { recursive: true });

  // The per-file helpers only log their outcome, so this assertion always passes
  expect(true).toBeTrue();
});
/**
 * Loads one XML invoice, checks the detected format and exports it back to XML.
 *
 * Every outcome is reported through console.log; errors raised while reading,
 * parsing or exporting are caught and logged instead of being rethrown.
 * @param file Path of the XML file to load
 * @param expectedFormat Format the file is expected to be detected as
 */
async function testXmlFile(file: string, expectedFormat: InvoiceFormat): Promise<void> {
  try {
    const xmlContent = await fs.readFile(file, 'utf8');
    const xinvoice = await XInvoice.fromXml(xmlContent);

    // Stop early when the parsed invoice lacks one of the parts used below
    if (!xinvoice || !xinvoice.from || !xinvoice.to || !xinvoice.items) {
      console.log('❌ Missing required properties');
      return;
    }

    // CII <-> FACTURX and UBL <-> XRECHNUNG are accepted in place of each other
    const format = xinvoice.getFormat();
    const interchangeablePairs = [
      [InvoiceFormat.CII, InvoiceFormat.FACTURX],
      [InvoiceFormat.UBL, InvoiceFormat.XRECHNUNG],
    ];
    const formatAccepted =
      format === expectedFormat ||
      interchangeablePairs.some(
        ([first, second]) =>
          (expectedFormat === first && format === second) ||
          (expectedFormat === second && format === first),
      );
    if (!formatAccepted) {
      console.log(`❌ Wrong format detected: ${format}, expected: ${expectedFormat}`);
      return;
    }

    // Round-trip: export the invoice again and keep the result for inspection
    try {
      const isUblFamily = format === InvoiceFormat.UBL || format === InvoiceFormat.XRECHNUNG;
      const exportFormat = isUblFamily ? 'xrechnung' : 'facturx';
      const exportedXml = await xinvoice.exportXml(exportFormat as any);
      if (!exportedXml) {
        console.log('❌ Failed to export valid XML');
        return;
      }

      console.log('✅ Success: File loaded, format detected correctly, and exported successfully');
      console.log(`Format: ${format}`);
      console.log(`From: ${xinvoice.from.name}`);
      console.log(`To: ${xinvoice.to.name}`);
      console.log(`Items: ${xinvoice.items.length}`);

      // Write the exported XML next to the other focused-test artefacts
      const outputDir = path.join(process.cwd(), 'test', 'output', 'focused');
      await fs.mkdir(outputDir, { recursive: true });
      const outputPath = path.join(outputDir, `${path.basename(file)}-exported.xml`);
      await fs.writeFile(outputPath, exportedXml);
    } catch (exportError) {
      console.log(`❌ Export error: ${exportError.message}`);
    }
  } catch (error) {
    console.log(`❌ Error processing the file: ${error.message}`);
  }
}
/**
 * Exercises the PDF code paths for a single PDF invoice.
 *
 * In order, the function:
 *  1. extracts the XML through PDFExtractor,
 *  2. dumps the embedded files it can find in the PDF into test/output/focused,
 *  3. parses the extracted XML with XInvoice.fromXml and re-exports it as 'facturx',
 *  4. loads the PDF directly with XInvoice.fromPdf.
 *
 * Each step reports its outcome through console.log; errors are caught and
 * logged, so the returned promise resolves even when a step fails.
 * @param file Path of the PDF file to test
 */
async function testPdfFile(file: string): Promise<void> {
  try {
    const pdfBuffer = await fs.readFile(file);

    // Extract XML from PDF
    const { PDFExtractor } = await import('../ts/formats/pdf/pdf.extractor.js');
    const extractor = new PDFExtractor();
    const xmlContent = await extractor.extractXml(pdfBuffer);

    // All artefacts written by this function end up in this directory
    const testDir = path.join(process.cwd(), 'test', 'output', 'focused');
    await fs.mkdir(testDir, { recursive: true });

    // Save the raw embedded files for inspection, even if the extracted XML is invalid
    await saveEmbeddedFiles(pdfBuffer, file, testDir);

    if (xmlContent) {
      console.log('✅ Successfully extracted XML from PDF');
      // Save the extracted XML for inspection
      await fs.writeFile(path.join(testDir, `${path.basename(file)}-extracted.xml`), xmlContent);

      // Try to create XInvoice from the extracted XML
      try {
        const xinvoice = await XInvoice.fromXml(xmlContent);
        if (xinvoice && xinvoice.from && xinvoice.to && xinvoice.items) {
          console.log('✅ Successfully created XInvoice from extracted XML');
          console.log(`Format: ${xinvoice.getFormat()}`);
          console.log(`From: ${xinvoice.from.name}`);
          console.log(`To: ${xinvoice.to.name}`);
          console.log(`Items: ${xinvoice.items.length}`);

          // Try to export the invoice back to XML
          try {
            const exportedXml = await xinvoice.exportXml('facturx');
            if (exportedXml) {
              console.log('✅ Successfully exported XInvoice back to XML');
              // Save the exported XML for inspection
              await fs.writeFile(path.join(testDir, `${path.basename(file)}-reexported.xml`), exportedXml);
            } else {
              console.log('❌ Failed to export valid XML');
            }
          } catch (exportError) {
            console.log(`❌ Export error: ${describeThrown(exportError)}`);
          }
        } else {
          console.log('❌ Missing required properties in created XInvoice');
        }
      } catch (xmlError) {
        console.log(`❌ Error creating XInvoice from extracted XML: ${describeThrown(xmlError)}`);
      }
    } else {
      console.log('❌ No XML found in PDF');
    }

    // Try to create XInvoice directly from PDF
    try {
      const xinvoice = await XInvoice.fromPdf(pdfBuffer);
      if (xinvoice && xinvoice.from && xinvoice.to && xinvoice.items) {
        console.log('✅ Successfully created XInvoice directly from PDF');
        console.log(`Format: ${xinvoice.getFormat()}`);
        console.log(`From: ${xinvoice.from.name}`);
        console.log(`To: ${xinvoice.to.name}`);
        console.log(`Items: ${xinvoice.items.length}`);
      } else {
        console.log('❌ Missing required properties in created XInvoice');
      }
    } catch (pdfError) {
      console.log(`❌ Error creating XInvoice directly from PDF: ${describeThrown(pdfError)}`);
    }
  } catch (error) {
    console.log(`❌ Error processing the file: ${describeThrown(error)}`);
  }
}

/**
 * Returns the printable part of a thrown value: its `message` property when it
 * has one (as for Error objects), otherwise the value itself. This keeps the
 * catch handlers from dereferencing `.message` on `null`/`undefined`.
 */
function describeThrown(thrown: unknown): unknown {
  if (typeof thrown === 'object' && thrown !== null && 'message' in thrown) {
    return (thrown as { message: unknown }).message;
  }
  return thrown;
}

/**
 * Walks catalog -> 'Names' -> 'EmbeddedFiles' -> 'Names' of the PDF and writes
 * the contents of every embedded file stream it finds to `outputDir`.
 *
 * The 'Names' array is read in steps of two: entry `i` is used as the file name
 * and entry `i + 1` as the file specification. Failures are logged, not thrown.
 * @param pdfBuffer Raw bytes of the PDF
 * @param file Path of the PDF; its base name prefixes the output file names
 * @param outputDir Existing directory the raw files are written to
 */
async function saveEmbeddedFiles(pdfBuffer: Uint8Array, file: string, outputDir: string): Promise<void> {
  try {
    // Import pdf-lib once instead of once per lookup (and twice per loop iteration)
    const { PDFDocument, PDFName } = await import('pdf-lib');
    const pdfDoc = await PDFDocument.load(pdfBuffer);

    const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names'));
    if (!namesDictObj) {
      return;
    }
    const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles'));
    if (!embeddedFilesDictObj) {
      return;
    }
    const filesSpecObj = embeddedFilesDictObj.lookup(PDFName.of('Names'));
    if (!filesSpecObj || !filesSpecObj.size || !(filesSpecObj.size() > 0)) {
      return;
    }

    for (let i = 0; i < filesSpecObj.size(); i += 2) {
      const fileNameObj = filesSpecObj.lookup(i);
      const fileSpecObj = filesSpecObj.lookup(i + 1);
      if (!fileNameObj || !fileSpecObj) {
        continue;
      }

      // NOTE(review): fileName is the object's toString() output and goes straight
      // into the output file name below; confirm it cannot contain path separators.
      const fileName = fileNameObj.toString();
      console.log(`Found embedded file: ${fileName}`);

      const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
      if (!efDictObj) {
        continue;
      }
      const maybeStream = efDictObj.lookup(PDFName.of('F'));
      if (!maybeStream) {
        continue;
      }

      // A broken stream must not stop the remaining embedded files from being saved
      try {
        const xmlBytes = maybeStream.getContents();
        const rawXmlContent = new TextDecoder('utf-8').decode(xmlBytes);
        await fs.writeFile(path.join(outputDir, `${path.basename(file)}-raw-${fileName}.xml`), rawXmlContent);
        console.log(`Saved raw XML content from ${fileName}`);
      } catch (streamError) {
        console.log(`Error extracting stream content: ${describeThrown(streamError)}`);
      }
    }
  } catch (pdfError) {
    console.log(`Error inspecting PDF structure: ${describeThrown(pdfError)}`);
  }
}
/**
 * Recursively finds files with a specific extension in a directory.
 *
 * The extension is matched case-insensitively against the end of each file name.
 * A directory that cannot be read is reported via console.error and contributes
 * no files; the search itself never rejects.
 * @param dir Directory to search
 * @param extension File extension to look for (e.g. '.xml'), in any letter case
 * @param limit Maximum number of files to return; omit for no limit. A limit of
 *   0 (or less) yields an empty array.
 * @returns Array of file paths, in directory-listing order
 */
async function findFiles(dir: string, extension: string, limit?: number): Promise<string[]> {
  // File names are lower-cased before comparison, so the suffix has to be as well
  const wantedSuffix = extension.toLowerCase();
  try {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    const result: string[] = [];
    for (const entry of entries) {
      // Compare against undefined (not truthiness) so that a limit of 0 is honoured
      if (limit !== undefined && result.length >= limit) {
        break;
      }
      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        // Subdirectories may only fill whatever is left of the limit
        const remainingLimit = limit === undefined ? undefined : limit - result.length;
        result.push(...(await findFiles(entryPath, extension, remainingLimit)));
      } else if (entry.name.toLowerCase().endsWith(wantedSuffix)) {
        result.push(entryPath);
      }
    }
    return result;
  } catch (error) {
    console.error(`Error finding files in ${dir}:`, error);
    return [];
  }
}
// Start the tap runner; presumably this executes the test registered with tap.test above (confirm against the tapbundle docs)
tap.start();