einvoice/test/test.format-detection.ts

260 lines
10 KiB
TypeScript

import { tap, expect } from '@push.rocks/tapbundle';
import { EInvoice } from '../ts/einvoice.js';
import { InvoiceFormat } from '../ts/interfaces/common.js';
import { FormatDetector } from '../ts/formats/utils/format.detector.js';
import { TestFileHelpers, TestFileCategories, InvoiceAssertions, PerformanceUtils } from './test-utils.js';
import * as path from 'path';
/**
* Comprehensive format detection tests using the corpus assets
*/
// Every CII XML-Rechnung corpus file must be detected as CII or XRechnung
tap.test('Format Detection - CII XML-Rechnung files', async () => {
  const corpusFiles = await TestFileHelpers.getTestFiles(TestFileCategories.CII_XMLRECHNUNG, '*.xml');
  console.log(`Testing ${corpusFiles.length} CII XML-Rechnung files`);

  // Formats accepted for a CII-syntax document
  const acceptedFormats = [InvoiceFormat.CII, InvoiceFormat.XRECHNUNG];

  for (const filePath of corpusFiles) {
    const content = (await TestFileHelpers.loadTestFile(filePath)).toString('utf-8');

    // Timings are recorded under the 'cii-detection' key
    const measurement = await PerformanceUtils.measure('cii-detection', async () =>
      FormatDetector.detectFormat(content)
    );

    expect(acceptedFormats).toContain(measurement.result);
    console.log(`${path.basename(filePath)}: ${measurement.result} (${measurement.duration.toFixed(2)}ms)`);
  }
});
// Every UBL XML-Rechnung corpus file must be detected as UBL or XRechnung
tap.test('Format Detection - UBL XML-Rechnung files', async () => {
  const corpusFiles = await TestFileHelpers.getTestFiles(TestFileCategories.UBL_XMLRECHNUNG, '*.xml');
  console.log(`Testing ${corpusFiles.length} UBL XML-Rechnung files`);

  // Formats accepted for a UBL-syntax document
  const acceptedFormats = [InvoiceFormat.UBL, InvoiceFormat.XRECHNUNG];

  for (const filePath of corpusFiles) {
    const rawBytes = await TestFileHelpers.loadTestFile(filePath);
    const content = rawBytes.toString('utf-8');

    // Timings are recorded under the 'ubl-detection' key
    const detect = async () => FormatDetector.detectFormat(content);
    const measurement = await PerformanceUtils.measure('ubl-detection', detect);

    expect(acceptedFormats).toContain(measurement.result);
    console.log(`${path.basename(filePath)}: ${measurement.result} (${measurement.duration.toFixed(2)}ms)`);
  }
});
// PEPPOL corpus samples are expected to be detected as plain UBL
tap.test('Format Detection - PEPPOL large invoice samples', async () => {
  const peppolFiles = await TestFileHelpers.getTestFiles(TestFileCategories.PEPPOL, '*.xml');
  console.log(`Testing ${peppolFiles.length} PEPPOL files`);

  for (const filePath of peppolFiles) {
    const content = (await TestFileHelpers.loadTestFile(filePath)).toString('utf-8');

    // Timings are recorded under the 'peppol-detection' key
    const measurement = await PerformanceUtils.measure('peppol-detection', async () =>
      FormatDetector.detectFormat(content)
    );

    // Unlike the XML-Rechnung suites, only UBL is accepted here
    expect(measurement.result).toEqual(InvoiceFormat.UBL);
    console.log(`${path.basename(filePath)}: ${measurement.result} (${measurement.duration.toFixed(2)}ms)`);
  }
});
// FatturaPA (Italian invoice format) detection - informational only.
// Detection for this format might not be fully implemented yet, so this test
// logs the result per file and deliberately makes no assertion on the format.
tap.test('Format Detection - FatturaPA Italian invoice format', async () => {
  const files = await TestFileHelpers.getTestFiles(TestFileCategories.FATTURAPA, '*.xml');
  console.log(`Testing ${files.length} FatturaPA files`);

  let detectedCount = 0;
  for (const file of files) {
    try {
      const xmlBuffer = await TestFileHelpers.loadTestFile(file);
      const xmlString = xmlBuffer.toString('utf-8');
      const { result: format, duration } = await PerformanceUtils.measure(
        'fatturapa-detection',
        async () => FormatDetector.detectFormat(xmlString)
      );

      // Evaluate the match once; it drives both the counter and the log marker
      const isFatturaPa = format === InvoiceFormat.FATTURAPA;
      if (isFatturaPa) {
        detectedCount++;
      }
      console.log(`${isFatturaPa ? '✓' : '○'} ${path.basename(file)}: ${format} (${duration.toFixed(2)}ms)`);
    } catch (error) {
      // The caught value is not guaranteed to be an Error (and is typed
      // `unknown` under strict settings), so narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`${path.basename(file)}: Error - ${reason}`);
    }
  }

  // Log if FatturaPA detection needs implementation
  if (detectedCount === 0 && files.length > 0) {
    console.log('Note: FatturaPA format detection may need implementation');
  }
});
// EN16931 reference examples: the CII-syntax samples first, then the UBL-syntax samples
tap.test('Format Detection - EN16931 example files', async () => {
  // --- CII examples ---
  const ciiExamples = await TestFileHelpers.getTestFiles(TestFileCategories.EN16931_EXAMPLES_CII, '*.xml');
  console.log(`Testing ${ciiExamples.length} EN16931 CII examples`);

  const ciiAccepted = [InvoiceFormat.CII, InvoiceFormat.FACTURX, InvoiceFormat.XRECHNUNG];
  for (const examplePath of ciiExamples) {
    const xml = (await TestFileHelpers.loadTestFile(examplePath)).toString('utf-8');
    const detected = FormatDetector.detectFormat(xml);
    expect(ciiAccepted).toContain(detected);
    console.log(`${path.basename(examplePath)}: ${detected}`);
  }

  // --- UBL examples ---
  const ublExamples = await TestFileHelpers.getTestFiles(TestFileCategories.EN16931_EXAMPLES_UBL, '*.xml');
  console.log(`Testing ${ublExamples.length} EN16931 UBL examples`);

  const ublAccepted = [InvoiceFormat.UBL, InvoiceFormat.XRECHNUNG];
  for (const examplePath of ublExamples) {
    const xml = (await TestFileHelpers.loadTestFile(examplePath)).toString('utf-8');
    const detected = FormatDetector.detectFormat(xml);
    expect(ublAccepted).toContain(detected);
    console.log(`${path.basename(examplePath)}: ${detected}`);
  }
});
// Degenerate and unusual inputs: each case is detected, asserted, then logged, in order
tap.test('Format Detection - Edge cases and error handling', async () => {
  const edgeCases = [
    // Empty XML
    { input: '', expected: InvoiceFormat.UNKNOWN, note: '✓ Empty string returns UNKNOWN' },
    // Non-XML content
    { input: 'This is not XML', expected: InvoiceFormat.UNKNOWN, note: '✓ Non-XML text returns UNKNOWN' },
    // Well-formed XML that is not an invoice
    {
      input: '<?xml version="1.0"?><root></root>',
      expected: InvoiceFormat.UNKNOWN,
      note: '✓ Minimal XML returns UNKNOWN',
    },
    // UBL invoice preceded by a byte order mark
    {
      input: '\ufeff<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"></Invoice>',
      expected: InvoiceFormat.UBL,
      note: '✓ XML with BOM is handled correctly',
    },
  ];

  for (const { input, expected, note } of edgeCases) {
    const detected = FormatDetector.detectFormat(input);
    expect(detected).toEqual(expected);
    console.log(note);
  }
});
// Detection micro-benchmarks: a small inline UBL document and, when the corpus
// provides one, a large PEPPOL sample.
tap.test('Format Detection - Performance benchmarks', async () => {
  /**
   * Runs FormatDetector.detectFormat on `xml` `iterations` times and returns
   * the mean elapsed time per call, as measured with performance.now().
   */
  const averageDetectionMs = (xml: string, iterations: number): number => {
    let total = 0;
    for (let i = 0; i < iterations; i++) {
      const start = performance.now();
      FormatDetector.detectFormat(xml);
      total += performance.now() - start;
    }
    return total / iterations;
  };

  console.log('\nPerformance Benchmarks:');

  // Test with small file
  const smallXml = '<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>123</ID></Invoice>';
  const avgSmall = averageDetectionMs(smallXml, 100);
  console.log(`Small XML (${smallXml.length} bytes): avg ${avgSmall.toFixed(3)}ms`);
  expect(avgSmall).toBeLessThan(1); // Should be very fast

  // Test with large file (if available).
  // Only locating and loading the file is best-effort. The timing assertion
  // stays outside the try/catch so a failed expectation is reported as a
  // failure instead of being logged as a skipped test.
  let largeXml: string | undefined;
  try {
    const [firstLargeFile] = await TestFileHelpers.getTestFiles(TestFileCategories.PEPPOL, 'Large*.xml');
    if (firstLargeFile !== undefined) {
      const largeBuffer = await TestFileHelpers.loadTestFile(firstLargeFile);
      largeXml = largeBuffer.toString('utf-8');
    }
  } catch {
    console.log('Large file test skipped - no large files available');
  }

  if (largeXml !== undefined) {
    const avgLarge = averageDetectionMs(largeXml, 10);
    console.log(`Large XML (${largeXml.length} bytes): avg ${avgLarge.toFixed(3)}ms`);
    expect(avgLarge).toBeLessThan(10); // Should still be reasonably fast
  }
});
// Format detection for invoices loaded from ZUGFeRD v2 PDFs via EInvoice.fromPdf
tap.test('Format Detection - ZUGFeRD PDFs with embedded XML', async () => {
  const pdfFiles = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V2_CORRECT, '*.pdf');
  console.log(`Testing ${pdfFiles.length} ZUGFeRD v2 PDF files`);

  // Formats accepted for a ZUGFeRD v2 PDF
  const acceptedFormats = [InvoiceFormat.ZUGFERD, InvoiceFormat.FACTURX];

  let successCount = 0;
  for (const file of pdfFiles.slice(0, 5)) { // Test first 5 files for speed
    let format: ReturnType<EInvoice['getFormat']>;
    try {
      // Loading and extraction are best-effort: a failure here skips the file
      const pdfBuffer = await TestFileHelpers.loadTestFile(file);
      const einvoice = await EInvoice.fromPdf(pdfBuffer);
      format = einvoice.getFormat();
    } catch {
      console.log(`${path.basename(file)}: PDF extraction not available`);
      continue;
    }

    // Assert outside the try/catch so a wrong format fails the test instead
    // of being caught and reported as "PDF extraction not available".
    expect(acceptedFormats).toContain(format);
    successCount++;
    console.log(`${path.basename(file)}: ${format}`);
  }

  if (successCount > 0) {
    console.log(`Successfully detected format from ${successCount} PDF files`);
  }
});
// Print the collected performance report and sanity-check the recorded averages
tap.test('Format Detection - Performance Summary', async () => {
  console.log('\n' + PerformanceUtils.generateReport());

  // Metric key paired with the caption used in the log line
  const summaries = [
    ['cii-detection', 'CII detection average'],
    ['ubl-detection', 'UBL detection average'],
  ] as const;

  for (const [metric, caption] of summaries) {
    const stats = PerformanceUtils.getStats(metric);
    if (!stats) {
      // No stats recorded under this key, so there is nothing to check
      continue;
    }
    expect(stats.avg).toBeLessThan(5); // Average should be under 5ms
    console.log(`${caption}: ${stats.avg.toFixed(2)}ms`);
  }
});
// Placeholder for confidence scoring tests (to be filled in once scoring is added)
tap.test('Format Detection - Confidence scoring', async () => {
  // Nothing is asserted yet; the test only logs that it is pending.
  const pendingNotice = 'Confidence scoring tests - placeholder for future implementation';
  console.log(pendingNotice);

  // Sketch of what might be tested later:
  //   const result = FormatDetector.detectFormatWithConfidence(xml);
  //   expect(result.format).toEqual(InvoiceFormat.UBL);
  //   expect(result.confidence).toBeGreaterThan(0.8);
});
// Start the tap runner; must come after all tap.test registrations above
tap.start();