260 lines
10 KiB
TypeScript
260 lines
10 KiB
TypeScript
import { tap, expect } from '@push.rocks/tapbundle';
|
|
import { EInvoice } from '../ts/einvoice.js';
|
|
import { InvoiceFormat } from '../ts/interfaces/common.js';
|
|
import { FormatDetector } from '../ts/formats/utils/format.detector.js';
|
|
import { TestFileHelpers, TestFileCategories, InvoiceAssertions, PerformanceUtils } from './test-utils.js';
|
|
import * as path from 'path';
|
|
|
|
/**
 * Comprehensive format detection tests using the corpus assets
 */
|
|
|
|
// Format detection for the CII XML-Rechnung corpus: every file must be
// reported as CII or XRechnung. Each detection is timed under the
// 'cii-detection' label, which the Performance Summary test queries later.
tap.test('Format Detection - CII XML-Rechnung files', async () => {
  const corpusFiles = await TestFileHelpers.getTestFiles(TestFileCategories.CII_XMLRECHNUNG, '*.xml');
  console.log(`Testing ${corpusFiles.length} CII XML-Rechnung files`);

  for (const corpusFile of corpusFiles) {
    const xml = (await TestFileHelpers.loadTestFile(corpusFile)).toString('utf-8');

    const measurement = await PerformanceUtils.measure(
      'cii-detection',
      async () => FormatDetector.detectFormat(xml)
    );

    // CII files should be detected as either CII or XRechnung
    expect([InvoiceFormat.CII, InvoiceFormat.XRECHNUNG]).toContain(measurement.result);

    const elapsedMs = measurement.duration.toFixed(2);
    console.log(`✓ ${path.basename(corpusFile)}: ${measurement.result} (${elapsedMs}ms)`);
  }
});
|
|
|
|
// Format detection for the UBL XML-Rechnung corpus: every file must be
// reported as UBL or XRechnung. Each detection is timed under the
// 'ubl-detection' label, which the Performance Summary test queries later.
tap.test('Format Detection - UBL XML-Rechnung files', async () => {
  const corpusFiles = await TestFileHelpers.getTestFiles(TestFileCategories.UBL_XMLRECHNUNG, '*.xml');
  console.log(`Testing ${corpusFiles.length} UBL XML-Rechnung files`);

  for (const corpusFile of corpusFiles) {
    const xml = (await TestFileHelpers.loadTestFile(corpusFile)).toString('utf-8');

    const measurement = await PerformanceUtils.measure(
      'ubl-detection',
      async () => FormatDetector.detectFormat(xml)
    );

    // UBL files should be detected as either UBL or XRechnung
    expect([InvoiceFormat.UBL, InvoiceFormat.XRECHNUNG]).toContain(measurement.result);

    const elapsedMs = measurement.duration.toFixed(2);
    console.log(`✓ ${path.basename(corpusFile)}: ${measurement.result} (${elapsedMs}ms)`);
  }
});
|
|
|
|
// Format detection for the PEPPOL corpus: every file is expected to be
// reported as plain UBL. Detections are timed under 'peppol-detection'.
tap.test('Format Detection - PEPPOL large invoice samples', async () => {
  const corpusFiles = await TestFileHelpers.getTestFiles(TestFileCategories.PEPPOL, '*.xml');
  console.log(`Testing ${corpusFiles.length} PEPPOL files`);

  for (const corpusFile of corpusFiles) {
    const rawBytes = await TestFileHelpers.loadTestFile(corpusFile);
    const xml = rawBytes.toString('utf-8');

    const detect = async () => FormatDetector.detectFormat(xml);
    const measurement = await PerformanceUtils.measure('peppol-detection', detect);

    // PEPPOL files are typically UBL format
    expect(measurement.result).toEqual(InvoiceFormat.UBL);

    const elapsedMs = measurement.duration.toFixed(2);
    console.log(`✓ ${path.basename(corpusFile)}: ${measurement.result} (${elapsedMs}ms)`);
  }
});
|
|
|
|
// Format detection for the FatturaPA (Italian) corpus.
// This test is deliberately lenient: it makes no assertions, it only reports
// which files were recognised as FatturaPA and notes when none were, because
// FatturaPA detection might not be fully implemented yet.
tap.test('Format Detection - FatturaPA Italian invoice format', async () => {
  const files = await TestFileHelpers.getTestFiles(TestFileCategories.FATTURAPA, '*.xml');
  console.log(`Testing ${files.length} FatturaPA files`);

  let detectedCount = 0;
  for (const file of files) {
    try {
      const xmlBuffer = await TestFileHelpers.loadTestFile(file);
      const xmlString = xmlBuffer.toString('utf-8');

      const { result: format, duration } = await PerformanceUtils.measure(
        'fatturapa-detection',
        async () => FormatDetector.detectFormat(xmlString)
      );

      const isFatturaPa = format === InvoiceFormat.FATTURAPA;
      if (isFatturaPa) {
        detectedCount++;
      }

      console.log(`${isFatturaPa ? '✓' : '○'} ${path.basename(file)}: ${format} (${duration.toFixed(2)}ms)`);
    } catch (error) {
      // The caught value is not guaranteed to be an Error (and is typed
      // `unknown` under strict TypeScript), so narrow it before reading
      // `.message` instead of printing "undefined" for non-Error throws.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`✗ ${path.basename(file)}: Error - ${reason}`);
    }
  }

  // Log if FatturaPA detection needs implementation
  if (detectedCount === 0 && files.length > 0) {
    console.log('Note: FatturaPA format detection may need implementation');
  }
});
|
|
|
|
// Format detection for the EN16931 example files, first the CII examples and
// then the UBL examples. Each set lists the formats that count as a correct
// detection for its files.
tap.test('Format Detection - EN16931 example files', async () => {
  const exampleSets = [
    {
      category: TestFileCategories.EN16931_EXAMPLES_CII,
      syntax: 'CII',
      accepted: [InvoiceFormat.CII, InvoiceFormat.FACTURX, InvoiceFormat.XRECHNUNG],
    },
    {
      category: TestFileCategories.EN16931_EXAMPLES_UBL,
      syntax: 'UBL',
      accepted: [InvoiceFormat.UBL, InvoiceFormat.XRECHNUNG],
    },
  ];

  for (const { category, syntax, accepted } of exampleSets) {
    const exampleFiles = await TestFileHelpers.getTestFiles(category, '*.xml');
    console.log(`Testing ${exampleFiles.length} EN16931 ${syntax} examples`);

    for (const exampleFile of exampleFiles) {
      const xml = (await TestFileHelpers.loadTestFile(exampleFile)).toString('utf-8');

      const detected = FormatDetector.detectFormat(xml);
      expect(accepted).toContain(detected);
      console.log(`✓ ${path.basename(exampleFile)}: ${detected}`);
    }
  }
});
|
|
|
|
// Format detection on hand-written edge-case inputs. Each case pairs an input
// string with the format the detector must return and the line logged once
// the expectation holds.
tap.test('Format Detection - Edge cases and error handling', async () => {
  const edgeCases = [
    {
      // Empty XML
      input: '',
      expected: InvoiceFormat.UNKNOWN,
      passedMessage: '✓ Empty string returns UNKNOWN',
    },
    {
      // Non-XML content
      input: 'This is not XML',
      expected: InvoiceFormat.UNKNOWN,
      passedMessage: '✓ Non-XML text returns UNKNOWN',
    },
    {
      // Minimal XML without any invoice namespace
      input: '<?xml version="1.0"?><root></root>',
      expected: InvoiceFormat.UNKNOWN,
      passedMessage: '✓ Minimal XML returns UNKNOWN',
    },
    {
      // UBL invoice preceded by a byte order mark
      input: '\ufeff<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"></Invoice>',
      expected: InvoiceFormat.UBL,
      passedMessage: '✓ XML with BOM is handled correctly',
    },
  ];

  for (const { input, expected, passedMessage } of edgeCases) {
    const detected = FormatDetector.detectFormat(input);
    expect(detected).toEqual(expected);
    console.log(passedMessage);
  }
});
|
|
|
|
// Format detection micro-benchmarks.
// Measures the average wall-clock time of FormatDetector.detectFormat on a
// tiny inline UBL document (must average under 1ms) and, when a PEPPOL
// 'Large*.xml' corpus file is available, on that file (must average under 10ms).
tap.test('Format Detection - Performance benchmarks', async () => {
  console.log('\nPerformance Benchmarks:');

  // Runs detectFormat `iterations` times on `xml` and returns the mean
  // duration in milliseconds as measured with performance.now().
  const averageDetectionMs = (xml: string, iterations: number): number => {
    let totalMs = 0;
    for (let i = 0; i < iterations; i++) {
      const start = performance.now();
      FormatDetector.detectFormat(xml);
      totalMs += performance.now() - start;
    }
    return totalMs / iterations;
  };

  // Test with small file
  const smallXml = '<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>123</ID></Invoice>';
  const avgSmall = averageDetectionMs(smallXml, 100);
  console.log(`Small XML (${smallXml.length} bytes): avg ${avgSmall.toFixed(3)}ms`);
  expect(avgSmall).toBeLessThan(1); // Should be very fast

  // Test with large file (if available).
  // Only locating and loading the file is best-effort. The timing assertion
  // below runs outside the try block so a genuine performance regression
  // fails the test instead of being reported as a skipped benchmark.
  let largeXml: string | undefined;
  try {
    const largeFiles = await TestFileHelpers.getTestFiles(TestFileCategories.PEPPOL, 'Large*.xml');
    if (largeFiles.length > 0) {
      const largeBuffer = await TestFileHelpers.loadTestFile(largeFiles[0]);
      largeXml = largeBuffer.toString('utf-8');
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.log(`Large file lookup failed: ${reason}`);
  }

  if (largeXml === undefined) {
    console.log('Large file test skipped - no large files available');
    return;
  }

  const avgLarge = averageDetectionMs(largeXml, 10);
  console.log(`Large XML (${largeXml.length} bytes): avg ${avgLarge.toFixed(3)}ms`);
  expect(avgLarge).toBeLessThan(10); // Should still be reasonably fast
});
|
|
|
|
// Format detection for invoices loaded from ZUGFeRD v2 PDFs.
// Loading a PDF through EInvoice.fromPdf is best-effort: a file that cannot
// be loaded or parsed is logged and skipped. Once an invoice has been
// obtained, its format must be ZUGFeRD or Factur-X, and that assertion is
// allowed to fail the test.
tap.test('Format Detection - ZUGFeRD PDFs with embedded XML', async () => {
  const pdfFiles = await TestFileHelpers.getTestFiles(TestFileCategories.ZUGFERD_V2_CORRECT, '*.pdf');
  console.log(`Testing ${pdfFiles.length} ZUGFeRD v2 PDF files`);

  let successCount = 0;
  for (const file of pdfFiles.slice(0, 5)) { // Test first 5 files for speed
    let einvoice;
    try {
      const pdfBuffer = await TestFileHelpers.loadTestFile(file);
      einvoice = await EInvoice.fromPdf(pdfBuffer);
    } catch (error) {
      // Only load/extraction problems are tolerated here; include the reason
      // so an unexpected failure is visible in the log.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`○ ${path.basename(file)}: PDF extraction not available (${reason})`);
      continue;
    }

    // Deliberately outside the try block: a wrong format must fail the test
    // rather than be reported as "extraction not available".
    const format = einvoice.getFormat();
    expect([InvoiceFormat.ZUGFERD, InvoiceFormat.FACTURX]).toContain(format);

    successCount++;
    console.log(`✓ ${path.basename(file)}: ${format}`);
  }

  if (successCount > 0) {
    console.log(`Successfully detected format from ${successCount} PDF files`);
  }
});
|
|
|
|
// Prints the PerformanceUtils report and checks that the 'cii-detection' and
// 'ubl-detection' measurements, when stats exist for them, average under 5ms.
tap.test('Format Detection - Performance Summary', async () => {
  console.log('\n' + PerformanceUtils.generateReport());

  // Check that detection is generally fast
  const trackedMeasurements = [
    { label: 'cii-detection', title: 'CII' },
    { label: 'ubl-detection', title: 'UBL' },
  ];

  for (const { label, title } of trackedMeasurements) {
    const stats = PerformanceUtils.getStats(label);
    if (!stats) {
      continue;
    }

    expect(stats.avg).toBeLessThan(5); // Average should be under 5ms
    console.log(`${title} detection average: ${stats.avg.toFixed(2)}ms`);
  }
});
|
|
|
|
// Placeholder for confidence-scoring tests; it currently only logs a notice
// and asserts nothing.
tap.test('Format Detection - Confidence scoring', async () => {
  const placeholderNotice = 'Confidence scoring tests - placeholder for future implementation';
  console.log(placeholderNotice);

  // Sketch of the checks to add once confidence scoring is implemented:
  //   const result = FormatDetector.detectFormatWithConfidence(xml);
  //   expect(result.format).toEqual(InvoiceFormat.UBL);
  //   expect(result.confidence).toBeGreaterThan(0.8);
});
|
|
|
|
// Start the tap runner for the tests registered in this file.
tap.start();