/**
 * @file test.perf-10.cache-efficiency.ts
 * @description Performance tests for cache efficiency and optimization
 */

import { createHash } from 'node:crypto';
import { performance } from 'node:perf_hooks';

import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import { FormatDetector } from '../../../ts/formats/utils/format.detector.js';

const performanceTracker = new PerformanceTracker('PERF-10: Cache Efficiency');

/** Value stored in the format detection cache. */
interface CachedFormat {
  format: ReturnType<typeof FormatDetector.detectFormat> | 'unknown';
  timestamp: number;
}

/** Definition of one validation cache configuration under test. */
interface ValidationStrategy {
  name: string;
  cacheSize: number;
  ttl: number;
  lru?: boolean;
}

/** Measured outcome of one validation cache configuration. */
interface ValidationStrategyResult {
  name: string;
  cacheSize: number;
  ttl: number;
  lru: boolean;
  totalRequests: number;
  cacheHits: number;
  cacheMisses: number;
  hitRate: string;
  totalTime: number;
  avgTime: string;
  finalCacheSize: number;
  memoryUsage: string;
}

type EvictionPolicy = 'ttl' | 'fifo' | 'lru' | 'adaptive';

/** Definition of one invalidation strategy under test. */
interface InvalidationStrategy {
  name: string;
  ttl: number;
  maxSize: number;
  policy: EvictionPolicy;
}

/** Entry stored by the invalidation strategy caches. */
interface InvalidationCacheEntry {
  data: unknown;
  timestamp: number;
  accessCount: number;
  lastAccess: number;
  size: number;
}

/** Measured outcome of one invalidation strategy. */
interface InvalidationStrategyResult {
  name: string;
  policy: EvictionPolicy;
  ttl: number;
  maxSize: number;
  hits: number;
  misses: number;
  hitRate: string;
  evictions: number;
  evictionRate: string;
  finalCacheSize: number;
  totalTime: number;
  avgAccessTime: string;
}

/** Counters for one cacheable corpus operation. */
interface OperationStats {
  count: number;
  duplicates: number;
}

/**
 * Derives a cache key from the complete content.
 *
 * Hashing the whole input avoids the collisions a truncated prefix produces
 * for inputs that only differ after a shared beginning.
 */
const contentKey = (content: string): string =>
  createHash('sha256').update(content).digest('hex');

/** Divides two numbers, returning 0 instead of NaN/Infinity for a non-positive denominator. */
const safeRatio = (numerator: number, denominator: number): number =>
  denominator > 0 ? numerator / denominator : 0;

/** Formats `part / whole` as a percentage string with two decimals. */
const percent = (part: number, whole: number): string =>
  (safeRatio(part, whole) * 100).toFixed(2);

tap.test('PERF-10: Cache Efficiency - should demonstrate effective caching strategies', async () => {
  // Test 1: Format detection cache
  const formatDetectionCache = await performanceTracker.measureAsync(
    'format-detection-cache',
    async () => {
      // Test data
      const testDocuments = [
        { id: 'ubl-1', content: 'UBL-001' },
        { id: 'cii-1', content: 'CII-001' },
        { id: 'unknown-1', content: 'UNKNOWN-001' },
      ];
      const iterations = 100;

      // Test without cache (baseline)
      const withoutCache = { iterations: 0, totalTime: 0, avgTime: 0 };
      const startWithoutCache = performance.now();
      for (let i = 0; i < iterations; i++) {
        for (const doc of testDocuments) {
          FormatDetector.detectFormat(doc.content);
          withoutCache.iterations++;
        }
      }
      withoutCache.totalTime = performance.now() - startWithoutCache;
      withoutCache.avgTime = safeRatio(withoutCache.totalTime, withoutCache.iterations);

      // Simple TTL cache keyed by a hash of the full content
      const withCache = { iterations: 0, totalTime: 0, avgTime: 0, cacheHits: 0, cacheMisses: 0 };
      const formatCache = new Map<string, CachedFormat>();
      const cacheMaxAge = 60000; // 1 minute

      // Synchronous like the baseline call, so both loops measure comparable work.
      const detectFormatWithCache = (content: string): CachedFormat['format'] => {
        const key = contentKey(content);

        const cached = formatCache.get(key);
        if (cached && Date.now() - cached.timestamp < cacheMaxAge) {
          withCache.cacheHits++;
          return cached.format;
        }

        // Cache miss: detect, then store and return the same normalized value.
        withCache.cacheMisses++;
        const format = FormatDetector.detectFormat(content) || 'unknown';
        formatCache.set(key, { format, timestamp: Date.now() });
        return format;
      };

      // Test with cache
      const startWithCache = performance.now();
      for (let i = 0; i < iterations; i++) {
        for (const doc of testDocuments) {
          detectFormatWithCache(doc.content);
          withCache.iterations++;
        }
      }
      withCache.totalTime = performance.now() - startWithCache;
      withCache.avgTime = safeRatio(withCache.totalTime, withCache.iterations);

      // Calculate improvement
      const improvement = {
        speedup: safeRatio(withoutCache.avgTime, withCache.avgTime).toFixed(2),
        timeReduction: percent(withoutCache.totalTime - withCache.totalTime, withoutCache.totalTime),
        hitRate: percent(withCache.cacheHits, withCache.iterations),
        efficiency:
          withCache.cacheHits > 0
            ? percent(withCache.cacheHits, withCache.cacheHits + withCache.cacheMisses)
            : '0',
      };

      return { withoutCache, withCache, improvement };
    }
  );

  // Test 2: Validation cache
  const validationCache = await performanceTracker.measureAsync(
    'validation-cache',
    async () => {
      const cacheStrategies: ValidationStrategyResult[] = [];

      // Test invoice
      const testInvoice = {
        format: 'ubl' as const,
        data: {
          documentType: 'INVOICE',
          invoiceNumber: 'CACHE-VAL-001',
          issueDate: '2024-03-05',
          seller: { name: 'Cache Test Seller', address: 'Address', country: 'US', taxId: 'US123' },
          buyer: { name: 'Cache Test Buyer', address: 'Address', country: 'US', taxId: 'US456' },
          items: Array.from({ length: 20 }, (_, i) => ({
            description: `Item ${i + 1}`,
            quantity: 1,
            unitPrice: 100,
            vatRate: 10,
            lineTotal: 100,
          })),
          totals: { netAmount: 2000, vatAmount: 200, grossAmount: 2200 },
        },
      };
      type TestInvoice = typeof testInvoice;

      // Copy of the test invoice that differs only in its invoice number.
      const withInvoiceNumber = (invoiceNumber: string): TestInvoice => ({
        ...testInvoice,
        data: { ...testInvoice.data, invoiceNumber },
      });

      // Cache strategies to test
      const strategies: ValidationStrategy[] = [
        { name: 'No cache', cacheSize: 0, ttl: 0 },
        { name: 'Small cache', cacheSize: 10, ttl: 30000 },
        { name: 'Medium cache', cacheSize: 100, ttl: 60000 },
        { name: 'Large cache', cacheSize: 1000, ttl: 300000 },
        { name: 'LRU cache', cacheSize: 50, ttl: 120000, lru: true },
      ];

      // Mixed workload, built once so every strategy processes the same requests.
      const workload: TestInvoice[] = [];
      // Repeated validations of same invoice
      for (let i = 0; i < 50; i++) {
        workload.push(testInvoice);
      }
      // Variations of the invoice
      for (let i = 0; i < 30; i++) {
        workload.push(withInvoiceNumber(`CACHE-VAL-${i + 2}`));
      }
      // Repeat some variations
      for (let i = 0; i < 20; i++) {
        workload.push(withInvoiceNumber(`CACHE-VAL-${(i % 10) + 2}`));
      }

      for (const strategy of strategies) {
        const cache = new Map<
          string,
          { result: { valid: boolean; errors: string[] }; timestamp: number; accessCount: number }
        >();
        let cacheHits = 0;
        let cacheMisses = 0;

        const validateWithCache = async (invoice: TestInvoice) => {
          // Key on the full serialized invoice; a truncated prefix ends before
          // the invoice number and would map every variation to the same entry.
          const key = contentKey(JSON.stringify(invoice));

          // Check cache
          const cached = cache.get(key);
          if (cached && Date.now() - cached.timestamp < strategy.ttl) {
            cacheHits++;
            cached.accessCount++;
            if (strategy.lru) {
              // Re-insert so Map iteration order runs from least to most recently used.
              cache.delete(key);
              cache.set(key, cached);
            }
            return cached.result;
          }

          // Cache miss
          cacheMisses++;

          // Mock validation result for performance testing
          const result = { valid: true, errors: [] as string[] };

          // Cache management
          if (strategy.cacheSize > 0) {
            // Drop an expired entry for this key so it neither counts towards
            // the size limit nor keeps its old position in the eviction order.
            cache.delete(key);
            if (cache.size >= strategy.cacheSize) {
              // First key in iteration order: oldest insertion for the plain
              // strategies, least recently used for the LRU strategy.
              const keyToEvict = cache.keys().next().value;
              if (keyToEvict !== undefined) {
                cache.delete(keyToEvict);
              }
            }
            cache.set(key, { result, timestamp: Date.now(), accessCount: 1 });
          }

          return result;
        };

        // Process workload
        const startTime = performance.now();
        for (const invoice of workload) {
          await validateWithCache(invoice);
        }
        const totalTime = performance.now() - startTime;

        cacheStrategies.push({
          name: strategy.name,
          cacheSize: strategy.cacheSize,
          ttl: strategy.ttl,
          lru: strategy.lru ?? false,
          totalRequests: workload.length,
          cacheHits,
          cacheMisses,
          hitRate: percent(cacheHits, workload.length),
          totalTime,
          avgTime: safeRatio(totalTime, workload.length).toFixed(4),
          finalCacheSize: cache.size,
          memoryUsage: (cache.size * 1024).toFixed(0), // Rough estimate in bytes
        });
      }

      // Find optimal strategy: highest hit rate relative to average request time.
      const score = (s: ValidationStrategyResult): number =>
        parseFloat(s.hitRate) / (parseFloat(s.avgTime) + 1);
      const validStrategies = cacheStrategies.filter((s) => s.cacheSize > 0);
      const optimalStrategy: ValidationStrategyResult | null =
        validStrategies.length > 0
          ? validStrategies.reduce((best, current) => (score(current) > score(best) ? current : best))
          : null;

      return { cacheStrategies, optimalStrategy };
    }
  );

  // Test 3: Schema cache efficiency
  const schemaCache = await performanceTracker.measureAsync(
    'schema-cache-efficiency',
    async () => {
      const schemaCaching = { enabled: false, tests: [] as unknown[] };

      // Simulate schema validation with and without caching
      const schemas: Record<string, { size: number; parseTime: number }> = {
        ubl: { size: 1024 * 50, parseTime: 50 }, // 50KB, 50ms parse time
        cii: { size: 1024 * 60, parseTime: 60 }, // 60KB, 60ms parse time
        zugferd: { size: 1024 * 80, parseTime: 80 }, // 80KB, 80ms parse time
        xrechnung: { size: 1024 * 70, parseTime: 70 }, // 70KB, 70ms parse time
      };

      const loadedSchemas = new Map<
        string,
        { schema: { format: string; size: number }; loadTime: number }
      >();

      // Simulates parsing a schema by waiting for its configured parse time.
      const loadSchemaWithoutCache = async (format: string) => {
        const schema = schemas[format];
        if (!schema) {
          throw new Error(`Unknown schema format: ${format}`);
        }
        await new Promise((resolve) => setTimeout(resolve, schema.parseTime));
        return { format, size: schema.size };
      };

      const loadSchemaWithCache = async (format: string) => {
        const cached = loadedSchemas.get(format);
        if (cached) {
          schemaCaching.enabled = true;
          return cached.schema;
        }

        const schema = await loadSchemaWithoutCache(format);
        loadedSchemas.set(format, { schema, loadTime: Date.now() });
        return schema;
      };

      // Test workload: one initial load per schema, then repeated use.
      const formats = Object.keys(schemas);
      const workload: string[] = [...formats];
      for (let i = 0; i < 100; i++) {
        workload.push(formats[i % formats.length] as string);
      }

      // Test without cache
      const startWithoutCache = performance.now();
      for (const format of workload) {
        await loadSchemaWithoutCache(format);
      }
      const timeWithoutCache = performance.now() - startWithoutCache;

      // Test with cache
      const startWithCache = performance.now();
      for (const format of workload) {
        await loadSchemaWithCache(format);
      }
      const timeWithCache = performance.now() - startWithCache;

      // Calculate memory usage from the schemas that ended up cached
      let totalCachedSize = 0;
      for (const entry of loadedSchemas.values()) {
        totalCachedSize += entry.schema.size;
      }

      const improvement = {
        timeWithoutCache,
        timeWithCache,
        speedup: safeRatio(timeWithoutCache, timeWithCache).toFixed(2),
        timeReduction: percent(timeWithoutCache - timeWithCache, timeWithoutCache),
        memoryCost: (totalCachedSize / 1024).toFixed(2), // KB
        schemasLoaded: workload.length,
        uniqueSchemas: loadedSchemas.size,
      };

      return { schemaCaching, improvement };
    }
  );

  // Test 4: Corpus cache analysis
  const corpusCacheAnalysis = await performanceTracker.measureAsync(
    'corpus-cache-analysis',
    async () => {
      const files = await CorpusLoader.loadPattern('**/*.xml');

      const cacheableOperations: Record<'formatDetection' | 'parsing' | 'validation', OperationStats> = {
        formatDetection: { count: 0, duplicates: 0 },
        parsing: { count: 0, duplicates: 0 },
        validation: { count: 0, duplicates: 0 },
      };

      // Records one performed operation and whether a cache could have served it.
      const record = (stats: OperationStats, isDuplicate: boolean): void => {
        stats.count++;
        if (isDuplicate) {
          stats.duplicates++;
        }
      };

      // Track unique content hashes
      const contentHashes = new Map<string, number>();
      const formatResults = new Map<string, unknown>();

      // Sample corpus files
      const sampleFiles = files.slice(0, 100);

      for (const file of sampleFiles) {
        try {
          const content = await plugins.fs.readFile(file.path, 'utf-8');
          // Hash the whole file: a truncated prefix would flag every file that
          // starts with the same XML declaration as a duplicate.
          const hash = contentKey(content);

          // Track content duplicates
          const seenBefore = contentHashes.get(hash) ?? 0;
          contentHashes.set(hash, seenBefore + 1);
          const isDuplicate = seenBefore > 0;

          // Perform operations; duplicates are only counted for operations that
          // actually ran, so they can never exceed the operation count.
          const format = FormatDetector.detectFormat(content);
          record(cacheableOperations.formatDetection, isDuplicate);

          if (format && format !== 'unknown') {
            formatResults.set(hash, format);

            const invoice = await EInvoice.fromXml(content);
            record(cacheableOperations.parsing, isDuplicate);

            await invoice.validate();
            record(cacheableOperations.validation, isDuplicate);
          }
        } catch {
          // Best effort: skip files that cannot be read, parsed or validated.
        }
      }

      // Calculate potential savings from assumed average operation times
      const avgFormatDetectionTime = 5; // ms
      const avgParsingTime = 20; // ms
      const avgValidationTime = 50; // ms

      const savingsFor = (stats: OperationStats, avgTimeMs: number) => ({
        duplicateRatio: percent(stats.duplicates, stats.count),
        timeSavings: stats.duplicates * avgTimeMs,
      });

      const formatDetection = savingsFor(cacheableOperations.formatDetection, avgFormatDetectionTime);
      const parsing = savingsFor(cacheableOperations.parsing, avgParsingTime);
      const validation = savingsFor(cacheableOperations.validation, avgValidationTime);

      const potentialSavings = {
        formatDetection,
        parsing,
        validation,
        totalTimeSavings: formatDetection.timeSavings + parsing.timeSavings + validation.timeSavings,
        memoryCost: contentHashes.size * 100, // Rough estimate: 100 bytes per cached item
      };

      return { cacheableOperations, potentialSavings };
    }
  );

  // Test 5: Cache invalidation strategies
  const cacheInvalidation = await performanceTracker.measureAsync(
    'cache-invalidation-strategies',
    async () => {
      const strategyResults: InvalidationStrategyResult[] = [];

      // Test different invalidation strategies
      const strategies: InvalidationStrategy[] = [
        { name: 'TTL only', ttl: 60000, maxSize: Infinity, policy: 'ttl' },
        { name: 'Size limited', ttl: Infinity, maxSize: 50, policy: 'fifo' },
        { name: 'LRU with TTL', ttl: 120000, maxSize: 100, policy: 'lru' },
        { name: 'Adaptive', ttl: 60000, maxSize: 100, policy: 'adaptive' },
      ];

      // Simulate workload with temporal locality. Generated once so that every
      // strategy is measured against the same access sequence.
      const workloadSize = 500;
      const uniqueItems = 200;
      const workload: string[] = [];
      for (let i = 0; i < workloadSize; i++) {
        if (i < 100) {
          // Initial unique accesses
          workload.push(`item-${i % uniqueItems}`);
        } else if (i < 300) {
          // Repeated access to popular items
          workload.push(`item-${Math.floor(Math.random() * 20)}`);
        } else if (Math.random() < 0.3) {
          // Mixed access pattern: access recent item
          workload.push(`item-${Math.floor(Math.random() * 50)}`);
        } else {
          // Mixed access pattern: access any item
          workload.push(`item-${Math.floor(Math.random() * uniqueItems)}`);
        }
      }

      for (const strategy of strategies) {
        const cache = new Map<string, InvalidationCacheEntry>();
        let hits = 0;
        let misses = 0;
        let evictions = 0;
        // Logical clock for recency: millisecond timestamps tie within a fast
        // run, which would make LRU indistinguishable from FIFO.
        let accessClock = 0;

        const cacheGet = (key: string): unknown => {
          const entry = cache.get(key);

          if (!entry) {
            misses++;
            return null;
          }

          // Check TTL
          if (strategy.ttl !== Infinity && Date.now() - entry.timestamp > strategy.ttl) {
            cache.delete(key);
            evictions++;
            misses++;
            return null;
          }

          // Update access info
          entry.accessCount++;
          entry.lastAccess = ++accessClock;
          hits++;

          return entry.data;
        };

        const cacheSet = (key: string, data: unknown, size: number = 1): void => {
          // Check size limit
          if (cache.size >= strategy.maxSize) {
            let keyToEvict: string | undefined;

            switch (strategy.policy) {
              case 'fifo': {
                keyToEvict = cache.keys().next().value;
                break;
              }

              case 'lru': {
                let oldestAccess = Infinity;
                for (const [k, v] of cache.entries()) {
                  if (v.lastAccess < oldestAccess) {
                    oldestAccess = v.lastAccess;
                    keyToEvict = k;
                  }
                }
                break;
              }

              case 'adaptive': {
                // Evict based on access frequency and age
                const now = Date.now();
                let lowestScore = Infinity;
                for (const [k, v] of cache.entries()) {
                  // Clamp the age to 1ms: a zero age turns the score into
                  // NaN/Infinity, which never compares lower, so nothing
                  // would be evicted.
                  const ageMs = Math.max(now - v.timestamp, 1);
                  const score = v.accessCount / (ageMs / 1000);
                  if (score < lowestScore) {
                    lowestScore = score;
                    keyToEvict = k;
                  }
                }
                break;
              }

              default:
                // 'ttl' entries are only removed on expiry in cacheGet.
                break;
            }

            if (keyToEvict !== undefined) {
              cache.delete(keyToEvict);
              evictions++;
            }
          }

          cache.set(key, {
            data,
            timestamp: Date.now(),
            accessCount: 0,
            lastAccess: ++accessClock,
            size,
          });
        };

        // Process workload
        const startTime = performance.now();
        for (const key of workload) {
          const cached = cacheGet(key);
          if (cached === null) {
            // Simulate data generation
            cacheSet(key, { key, value: Math.random() });
          }
        }
        const totalTime = performance.now() - startTime;

        strategyResults.push({
          name: strategy.name,
          policy: strategy.policy,
          ttl: strategy.ttl,
          maxSize: strategy.maxSize,
          hits,
          misses,
          hitRate: percent(hits, hits + misses),
          evictions,
          evictionRate: percent(evictions, workloadSize),
          finalCacheSize: cache.size,
          totalTime,
          avgAccessTime: safeRatio(totalTime, workloadSize).toFixed(4),
        });
      }

      // Find best strategy: hit rate penalized by eviction rate.
      const score = (s: InvalidationStrategyResult): number =>
        parseFloat(s.hitRate) - parseFloat(s.evictionRate);
      const bestStrategy: InvalidationStrategyResult | null =
        strategyResults.length > 0
          ? strategyResults.reduce((best, current) => (score(current) > score(best) ? current : best))
          : null;

      return { strategies: strategyResults, bestStrategy };
    }
  );

  // Summary
  console.log('\n=== PERF-10: Cache Efficiency Test Summary ===');

  console.log('\nFormat Detection Cache:');
  console.log(`  Without cache: ${formatDetectionCache.withoutCache.totalTime.toFixed(2)}ms for ${formatDetectionCache.withoutCache.iterations} ops`);
  console.log(`  With cache: ${formatDetectionCache.withCache.totalTime.toFixed(2)}ms for ${formatDetectionCache.withCache.iterations} ops`);
  console.log(`  Cache hits: ${formatDetectionCache.withCache.cacheHits}, misses: ${formatDetectionCache.withCache.cacheMisses}`);
  console.log(`  Speedup: ${formatDetectionCache.improvement.speedup}x`);
  console.log(`  Hit rate: ${formatDetectionCache.improvement.hitRate}%`);
  console.log(`  Time reduction: ${formatDetectionCache.improvement.timeReduction}%`);

  console.log('\nValidation Cache Strategies:');
  console.log('  Strategy     | Size | TTL    | Requests | Hits | Hit Rate | Avg Time | Memory');
  console.log('  -------------|------|--------|----------|------|----------|----------|--------');
  validationCache.cacheStrategies.forEach((strategy: ValidationStrategyResult) => {
    console.log(`  ${strategy.name.padEnd(12)} | ${String(strategy.cacheSize).padEnd(4)} | ${String(strategy.ttl).padEnd(6)} | ${String(strategy.totalRequests).padEnd(8)} | ${String(strategy.cacheHits).padEnd(4)} | ${strategy.hitRate.padEnd(8)}% | ${strategy.avgTime.padEnd(8)}ms | ${strategy.memoryUsage}B`);
  });
  if (validationCache.optimalStrategy) {
    console.log(`  Optimal strategy: ${validationCache.optimalStrategy.name}`);
  }

  console.log('\nSchema Cache Efficiency:');
  console.log(`  Without cache: ${schemaCache.improvement.timeWithoutCache.toFixed(2)}ms`);
  console.log(`  With cache: ${schemaCache.improvement.timeWithCache.toFixed(2)}ms`);
  console.log(`  Speedup: ${schemaCache.improvement.speedup}x`);
  console.log(`  Time reduction: ${schemaCache.improvement.timeReduction}%`);
  console.log(`  Memory cost: ${schemaCache.improvement.memoryCost}KB`);
  console.log(`  Schemas loaded: ${schemaCache.improvement.schemasLoaded}, unique: ${schemaCache.improvement.uniqueSchemas}`);

  console.log('\nCorpus Cache Analysis:');
  console.log('  Operation        | Count | Duplicates | Ratio  | Time Savings');
  console.log('  -----------------|-------|------------|--------|-------------');
  (['formatDetection', 'parsing', 'validation'] as const).forEach((op) => {
    const stats = corpusCacheAnalysis.cacheableOperations[op];
    const savings = corpusCacheAnalysis.potentialSavings[op];
    console.log(`  ${op.padEnd(16)} | ${String(stats.count).padEnd(5)} | ${String(stats.duplicates).padEnd(10)} | ${savings.duplicateRatio.padEnd(6)}% | ${savings.timeSavings}ms`);
  });
  console.log(`  Total potential time savings: ${corpusCacheAnalysis.potentialSavings.totalTimeSavings}ms`);
  console.log(`  Estimated memory cost: ${(corpusCacheAnalysis.potentialSavings.memoryCost / 1024).toFixed(2)}KB`);

  console.log('\nCache Invalidation Strategies:');
  console.log('  Strategy      | Policy   | Hits | Hit Rate | Evictions | Final Size');
  console.log('  --------------|----------|------|----------|-----------|------------');
  cacheInvalidation.strategies.forEach((strategy: InvalidationStrategyResult) => {
    console.log(`  ${strategy.name.padEnd(13)} | ${strategy.policy.padEnd(8)} | ${String(strategy.hits).padEnd(4)} | ${strategy.hitRate.padEnd(8)}% | ${String(strategy.evictions).padEnd(9)} | ${strategy.finalCacheSize}`);
  });
  if (cacheInvalidation.bestStrategy) {
    console.log(`  Best strategy: ${cacheInvalidation.bestStrategy.name} (${cacheInvalidation.bestStrategy.hitRate}% hit rate)`);
  }

  // Performance targets check
  console.log('\n=== Performance Targets Check ===');
  const cacheSpeedup = parseFloat(formatDetectionCache.improvement.speedup);
  const targetSpeedup = 2; // Target: >2x speedup with caching

  console.log(`Cache speedup: ${cacheSpeedup}x ${cacheSpeedup > targetSpeedup ? '✅' : '⚠️'} (target: >${targetSpeedup}x)`);

  // Overall performance summary
  console.log('\n=== Overall Performance Summary ===');
  console.log(performanceTracker.getSummary());
});

tap.start();