einvoice/test/suite/einvoice_performance/test.perf-10.cache-efficiency.ts

713 lines
25 KiB
TypeScript
Raw Normal View History

2025-05-25 19:45:37 +00:00
/**
* @file test.perf-10.cache-efficiency.ts
* @description Performance tests for cache efficiency and optimization
*/
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
2025-05-29 13:35:36 +00:00
import { FormatDetector } from '../../../ts/formats/utils/format.detector.js';
2025-05-25 19:45:37 +00:00
// Tracker shared by the whole suite: each scenario below is wrapped in
// performanceTracker.measureAsync(label, fn) and the final report prints getSummary().
const performanceTracker = new PerformanceTracker('PERF-10: Cache Efficiency');
tap.test('PERF-10: Cache Efficiency - should demonstrate effective caching strategies', async (t) => {
// Test 1: Format detection cache
// Runs the same detection workload twice - directly against FormatDetector and through a
// small TTL cache - and reports timings plus cache hit statistics.
const formatDetectionCache = await performanceTracker.measureAsync(
  'format-detection-cache',
  async () => {
    const withoutCache = { iterations: 0, totalTime: 0, avgTime: 0 };
    const withCache = { iterations: 0, totalTime: 0, avgTime: 0, cacheHits: 0, cacheMisses: 0 };

    // Test data
    const testDocuments = [
      {
        id: 'ubl-1',
        content: '<?xml version="1.0"?><Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"><ID>UBL-001</ID></Invoice>'
      },
      {
        id: 'cii-1',
        content: '<?xml version="1.0"?><rsm:CrossIndustryInvoice xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"><ID>CII-001</ID></rsm:CrossIndustryInvoice>'
      },
      {
        id: 'unknown-1',
        content: '<?xml version="1.0"?><UnknownFormat><ID>UNKNOWN-001</ID></UnknownFormat>'
      }
    ];

    // performance.now() has sub-millisecond resolution; with Date.now() the cached run
    // frequently measured 0ms, which turned the speedup into Infinity or NaN.
    const elapsedMs = (since: number): number => Number((performance.now() - since).toFixed(3));
    // Ratio helper that never divides by zero, so the report cannot contain NaN/Infinity.
    const safeRatio = (numerator: number, denominator: number): number =>
      denominator > 0 ? numerator / denominator : 0;

    // Test without cache (baseline)
    const iterations = 100;
    const startWithoutCache = performance.now();
    for (let i = 0; i < iterations; i++) {
      for (const doc of testDocuments) {
        FormatDetector.detectFormat(doc.content);
        withoutCache.iterations++;
      }
    }
    withoutCache.totalTime = elapsedMs(startWithoutCache);
    withoutCache.avgTime = safeRatio(withoutCache.totalTime, withoutCache.iterations);

    // Implement simple cache
    const formatCache = new Map<string, { format: string; timestamp: number }>();
    const cacheMaxAge = 60000; // 1 minute

    const detectFormatWithCache = async (content: string) => {
      // Key on the complete document. The previous key (first 20 base64 characters, i.e. the
      // first 15 bytes) only covered `<?xml version="`, so every document mapped to the same
      // entry and the cache handed back the first document's format for all of them.
      const cached = formatCache.get(content);
      if (cached && Date.now() - cached.timestamp < cacheMaxAge) {
        withCache.cacheHits++;
        return cached.format;
      }

      // Cache miss: detect, normalise a falsy result to 'unknown', store and return the
      // same value a later cache hit would return.
      withCache.cacheMisses++;
      const format = FormatDetector.detectFormat(content) || 'unknown';
      formatCache.set(content, { format, timestamp: Date.now() });
      return format;
    };

    // Test with cache
    const startWithCache = performance.now();
    for (let i = 0; i < iterations; i++) {
      for (const doc of testDocuments) {
        await detectFormatWithCache(doc.content);
        withCache.iterations++;
      }
    }
    withCache.totalTime = elapsedMs(startWithCache);
    withCache.avgTime = safeRatio(withCache.totalTime, withCache.iterations);

    // Calculate improvement (all values are pre-formatted strings for the summary output)
    const lookups = withCache.cacheHits + withCache.cacheMisses;
    const improvement = {
      speedup: safeRatio(withoutCache.avgTime, withCache.avgTime).toFixed(2),
      timeReduction: (safeRatio(withoutCache.totalTime - withCache.totalTime, withoutCache.totalTime) * 100).toFixed(2),
      hitRate: (safeRatio(withCache.cacheHits, withCache.iterations) * 100).toFixed(2),
      efficiency: withCache.cacheHits > 0 ? (safeRatio(withCache.cacheHits, lookups) * 100).toFixed(2) : '0'
    };

    return { withoutCache, withCache, improvement };
  }
);
// Test 2: Validation cache
// Replays one fixed workload of invoices against several cache configurations (size, TTL,
// FIFO vs. LRU eviction) around a mocked validation call and reports hit statistics.
const validationCache = await performanceTracker.measureAsync(
  'validation-cache',
  async () => {
    type CacheStrategy = { name: string; cacheSize: number; ttl: number; lru?: boolean };
    type StrategyReport = {
      name: string;
      cacheSize: number;
      ttl: number;
      lru: boolean;
      totalRequests: number;
      cacheHits: number;
      cacheMisses: number;
      hitRate: string;
      totalTime: number;
      avgTime: string;
      finalCacheSize: number;
      memoryUsage: string;
    };

    // Test invoice
    const testInvoice = {
      format: 'ubl' as const,
      data: {
        documentType: 'INVOICE',
        invoiceNumber: 'CACHE-VAL-001',
        issueDate: '2024-03-05',
        seller: { name: 'Cache Test Seller', address: 'Address', country: 'US', taxId: 'US123' },
        buyer: { name: 'Cache Test Buyer', address: 'Address', country: 'US', taxId: 'US456' },
        items: Array.from({ length: 20 }, (_, i) => ({
          description: `Item ${i + 1}`,
          quantity: 1,
          unitPrice: 100,
          vatRate: 10,
          lineTotal: 100
        })),
        totals: { netAmount: 2000, vatAmount: 200, grossAmount: 2200 }
      }
    };

    // Cache strategies to test
    const strategies: CacheStrategy[] = [
      { name: 'No cache', cacheSize: 0, ttl: 0 },
      { name: 'Small cache', cacheSize: 10, ttl: 30000 },
      { name: 'Medium cache', cacheSize: 100, ttl: 60000 },
      { name: 'Large cache', cacheSize: 1000, ttl: 300000 },
      { name: 'LRU cache', cacheSize: 50, ttl: 120000, lru: true }
    ];

    // Mixed workload, built once because it is identical for every strategy.
    // The invoices are only read, so variations can share the nested objects.
    const withInvoiceNumber = (invoiceNumber: string): typeof testInvoice => ({
      ...testInvoice,
      data: { ...testInvoice.data, invoiceNumber }
    });
    const workload: Array<typeof testInvoice> = [];
    // Repeated validations of same invoice
    for (let i = 0; i < 50; i++) {
      workload.push(testInvoice);
    }
    // Variations of the invoice
    for (let i = 0; i < 30; i++) {
      workload.push(withInvoiceNumber(`CACHE-VAL-${i + 2}`));
    }
    // Repeat some variations
    for (let i = 0; i < 20; i++) {
      workload.push(withInvoiceNumber(`CACHE-VAL-${(i % 10) + 2}`));
    }

    const cacheStrategies: StrategyReport[] = [];

    for (const strategy of strategies) {
      // A Map iterates in insertion order, so its first key is always the eviction candidate:
      // the oldest insert for FIFO, the least recently used entry for LRU (hits re-insert).
      const cache = new Map<string, { result: { valid: boolean; errors: string[] }; timestamp: number }>();
      let cacheHits = 0;
      let cacheMisses = 0;

      const validateWithCache = async (invoice: unknown) => {
        // Use the complete serialisation as key. The previous 50-character prefix ended
        // before the invoiceNumber value, so every invoice in the workload shared one key.
        const key = JSON.stringify(invoice);

        // Check cache
        const cached = cache.get(key);
        if (cached && Date.now() - cached.timestamp < strategy.ttl) {
          cacheHits++;
          if (strategy.lru) {
            // Move the entry to the most-recently-used end of the iteration order.
            cache.delete(key);
            cache.set(key, cached);
          }
          return cached.result;
        }

        // Cache miss
        cacheMisses++;
        // Mock validation result for performance testing
        const result = { valid: true, errors: [] as string[] };

        // Cache management
        if (strategy.cacheSize > 0) {
          // Replacing an expired entry for the same key does not grow the cache,
          // so only evict when a genuinely new key would exceed the limit.
          if (!cache.has(key) && cache.size >= strategy.cacheSize) {
            const victim = cache.keys().next().value;
            if (victim !== undefined) {
              cache.delete(victim);
            }
          }
          // Delete first so a refreshed entry is re-inserted at the newest position.
          cache.delete(key);
          cache.set(key, { result, timestamp: Date.now() });
        }
        return result;
      };

      // Process workload
      const startTime = Date.now();
      for (const invoice of workload) {
        await validateWithCache(invoice);
      }
      const totalTime = Date.now() - startTime;

      cacheStrategies.push({
        name: strategy.name,
        cacheSize: strategy.cacheSize,
        ttl: strategy.ttl,
        lru: strategy.lru || false,
        totalRequests: workload.length,
        cacheHits,
        cacheMisses,
        hitRate: ((cacheHits / workload.length) * 100).toFixed(2),
        totalTime,
        avgTime: (totalTime / workload.length).toFixed(2),
        finalCacheSize: cache.size,
        memoryUsage: (cache.size * 1024).toFixed(0) // Rough estimate in bytes
      });
    }

    // Find optimal strategy: highest hit rate per unit of average time, ignoring 'No cache'.
    const score = (report: StrategyReport): number =>
      parseFloat(report.hitRate) / (parseFloat(report.avgTime) + 1);
    const validStrategies = cacheStrategies.filter(s => s.cacheSize > 0);
    const optimalStrategy: StrategyReport | null = validStrategies.length > 0
      ? validStrategies.reduce((best, current) => (score(current) > score(best) ? current : best))
      : null;

    return { cacheStrategies, optimalStrategy };
  }
);
// Test 3: Schema cache efficiency
// Replays a list of schema loads twice - every load paying a simulated parse delay, then
// with loaded schemas kept in a Map - and compares the elapsed wall-clock time.
const schemaCache = await performanceTracker.measureAsync(
  'schema-cache-efficiency',
  async () => {
    const results = {
      schemaCaching: {
        enabled: false,
        tests: []
      },
      improvement: null
    };

    // Simulated schemas: size in bytes and artificial parse delay in milliseconds
    const schemas = {
      ubl: { size: 1024 * 50, parseTime: 50 }, // 50KB, 50ms parse time
      cii: { size: 1024 * 60, parseTime: 60 }, // 60KB, 60ms parse time
      zugferd: { size: 1024 * 80, parseTime: 80 }, // 80KB, 80ms parse time
      xrechnung: { size: 1024 * 70, parseTime: 70 } // 70KB, 70ms parse time
    };
    const loadedSchemas = new Map<string, { schema: any; loadTime: number }>();

    // Uncached load: wait for the schema's parse delay, then hand back a descriptor.
    const loadSchemaWithoutCache = async (format: string) => {
      const definition = schemas[format];
      if (!definition) {
        throw new Error(`Unknown schema format: ${format}`);
      }
      await new Promise(resolve => setTimeout(resolve, definition.parseTime));
      return { format, size: definition.size };
    };

    // Cached load: only the first request per format pays the parse delay.
    const loadSchemaWithCache = async (format: string) => {
      const hit = loadedSchemas.get(format);
      if (!hit) {
        const fresh = await loadSchemaWithoutCache(format);
        loadedSchemas.set(format, { schema: fresh, loadTime: Date.now() });
        return fresh;
      }
      results.schemaCaching.enabled = true;
      return hit.schema;
    };

    // Workload: one initial load per schema followed by 100 round-robin repeats
    const formats = Object.keys(schemas);
    const workload = [
      ...formats,
      ...Array.from({ length: 100 }, (_, i) => formats[i % formats.length])
    ];

    // Runs the whole workload sequentially through one loader and returns the elapsed ms.
    const timeWorkload = async (load: (format: string) => Promise<any>) => {
      const begin = Date.now();
      for (const format of workload) {
        await load(format);
      }
      return Date.now() - begin;
    };

    const timeWithoutCache = await timeWorkload(loadSchemaWithoutCache);
    const timeWithCache = await timeWorkload(loadSchemaWithCache);

    // Memory cost is the summed size of every schema that ended up cached
    const totalCachedSize = [...loadedSchemas.keys()].reduce(
      (sum, format) => sum + schemas[format].size,
      0
    );

    results.improvement = {
      timeWithoutCache,
      timeWithCache,
      speedup: (timeWithoutCache / timeWithCache).toFixed(2),
      timeReduction: ((timeWithoutCache - timeWithCache) / timeWithoutCache * 100).toFixed(2),
      memoryCost: (totalCachedSize / 1024).toFixed(2), // KB
      schemasLoaded: workload.length,
      uniqueSchemas: loadedSchemas.size
    };
    return results;
  }
);
// Test 4: Corpus cache analysis
// Samples up to 100 corpus XML files, runs detection / parsing / validation on each and
// counts how many of those operations were performed on content that had been seen before.
const corpusCacheAnalysis = await performanceTracker.measureAsync(
  'corpus-cache-analysis',
  async () => {
    const files = await CorpusLoader.loadPattern('**/*.xml');

    const cacheableOperations = {
      formatDetection: { count: 0, duplicates: 0 },
      parsing: { count: 0, duplicates: 0 },
      validation: { count: 0, duplicates: 0 }
    };
    // Files that could not be read, parsed or validated and were therefore skipped.
    let skippedFiles = 0;

    // Occurrences per distinct document. Keyed by the full content: the previous key
    // (first 32 base64 characters = first 24 bytes) typically covered only the XML
    // declaration, so unrelated files were reported as duplicates of each other.
    const contentCounts = new Map<string, number>();

    // Records one executed operation, flagging it as cacheable when the content is a repeat.
    const record = (stats: { count: number; duplicates: number }, isDuplicate: boolean): void => {
      stats.count++;
      if (isDuplicate) {
        stats.duplicates++;
      }
    };

    // Sample corpus files
    const sampleFiles = files.slice(0, 100);
    for (const file of sampleFiles) {
      try {
        const content = await plugins.fs.readFile(file.path, 'utf-8');

        // Track content duplicates
        const seenBefore = contentCounts.get(content) ?? 0;
        contentCounts.set(content, seenBefore + 1);
        const isDuplicate = seenBefore > 0;

        // Perform operations; a duplicate is only counted for operations that actually ran,
        // which keeps every duplicate ratio within 0-100%.
        const format = FormatDetector.detectFormat(content);
        record(cacheableOperations.formatDetection, isDuplicate);

        if (format && format !== 'unknown') {
          const invoice = await EInvoice.fromXml(content);
          record(cacheableOperations.parsing, isDuplicate);

          await invoice.validate();
          record(cacheableOperations.validation, isDuplicate);
        }
      } catch (error) {
        // Best effort: a file that fails at any stage is skipped, but counted.
        skippedFiles++;
      }
    }

    // Calculate potential savings from assumed average costs per operation
    const avgFormatDetectionTime = 5; // ms
    const avgParsingTime = 20; // ms
    const avgValidationTime = 50; // ms

    const savingsFor = (stats: { count: number; duplicates: number }, avgTime: number) => ({
      // Guard against an empty sample so the ratio is '0.00' instead of 'NaN'.
      duplicateRatio: (stats.count > 0 ? (stats.duplicates / stats.count) * 100 : 0).toFixed(2),
      timeSavings: stats.duplicates * avgTime
    });

    const formatDetection = savingsFor(cacheableOperations.formatDetection, avgFormatDetectionTime);
    const parsing = savingsFor(cacheableOperations.parsing, avgParsingTime);
    const validation = savingsFor(cacheableOperations.validation, avgValidationTime);

    const potentialSavings = {
      formatDetection,
      parsing,
      validation,
      totalTimeSavings: formatDetection.timeSavings + parsing.timeSavings + validation.timeSavings,
      memoryCost: contentCounts.size * 100 // Rough estimate: 100 bytes per cached item
    };

    return { cacheableOperations, potentialSavings, skippedFiles };
  }
);
// Test 5: Cache invalidation strategies
// Replays one randomly generated key workload against several eviction policies
// (TTL only, FIFO, LRU, adaptive) and reports hits, misses and evictions for each.
const cacheInvalidation = await performanceTracker.measureAsync(
  'cache-invalidation-strategies',
  async () => {
    type CachePolicy = 'ttl' | 'fifo' | 'lru' | 'adaptive';
    type CachedValue = { key: string; value: number };
    type CacheEntry = {
      data: CachedValue;
      timestamp: number;
      accessCount: number;
      lastAccess: number;
      size: number;
    };
    type StrategyReport = {
      name: string;
      policy: CachePolicy;
      ttl: number;
      maxSize: number;
      hits: number;
      misses: number;
      hitRate: string;
      evictions: number;
      evictionRate: string;
      finalCacheSize: number;
      totalTime: number;
      avgAccessTime: string;
    };

    // Test different invalidation strategies
    const strategies: Array<{ name: string; ttl: number; maxSize: number; policy: CachePolicy }> = [
      { name: 'TTL only', ttl: 60000, maxSize: Infinity, policy: 'ttl' },
      { name: 'Size limited', ttl: Infinity, maxSize: 50, policy: 'fifo' },
      { name: 'LRU with TTL', ttl: 120000, maxSize: 100, policy: 'lru' },
      { name: 'Adaptive', ttl: 60000, maxSize: 100, policy: 'adaptive' }
    ];

    // Simulate workload with temporal locality. It is generated once and shared by all
    // strategies; a fresh random workload per strategy would make the results incomparable.
    const workloadSize = 500;
    const uniqueItems = 200;
    const randomItem = (range: number): string => `item-${Math.floor(Math.random() * range)}`;
    const workload: string[] = [];
    for (let i = 0; i < workloadSize; i++) {
      if (i < 100) {
        // Initial unique accesses
        workload.push(`item-${i % uniqueItems}`);
      } else if (i < 300) {
        // Repeated access to popular items
        workload.push(randomItem(20));
      } else {
        // Mixed access pattern: 30% recent items, otherwise any item
        workload.push(randomItem(Math.random() < 0.3 ? 50 : uniqueItems));
      }
    }

    const reports: StrategyReport[] = [];

    for (const strategy of strategies) {
      const cache = new Map<string, CacheEntry>();
      let hits = 0;
      let misses = 0;
      let evictions = 0;
      // Logical clock for recency. The whole run usually fits into a few milliseconds, so
      // Date.now() based lastAccess values tie and cannot order entries.
      let tick = 0;

      const cacheGet = (key: string): CachedValue | null => {
        const entry = cache.get(key);
        if (!entry) {
          misses++;
          return null;
        }
        // Check TTL: an expired entry counts as both an eviction and a miss
        if (strategy.ttl !== Infinity && Date.now() - entry.timestamp > strategy.ttl) {
          cache.delete(key);
          evictions++;
          misses++;
          return null;
        }
        // Update access info
        entry.accessCount++;
        entry.lastAccess = ++tick;
        hits++;
        return entry.data;
      };

      // Chooses the entry to drop when the cache is full, according to the strategy's policy.
      const pickVictim = (): string | undefined => {
        switch (strategy.policy) {
          case 'fifo':
            // Map keys iterate in insertion order, so the first key is the oldest insert.
            return cache.keys().next().value;
          case 'lru': {
            let victim: string | undefined;
            let oldestAccess = Infinity;
            for (const [k, v] of cache.entries()) {
              if (v.lastAccess < oldestAccess) {
                oldestAccess = v.lastAccess;
                victim = k;
              }
            }
            return victim;
          }
          case 'adaptive': {
            // Evict based on access frequency and age. The +1 keeps the divisor positive:
            // with a bare age the score was NaN/Infinity for entries created in the current
            // millisecond, no victim was ever found and the cache grew past maxSize.
            let victim: string | undefined;
            let lowestScore = Infinity;
            const now = Date.now();
            for (const [k, v] of cache.entries()) {
              const ageSeconds = (now - v.timestamp) / 1000;
              const score = v.accessCount / (ageSeconds + 1);
              if (score < lowestScore) {
                lowestScore = score;
                victim = k;
              }
            }
            return victim;
          }
          default:
            // 'ttl' has no size-based eviction.
            return undefined;
        }
      };

      const cacheSet = (key: string, data: CachedValue, size: number = 1): void => {
        // Check size limit
        if (cache.size >= strategy.maxSize) {
          const victim = pickVictim();
          if (victim !== undefined) {
            cache.delete(victim);
            evictions++;
          }
        }
        cache.set(key, {
          data,
          timestamp: Date.now(),
          accessCount: 0,
          lastAccess: ++tick,
          size
        });
      };

      // Process workload
      const startTime = Date.now();
      for (const key of workload) {
        const cached = cacheGet(key);
        if (!cached) {
          // Simulate data generation
          cacheSet(key, { key, value: Math.random() });
        }
      }
      const totalTime = Date.now() - startTime;

      reports.push({
        name: strategy.name,
        policy: strategy.policy,
        ttl: strategy.ttl,
        maxSize: strategy.maxSize,
        hits,
        misses,
        hitRate: ((hits / (hits + misses)) * 100).toFixed(2),
        evictions,
        evictionRate: ((evictions / workloadSize) * 100).toFixed(2),
        finalCacheSize: cache.size,
        totalTime,
        avgAccessTime: (totalTime / workloadSize).toFixed(2)
      });
    }

    // Find best strategy: hit rate minus eviction rate, first strategy wins ties
    const score = (report: StrategyReport): number =>
      parseFloat(report.hitRate) - parseFloat(report.evictionRate);
    const bestStrategy = reports.reduce((best, current) => (score(current) > score(best) ? current : best));

    return { strategies: reports, bestStrategy };
  }
);
// Summary
// Prints the collected results of all five scenarios as plain-text tables. This section only
// reports; the speedup target at the end is logged as a pass/warn marker, not asserted.
console.log('\n=== PERF-10: Cache Efficiency Test Summary ===');

console.log('\nFormat Detection Cache:');
console.log(` Without cache: ${formatDetectionCache.withoutCache.totalTime}ms for ${formatDetectionCache.withoutCache.iterations} ops`);
console.log(` With cache: ${formatDetectionCache.withCache.totalTime}ms for ${formatDetectionCache.withCache.iterations} ops`);
console.log(` Cache hits: ${formatDetectionCache.withCache.cacheHits}, misses: ${formatDetectionCache.withCache.cacheMisses}`);
console.log(` Speedup: ${formatDetectionCache.improvement.speedup}x`);
console.log(` Hit rate: ${formatDetectionCache.improvement.hitRate}%`);
console.log(` Time reduction: ${formatDetectionCache.improvement.timeReduction}%`);

console.log('\nValidation Cache Strategies:');
console.log(' Strategy | Size | TTL | Requests | Hits | Hit Rate | Avg Time | Memory');
console.log(' -------------|------|--------|----------|------|----------|----------|--------');
// The row type is kept loose because it depends on how the scenario above declares its results.
validationCache.cacheStrategies.forEach((strategy: any) => {
  console.log(` ${strategy.name.padEnd(12)} | ${String(strategy.cacheSize).padEnd(4)} | ${String(strategy.ttl).padEnd(6)} | ${String(strategy.totalRequests).padEnd(8)} | ${String(strategy.cacheHits).padEnd(4)} | ${strategy.hitRate.padEnd(8)}% | ${strategy.avgTime.padEnd(8)}ms | ${strategy.memoryUsage}B`);
});
if (validationCache.optimalStrategy) {
  console.log(` Optimal strategy: ${validationCache.optimalStrategy.name}`);
}

console.log('\nSchema Cache Efficiency:');
console.log(` Without cache: ${schemaCache.improvement.timeWithoutCache}ms`);
console.log(` With cache: ${schemaCache.improvement.timeWithCache}ms`);
console.log(` Speedup: ${schemaCache.improvement.speedup}x`);
console.log(` Time reduction: ${schemaCache.improvement.timeReduction}%`);
console.log(` Memory cost: ${schemaCache.improvement.memoryCost}KB`);
console.log(` Schemas loaded: ${schemaCache.improvement.schemasLoaded}, unique: ${schemaCache.improvement.uniqueSchemas}`);

console.log('\nCorpus Cache Analysis:');
console.log(' Operation | Count | Duplicates | Ratio | Time Savings');
console.log(' -----------------|-------|------------|--------|-------------');
for (const op of ['formatDetection', 'parsing', 'validation'] as const) {
  const stats = corpusCacheAnalysis.cacheableOperations[op];
  const savings = corpusCacheAnalysis.potentialSavings[op];
  console.log(` ${op.padEnd(16)} | ${String(stats.count).padEnd(5)} | ${String(stats.duplicates).padEnd(10)} | ${savings.duplicateRatio.padEnd(6)}% | ${savings.timeSavings}ms`);
}
console.log(` Total potential time savings: ${corpusCacheAnalysis.potentialSavings.totalTimeSavings}ms`);
console.log(` Estimated memory cost: ${(corpusCacheAnalysis.potentialSavings.memoryCost / 1024).toFixed(2)}KB`);

console.log('\nCache Invalidation Strategies:');
console.log(' Strategy | Policy | Hits | Hit Rate | Evictions | Final Size');
console.log(' --------------|----------|------|----------|-----------|------------');
// The row type is kept loose because it depends on how the scenario above declares its results.
cacheInvalidation.strategies.forEach((strategy: any) => {
  console.log(` ${strategy.name.padEnd(13)} | ${strategy.policy.padEnd(8)} | ${String(strategy.hits).padEnd(4)} | ${strategy.hitRate.padEnd(8)}% | ${String(strategy.evictions).padEnd(9)} | ${strategy.finalCacheSize}`);
});
if (cacheInvalidation.bestStrategy) {
  console.log(` Best strategy: ${cacheInvalidation.bestStrategy.name} (${cacheInvalidation.bestStrategy.hitRate}% hit rate)`);
}

// Performance targets check
console.log('\n=== Performance Targets Check ===');
const cacheSpeedup = parseFloat(formatDetectionCache.improvement.speedup);
const targetSpeedup = 2; // Target: >2x speedup with caching
console.log(`Cache speedup: ${cacheSpeedup}x ${cacheSpeedup > targetSpeedup ? '✅' : '⚠️'} (target: >${targetSpeedup}x)`);

// Overall performance summary
console.log('\n=== Overall Performance Summary ===');
console.log(performanceTracker.getSummary());
});
tap.start();