/**
 * @file test.perf-08.large-files.ts
 * @description Performance tests for large file processing
 */
import { tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../../suite/corpus.loader.js';
import { PerformanceTracker } from '../../suite/performance.tracker.js';
import { FormatDetector } from '../../../ts/formats/utils/format.detector.js';

const performanceTracker = new PerformanceTracker('PERF-08: Large File Processing');
// Helper function to create UBL invoice XML
function createUBLInvoiceXML(data: any): string {
  const items = data.items.map((item: any, idx: number) => `
    <cac:InvoiceLine>
      <cbc:ID>${idx + 1}</cbc:ID>
      <cbc:InvoicedQuantity unitCode="C62">${item.quantity}</cbc:InvoicedQuantity>
      <cbc:LineExtensionAmount currencyID="${data.currency || 'EUR'}">${item.lineTotal}</cbc:LineExtensionAmount>
      <cac:Item>
        <cbc:Description>${item.description}</cbc:Description>
      </cac:Item>
      <cac:Price>
        <cbc:PriceAmount currencyID="${data.currency || 'EUR'}">${item.unitPrice}</cbc:PriceAmount>
      </cac:Price>
    </cac:InvoiceLine>`).join('');

  return `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
         xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
         xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:UBLVersionID>2.1</cbc:UBLVersionID>
  <cbc:ID>${data.invoiceNumber}</cbc:ID>
  <cbc:IssueDate>${data.issueDate}</cbc:IssueDate>
  <cbc:DueDate>${data.dueDate || data.issueDate}</cbc:DueDate>
  <cbc:InvoiceTypeCode>380</cbc:InvoiceTypeCode>
  <cbc:DocumentCurrencyCode>${data.currency || 'EUR'}</cbc:DocumentCurrencyCode>
  <cac:AccountingSupplierParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>${data.seller.name}</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:StreetName>${data.seller.address}</cbc:StreetName>
        <cbc:CityName>${data.seller.city || ''}</cbc:CityName>
        <cbc:PostalZone>${data.seller.postalCode || ''}</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>${data.seller.country}</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
      <cac:PartyTaxScheme>
        <cbc:CompanyID>${data.seller.taxId}</cbc:CompanyID>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:PartyTaxScheme>
    </cac:Party>
  </cac:AccountingSupplierParty>
  <cac:AccountingCustomerParty>
    <cac:Party>
      <cac:PartyName>
        <cbc:Name>${data.buyer.name}</cbc:Name>
      </cac:PartyName>
      <cac:PostalAddress>
        <cbc:StreetName>${data.buyer.address}</cbc:StreetName>
        <cbc:CityName>${data.buyer.city || ''}</cbc:CityName>
        <cbc:PostalZone>${data.buyer.postalCode || ''}</cbc:PostalZone>
        <cac:Country>
          <cbc:IdentificationCode>${data.buyer.country}</cbc:IdentificationCode>
        </cac:Country>
      </cac:PostalAddress>
      <cac:PartyTaxScheme>
        <cbc:CompanyID>${data.buyer.taxId}</cbc:CompanyID>
        <cac:TaxScheme>
          <cbc:ID>VAT</cbc:ID>
        </cac:TaxScheme>
      </cac:PartyTaxScheme>
    </cac:Party>
  </cac:AccountingCustomerParty>
  <cac:TaxTotal>
    <cbc:TaxAmount currencyID="${data.currency || 'EUR'}">${data.totals.vatAmount}</cbc:TaxAmount>
  </cac:TaxTotal>
  <cac:LegalMonetaryTotal>
    <cbc:TaxExclusiveAmount currencyID="${data.currency || 'EUR'}">${data.totals.netAmount}</cbc:TaxExclusiveAmount>
    <cbc:TaxInclusiveAmount currencyID="${data.currency || 'EUR'}">${data.totals.grossAmount}</cbc:TaxInclusiveAmount>
    <cbc:PayableAmount currencyID="${data.currency || 'EUR'}">${data.totals.grossAmount}</cbc:PayableAmount>
  </cac:LegalMonetaryTotal>
  ${items}
</Invoice>`;
}
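
// For reference, the helper above expects a plain `data` object roughly shaped like
// (illustrative only; any fields not listed here are ignored by the generator):
//   {
//     invoiceNumber, issueDate, dueDate?, currency?,   // strings; currency defaults to 'EUR'
//     seller: { name, address, city?, postalCode?, country, taxId },
//     buyer:  { name, address, city?, postalCode?, country, taxId },
//     items:  [{ description, quantity, unitPrice, lineTotal }],
//     totals: { netAmount, vatAmount, grossAmount }
//   }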
tap.test('PERF-08: Large File Processing - should handle large files efficiently', async (t) => {
  // Test 1: Large PEPPOL file processing
  const largePEPPOLProcessing = await performanceTracker.measureAsync(
    'large-peppol-processing',
    async () => {
      const files = await CorpusLoader.loadPattern('**/PEPPOL/**/*.xml');
      const results = {
        files: [],
        memoryProfile: {
          baseline: 0,
          peak: 0,
          increments: []
        }
      };

      // Get baseline memory
      if (global.gc) global.gc();
      const baselineMemory = process.memoryUsage();
      results.memoryProfile.baseline = baselineMemory.heapUsed / 1024 / 1024;

      // Process PEPPOL files (known to be large)
      for (const file of files) {
        try {
          const startTime = Date.now();
          const startMemory = process.memoryUsage();

          // Read file
          const content = await plugins.fs.readFile(file.path, 'utf-8');
          const fileSize = Buffer.byteLength(content, 'utf-8');

          // Process file
          const format = FormatDetector.detectFormat(content);

          const parseStart = Date.now();
          const einvoice = await EInvoice.fromXml(content);
          const parseEnd = Date.now();

          const validationStart = Date.now();
          const validationResult = await einvoice.validate();
          const validationEnd = Date.now();

          const endMemory = process.memoryUsage();
          const totalTime = Date.now() - startTime;
          const memoryUsed = (endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024;

          // Track peak heap usage (stored in MB, so compare in MB)
          const heapUsedMB = endMemory.heapUsed / 1024 / 1024;
          if (heapUsedMB > results.memoryProfile.peak) {
            results.memoryProfile.peak = heapUsedMB;
          }

          results.files.push({
            path: file.path,
            sizeKB: (fileSize / 1024).toFixed(2),
            sizeMB: (fileSize / 1024 / 1024).toFixed(2),
            format,
            processingTime: totalTime,
            parseTime: parseEnd - parseStart,
            validationTime: validationEnd - validationStart,
            memoryUsedMB: memoryUsed.toFixed(2),
            throughputMBps: ((fileSize / 1024 / 1024) / (totalTime / 1000)).toFixed(2),
            itemCount: einvoice.data.items?.length || 0,
            valid: validationResult.valid
          });

          results.memoryProfile.increments.push(memoryUsed);
        } catch (error) {
          results.files.push({
            path: file.path,
            error: error.message
          });
        }
      }

      return results;
    }
  );
  // Test 2: Synthetic large file generation and processing
  const syntheticLargeFiles = await performanceTracker.measureAsync(
    'synthetic-large-files',
    async () => {
      const results = {
        tests: [],
        scalingAnalysis: null
      };

      // Generate invoices of increasing size
      const sizes = [
        { items: 100, name: '100 items' },
        { items: 500, name: '500 items' },
        { items: 1000, name: '1K items' },
        { items: 5000, name: '5K items' },
        { items: 10000, name: '10K items' }
      ];

      for (const size of sizes) {
        // Generate large invoice
        const invoice = {
          format: 'ubl' as const,
          data: {
            documentType: 'INVOICE',
            invoiceNumber: `LARGE-${size.items}`,
            issueDate: '2024-02-25',
            dueDate: '2024-03-25',
            currency: 'EUR',
            seller: {
              name: 'Large File Test Seller Corporation International GmbH',
              address: 'Hauptstraße 123-125, Building A, Floor 5',
              city: 'Berlin',
              postalCode: '10115',
              country: 'DE',
              taxId: 'DE123456789',
              registrationNumber: 'HRB123456',
              email: 'invoicing@largetest.de',
              phone: '+49 30 123456789',
              bankAccount: {
                iban: 'DE89370400440532013000',
                bic: 'COBADEFFXXX',
                bankName: 'Commerzbank AG'
              }
            },
            buyer: {
              name: 'Large File Test Buyer Enterprises Ltd.',
              address: '456 Commerce Boulevard, Suite 789',
              city: 'Munich',
              postalCode: '80331',
              country: 'DE',
              taxId: 'DE987654321',
              registrationNumber: 'HRB654321',
              email: 'ap@largebuyer.de',
              phone: '+49 89 987654321'
            },
            items: Array.from({ length: size.items }, (_, i) => ({
              itemId: `ITEM-${String(i + 1).padStart(6, '0')}`,
              description: `Product Item Number ${i + 1} - Detailed description with technical specifications, compliance information, country of origin, weight, dimensions, and special handling instructions. This is a very detailed description to simulate real-world invoice data with comprehensive product information.`,
              quantity: Math.floor(Math.random() * 100) + 1,
              unitPrice: Math.random() * 1000,
              vatRate: [0, 7, 19][Math.floor(Math.random() * 3)],
              lineTotal: 0,
              additionalInfo: {
                weight: `${(Math.random() * 50).toFixed(2)} kg`,
                dimensions: `${Math.floor(Math.random() * 100)} x ${Math.floor(Math.random() * 100)} x ${Math.floor(Math.random() * 100)} cm`,
                countryOfOrigin: ['DE', 'FR', 'IT', 'CN', 'US'][Math.floor(Math.random() * 5)],
                customsCode: `${Math.floor(Math.random() * 9000000000) + 1000000000}`,
                serialNumber: `SN-${Date.now()}-${i}`,
                batchNumber: `BATCH-${Math.floor(i / 100)}`
              }
            })),
            totals: { netAmount: 0, vatAmount: 0, grossAmount: 0 },
            notes: 'This is a large invoice generated for performance testing purposes. ' +
              'It contains a significant number of line items to test the system\'s ability ' +
              'to handle large documents efficiently.'
          }
        };

        // Calculate totals
        invoice.data.items.forEach(item => {
          item.lineTotal = item.quantity * item.unitPrice;
          invoice.data.totals.netAmount += item.lineTotal;
          invoice.data.totals.vatAmount += item.lineTotal * (item.vatRate / 100);
        });
        invoice.data.totals.grossAmount = invoice.data.totals.netAmount + invoice.data.totals.vatAmount;

        // Measure processing
        if (global.gc) global.gc();
        const startMemory = process.memoryUsage();
        const startTime = Date.now();

        // Generate XML
        const xmlStart = Date.now();
        const xml = createUBLInvoiceXML(invoice.data);
        const xmlEnd = Date.now();
        const xmlSize = Buffer.byteLength(xml, 'utf-8');

        // Parse back
        const parseStart = Date.now();
        const parsed = await EInvoice.fromXml(xml);
        const parseEnd = Date.now();

        // Validate
        const validateStart = Date.now();
        const validation = await parsed.validate();
        const validateEnd = Date.now();

        // Convert
        const convertStart = Date.now();
        await parsed.toXmlString('cii'); // Test conversion performance
        const convertEnd = Date.now();

        const endTime = Date.now();
        const endMemory = process.memoryUsage();

        results.tests.push({
          size: size.name,
          items: size.items,
          xmlSizeMB: (xmlSize / 1024 / 1024).toFixed(2),
          totalTime: endTime - startTime,
          xmlGeneration: xmlEnd - xmlStart,
          parsing: parseEnd - parseStart,
          validation: validateEnd - validateStart,
          conversion: convertEnd - convertStart,
          memoryUsedMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
          memoryPerItemKB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / size.items).toFixed(2),
          throughputMBps: ((xmlSize / 1024 / 1024) / ((endTime - startTime) / 1000)).toFixed(2),
          valid: validation.valid
        });
      }
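
      // For reference: the scaling analysis below is an ordinary least-squares fit of
      // totalTime = slope * items + intercept, where
      //   slope = (n*Σxy - Σx*Σy) / (n*Σx² - (Σx)²) and intercept = (Σy - slope*Σx) / n,
      // so `slope` reads as milliseconds per additional line item.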
      // Analyze scaling
      if (results.tests.length >= 3) {
        const points = results.tests.map(t => ({
          x: t.items,
          y: t.totalTime
        }));

        // Simple linear regression
        const n = points.length;
        const sumX = points.reduce((sum, p) => sum + p.x, 0);
        const sumY = points.reduce((sum, p) => sum + p.y, 0);
        const sumXY = points.reduce((sum, p) => sum + p.x * p.y, 0);
        const sumX2 = points.reduce((sum, p) => sum + p.x * p.x, 0);
        const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);
        const intercept = (sumY - slope * sumX) / n;

        results.scalingAnalysis = {
          type: slope < 0.5 ? 'Sub-linear' : slope <= 1.5 ? 'Linear' : 'Super-linear',
          formula: `Time(ms) = ${slope.toFixed(3)} * items + ${intercept.toFixed(2)}`,
          msPerItem: slope.toFixed(3)
        };
      }

      return results;
    }
  );
  // Test 3: Memory-efficient large file streaming
  const streamingLargeFiles = await performanceTracker.measureAsync(
    'streaming-large-files',
    async () => {
      const results = {
        streamingSupported: false,
        chunkProcessing: [],
        memoryEfficiency: null
      };

      // Simulate large file processing in chunks
      const totalItems = 10000;
      const chunkSizes = [100, 500, 1000, 2000];

      for (const chunkSize of chunkSizes) {
        const chunks = Math.ceil(totalItems / chunkSize);
        const startTime = Date.now();
        const startMemory = process.memoryUsage();
        let peakMemory = startMemory.heapUsed;

        // Process in chunks
        const chunkResults = [];
        for (let chunk = 0; chunk < chunks; chunk++) {
          const startItem = chunk * chunkSize;
          const endItem = Math.min(startItem + chunkSize, totalItems);

          // Create chunk invoice
          const chunkInvoice = {
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `CHUNK-${chunk}`,
              issueDate: '2024-02-25',
              seller: { name: 'Chunk Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Chunk Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: Array.from({ length: endItem - startItem }, (_, i) => ({
                description: `Chunk ${chunk} Item ${i + 1}`,
                quantity: 1,
                unitPrice: 100,
                vatRate: 19,
                lineTotal: 100
              })),
              totals: {
                netAmount: (endItem - startItem) * 100,
                vatAmount: (endItem - startItem) * 19,
                grossAmount: (endItem - startItem) * 119
              }
            }
          };

          // Process chunk
          const chunkStart = Date.now();
          const chunkXml = createUBLInvoiceXML(chunkInvoice.data);
          const chunkEInvoice = await EInvoice.fromXml(chunkXml);
          await chunkEInvoice.validate();
          const chunkEnd = Date.now();

          chunkResults.push({
            chunk,
            items: endItem - startItem,
            duration: chunkEnd - chunkStart
          });

          // Track peak memory
          const currentMemory = process.memoryUsage();
          if (currentMemory.heapUsed > peakMemory) {
            peakMemory = currentMemory.heapUsed;
          }

          // Simulate cleanup between chunks
          if (global.gc) global.gc();
        }

        const totalDuration = Date.now() - startTime;
        const memoryIncrease = (peakMemory - startMemory.heapUsed) / 1024 / 1024;

        results.chunkProcessing.push({
          chunkSize,
          chunks,
          totalItems,
          totalDuration,
          avgChunkTime: chunkResults.reduce((sum, r) => sum + r.duration, 0) / chunkResults.length,
          throughput: (totalItems / (totalDuration / 1000)).toFixed(2),
          peakMemoryMB: (peakMemory / 1024 / 1024).toFixed(2),
          memoryIncreaseMB: memoryIncrease.toFixed(2),
          memoryPerItemKB: ((memoryIncrease * 1024) / totalItems).toFixed(3)
        });
      }

      // Analyze memory efficiency
      if (results.chunkProcessing.length > 0) {
        const smallChunk = results.chunkProcessing[0];
        const largeChunk = results.chunkProcessing[results.chunkProcessing.length - 1];

        results.memoryEfficiency = {
          smallChunkMemory: smallChunk.memoryIncreaseMB,
          largeChunkMemory: largeChunk.memoryIncreaseMB,
          memoryScaling: (parseFloat(largeChunk.memoryIncreaseMB) / parseFloat(smallChunk.memoryIncreaseMB)).toFixed(2),
          recommendation: parseFloat(largeChunk.memoryIncreaseMB) < parseFloat(smallChunk.memoryIncreaseMB) * 2 ?
            'Use larger chunks for better memory efficiency' :
            'Use smaller chunks to reduce memory usage'
        };
      }

      return results;
    }
  );
  // Test 4: Corpus large file analysis
  const corpusLargeFiles = await performanceTracker.measureAsync(
    'corpus-large-file-analysis',
    async () => {
      const files = await CorpusLoader.loadPattern('**/*.xml');
      const results = {
        totalFiles: 0,
        largeFiles: [],
        sizeDistribution: {
          tiny: { count: 0, maxSize: 10 * 1024 },          // < 10KB
          small: { count: 0, maxSize: 100 * 1024 },        // < 100KB
          medium: { count: 0, maxSize: 1024 * 1024 },      // < 1MB
          large: { count: 0, maxSize: 10 * 1024 * 1024 },  // < 10MB
          huge: { count: 0, maxSize: Infinity }            // >= 10MB
        },
        processingStats: {
          avgTimePerKB: 0,
          avgMemoryPerKB: 0
        }
      };

      // Analyze all files
      const fileSizes = [];
      const processingMetrics = [];

      for (const file of files) {
        try {
          const stats = await plugins.fs.stat(file.path);
          const fileSize = stats.size;
          results.totalFiles++;

          // Categorize by size
          if (fileSize < results.sizeDistribution.tiny.maxSize) {
            results.sizeDistribution.tiny.count++;
          } else if (fileSize < results.sizeDistribution.small.maxSize) {
            results.sizeDistribution.small.count++;
          } else if (fileSize < results.sizeDistribution.medium.maxSize) {
            results.sizeDistribution.medium.count++;
          } else if (fileSize < results.sizeDistribution.large.maxSize) {
            results.sizeDistribution.large.count++;
          } else {
            results.sizeDistribution.huge.count++;
          }

          // Process large files (> 100KB)
          if (fileSize > 100 * 1024) {
            const content = await plugins.fs.readFile(file.path, 'utf-8');
            const startTime = Date.now();
            const startMemory = process.memoryUsage();

            const format = FormatDetector.detectFormat(content);
            if (format && format !== 'unknown') {
              const invoice = await EInvoice.fromXml(content);
              await invoice.validate();
            }

            const endTime = Date.now();
            const endMemory = process.memoryUsage();
            const processingTime = endTime - startTime;
            const memoryUsed = (endMemory.heapUsed - startMemory.heapUsed) / 1024; // KB

            results.largeFiles.push({
              path: file.path,
              sizeKB: (fileSize / 1024).toFixed(2),
              format,
              processingTime,
              memoryUsedKB: memoryUsed.toFixed(2),
              timePerKB: (processingTime / (fileSize / 1024)).toFixed(3),
              throughputKBps: ((fileSize / 1024) / (processingTime / 1000)).toFixed(2)
            });

            processingMetrics.push({
              size: fileSize,
              time: processingTime,
              memory: memoryUsed
            });
          }

          fileSizes.push(fileSize);
        } catch (error) {
          // Skip files that can't be processed
        }
      }

      // Calculate statistics
      if (processingMetrics.length > 0) {
        const totalSize = processingMetrics.reduce((sum, m) => sum + m.size, 0);
        const totalTime = processingMetrics.reduce((sum, m) => sum + m.time, 0);
        const totalMemory = processingMetrics.reduce((sum, m) => sum + m.memory, 0);

        results.processingStats.avgTimePerKB = parseFloat((totalTime / (totalSize / 1024)).toFixed(3));
        results.processingStats.avgMemoryPerKB = parseFloat((totalMemory / (totalSize / 1024)).toFixed(3));
      }

      // Sort large files by size
      results.largeFiles.sort((a, b) => parseFloat(b.sizeKB) - parseFloat(a.sizeKB));

      return {
        ...results,
        largeFiles: results.largeFiles.slice(0, 10), // Top 10 largest
        avgFileSizeKB: fileSizes.length > 0 ?
          (fileSizes.reduce((a, b) => a + b, 0) / fileSizes.length / 1024).toFixed(2) : 0
      };
    }
  );
  // Test 5: Stress test with extreme sizes
  const extremeSizeStressTest = await performanceTracker.measureAsync(
    'extreme-size-stress-test',
    async () => {
      const results = {
        tests: [],
        limits: {
          maxItemsProcessed: 0,
          maxSizeProcessedMB: 0,
          failurePoint: null
        }
      };

      // Test extreme scenarios
      const extremeScenarios = [
        {
          name: 'Wide invoice (many items)',
          generator: (count: number) => ({
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `EXTREME-WIDE-${count}`,
              issueDate: '2024-02-25',
              seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: Array.from({ length: count }, (_, i) => ({
                description: `Item ${i + 1}`,
                quantity: 1,
                unitPrice: 10,
                vatRate: 10,
                lineTotal: 10
              })),
              totals: { netAmount: count * 10, vatAmount: count, grossAmount: count * 11 }
            }
          })
        },
        {
          name: 'Deep invoice (long descriptions)',
          generator: (size: number) => ({
            format: 'ubl' as const,
            data: {
              documentType: 'INVOICE',
              invoiceNumber: `EXTREME-DEEP-${size}`,
              issueDate: '2024-02-25',
              seller: { name: 'Seller', address: 'Address', country: 'US', taxId: 'US123' },
              buyer: { name: 'Buyer', address: 'Address', country: 'US', taxId: 'US456' },
              items: [{
                description: 'A'.repeat(size * 1024), // Size in KB
                quantity: 1,
                unitPrice: 100,
                vatRate: 10,
                lineTotal: 100
              }],
              totals: { netAmount: 100, vatAmount: 10, grossAmount: 110 }
            }
          })
        }
      ];

      // Test each scenario
      for (const scenario of extremeScenarios) {
        const testResults = {
          scenario: scenario.name,
          tests: []
        };

        // Test increasing sizes
        const sizes = scenario.name.includes('Wide') ?
          [1000, 5000, 10000, 20000, 50000] :
          [100, 500, 1000, 2000, 5000]; // KB

        for (const size of sizes) {
          try {
            const invoice = scenario.generator(size);
            const startTime = Date.now();
            const startMemory = process.memoryUsage();

            // Try to process - since we only have invoice data, convert it to
            // a simple UBL invoice XML first, then parse and validate it
            const xml = createUBLInvoiceXML(invoice.data);
            const xmlSize = Buffer.byteLength(xml, 'utf-8') / 1024 / 1024; // MB

            const parsed = await EInvoice.fromXml(xml);
            await parsed.validate();

            const endTime = Date.now();
            const endMemory = process.memoryUsage();

            testResults.tests.push({
              size: scenario.name.includes('Wide') ? `${size} items` : `${size}KB text`,
              success: true,
              time: endTime - startTime,
              memoryMB: ((endMemory.heapUsed - startMemory.heapUsed) / 1024 / 1024).toFixed(2),
              xmlSizeMB: xmlSize.toFixed(2)
            });

            // Update limits
            if (scenario.name.includes('Wide') && size > results.limits.maxItemsProcessed) {
              results.limits.maxItemsProcessed = size;
            }
            if (xmlSize > results.limits.maxSizeProcessedMB) {
              results.limits.maxSizeProcessedMB = xmlSize;
            }
          } catch (error) {
            testResults.tests.push({
              size: scenario.name.includes('Wide') ? `${size} items` : `${size}KB text`,
              success: false,
              error: error.message
            });

            if (!results.limits.failurePoint) {
              results.limits.failurePoint = {
                scenario: scenario.name,
                size,
                error: error.message
              };
            }
            break; // Stop testing larger sizes after failure
          }
        }

        results.tests.push(testResults);
      }

      return results;
    }
  );
  // Summary
  console.log('\n=== PERF-08: Large File Processing Test Summary ===');

  if (largePEPPOLProcessing.files.length > 0) {
    console.log('\nLarge PEPPOL File Processing:');
    largePEPPOLProcessing.files.forEach(file => {
      if (!file.error) {
        console.log(`  ${file.path.split('/').pop()}:`);
        console.log(`    - Size: ${file.sizeMB}MB, Items: ${file.itemCount}`);
        console.log(`    - Processing: ${file.processingTime}ms (parse: ${file.parseTime}ms, validate: ${file.validationTime}ms)`);
        console.log(`    - Throughput: ${file.throughputMBps}MB/s`);
        console.log(`    - Memory used: ${file.memoryUsedMB}MB`);
      }
    });
    console.log(`  Peak memory: ${largePEPPOLProcessing.memoryProfile.peak.toFixed(2)}MB`);
  }

  console.log('\nSynthetic Large File Scaling:');
  console.log('  Size      | XML Size | Total Time | Parse  | Validate | Convert | Memory | Throughput');
  console.log('  ----------|----------|------------|--------|----------|---------|--------|----------');
  syntheticLargeFiles.tests.forEach((test: any) => {
    console.log(`  ${test.size.padEnd(9)} | ${test.xmlSizeMB.padEnd(8)}MB | ${String(test.totalTime + 'ms').padEnd(10)} | ${String(test.parsing + 'ms').padEnd(6)} | ${String(test.validation + 'ms').padEnd(8)} | ${String(test.conversion + 'ms').padEnd(7)} | ${test.memoryUsedMB.padEnd(6)}MB | ${test.throughputMBps}MB/s`);
  });

  if (syntheticLargeFiles.scalingAnalysis) {
    console.log(`  Scaling: ${syntheticLargeFiles.scalingAnalysis.type}`);
    console.log(`  Formula: ${syntheticLargeFiles.scalingAnalysis.formula}`);
  }

  console.log('\nChunked Processing Efficiency:');
  console.log('  Chunk Size | Chunks | Duration | Throughput | Peak Memory | Memory/Item');
  console.log('  -----------|--------|----------|------------|-------------|------------');
  streamingLargeFiles.chunkProcessing.forEach((chunk: any) => {
    console.log(`  ${String(chunk.chunkSize).padEnd(10)} | ${String(chunk.chunks).padEnd(6)} | ${String(chunk.totalDuration + 'ms').padEnd(8)} | ${chunk.throughput.padEnd(10)}/s | ${chunk.peakMemoryMB.padEnd(11)}MB | ${chunk.memoryPerItemKB}KB`);
  });

  if (streamingLargeFiles.memoryEfficiency) {
    console.log(`  Recommendation: ${streamingLargeFiles.memoryEfficiency.recommendation}`);
  }

  console.log('\nCorpus Large File Analysis:');
  console.log(`  Total files: ${corpusLargeFiles.totalFiles}`);
  console.log(`  Size distribution:`);
  Object.entries(corpusLargeFiles.sizeDistribution).forEach(([size, data]: [string, any]) => {
    console.log(`    - ${size}: ${data.count} files`);
  });
  console.log(`  Largest processed files:`);
  corpusLargeFiles.largeFiles.slice(0, 5).forEach(file => {
    console.log(`    - ${file.path.split('/').pop()}: ${file.sizeKB}KB, ${file.processingTime}ms, ${file.throughputKBps}KB/s`);
  });
  console.log(`  Average processing: ${corpusLargeFiles.processingStats.avgTimePerKB}ms/KB`);

  console.log('\nExtreme Size Stress Test:');
  extremeSizeStressTest.tests.forEach(scenario => {
    console.log(`  ${scenario.scenario}:`);
    scenario.tests.forEach((test: any) => {
      console.log(`    - ${test.size}: ${test.success ? `✅ ${test.time}ms, ${test.xmlSizeMB}MB XML` : `❌ ${test.error}`}`);
    });
  });
  console.log(`  Limits:`);
  console.log(`    - Max items processed: ${extremeSizeStressTest.limits.maxItemsProcessed}`);
  console.log(`    - Max size processed: ${extremeSizeStressTest.limits.maxSizeProcessedMB.toFixed(2)}MB`);
  if (extremeSizeStressTest.limits.failurePoint) {
    console.log(`    - Failure point: ${extremeSizeStressTest.limits.failurePoint.scenario} at ${extremeSizeStressTest.limits.failurePoint.size}`);
  }

  // Performance targets check
  console.log('\n=== Performance Targets Check ===');
  const largeFileThroughput = syntheticLargeFiles.tests.length > 0 ?
    parseFloat(syntheticLargeFiles.tests[syntheticLargeFiles.tests.length - 1].throughputMBps) : 0;
  const targetThroughput = 1; // Target: >1MB/s for large files
  console.log(`  Large file throughput: ${largeFileThroughput}MB/s ${largeFileThroughput > targetThroughput ? '✅' : '⚠️'} (target: >${targetThroughput}MB/s)`);

  // Overall performance summary
  console.log('\n=== Overall Performance Summary ===');
  console.log(performanceTracker.getSummary());

  t.pass('Large file processing tests completed');
});

tap.start();