import * as plugins from './plugins.js';
import * as path from 'path';
import type { IInvoice, IInvoiceFilter, IDuplicateCheckResult } from './skr.invoice.entity.js';

/**
 * Invoice storage metadata
 */
export interface IInvoiceMetadata {
  invoiceId: string;
  invoiceNumber: string;
  direction: 'inbound' | 'outbound';
  issueDate: string;
  supplierName: string;
  customerName: string;
  totalAmount: number;
  currency: string;
  contentHash: string;
  pdfHash?: string;
  xmlHash: string;
  journalEntryId?: string;
  transactionIds?: string[];
  validationResult: {
    isValid: boolean;
    errors: number;
    warnings: number;
  };
  parserVersion: string;
  storedAt: string;
  storedBy: string;
}

/**
 * Invoice registry entry (for NDJSON streaming)
 */
export interface IInvoiceRegistryEntry {
  id: string;
  hash: string;
  metadata: IInvoiceMetadata;
}

/**
 * Storage statistics
 */
export interface IStorageStats {
  totalInvoices: number;
  inboundCount: number;
  outboundCount: number;
  totalSize: number;
  duplicatesDetected: number;
  lastUpdate: Date;
}

/**
 * Content-addressed storage for invoices
 * Integrates with BagIt archive structure for GoBD compliance
 *
 * Invoices are written below `<exportPath>/data/documents/invoices/<direction>/`.
 * An in-memory metadata cache (bounded, least-recently-used eviction) is the
 * lookup index; it is rebuilt from the append-only `registry.ndjson` file by
 * `initialize()`.
 */
export class InvoiceStorage {
  private exportPath: string;
  private logger: plugins.smartlog.ConsoleLog;
  private registryPath: string;
  private metadataCache: Map<string, IInvoiceMetadata>;
  private readonly MAX_CACHE_SIZE = 10000; // Maximum number of cached entries
  private readonly MAX_PDF_SIZE = 50 * 1024 * 1024; // 50MB max
  private cacheAccessOrder: string[] = []; // Track access order for LRU eviction (oldest first)

  /**
   * @param exportPath Root directory of the export; all storage paths are derived from it.
   */
  constructor(exportPath: string) {
    this.exportPath = exportPath;
    this.logger = new plugins.smartlog.ConsoleLog();
    this.registryPath = path.join(exportPath, 'data', 'documents', 'invoices', 'registry.ndjson');
    this.metadataCache = new Map<string, IInvoiceMetadata>();
  }

  /**
   * Manage cache size using LRU eviction
   *
   * When the cache is over capacity, drops a batch of the least recently
   * used entries (10% of capacity, capped at 100).
   */
  private manageCacheSize(): void {
    if (this.metadataCache.size > this.MAX_CACHE_SIZE) {
      // Remove least recently used entries
      const entriesToRemove = Math.min(100, Math.floor(this.MAX_CACHE_SIZE * 0.1)); // Remove 10% or 100 entries
      const keysToRemove = this.cacheAccessOrder.splice(0, entriesToRemove);
      for (const key of keysToRemove) {
        this.metadataCache.delete(key);
      }
      // Report what was actually removed; the access order may hold fewer keys than requested.
      this.logger.log('info', `Evicted ${keysToRemove.length} entries from metadata cache`);
    }
  }

  /**
   * Update cache access order for LRU
   *
   * Moves `key` to the most-recently-used end of the access order.
   */
  private touchCacheEntry(key: string): void {
    const index = this.cacheAccessOrder.indexOf(key);
    if (index > -1) {
      this.cacheAccessOrder.splice(index, 1);
    }
    this.cacheAccessOrder.push(key);
  }

  /**
   * Initialize storage directories
   *
   * Ensures the inbound/outbound/metadata/validation directories exist and
   * then loads the existing registry into the cache.
   */
  public async initialize(): Promise<void> {
    const invoicesRoot = path.join(this.exportPath, 'data', 'documents', 'invoices');
    const dirs = [
      path.join(invoicesRoot, 'inbound'),
      path.join(invoicesRoot, 'inbound', 'metadata'),
      path.join(invoicesRoot, 'outbound'),
      path.join(invoicesRoot, 'outbound', 'metadata'),
      path.join(this.exportPath, 'data', 'validation'),
    ];

    for (const dir of dirs) {
      await plugins.smartfile.fs.ensureDir(dir);
    }

    // Load existing registry if it exists
    await this.loadRegistry();
  }

  /**
   * Store an invoice with content addressing
   *
   * @param invoice Invoice to persist; its XML content defines the content hash.
   * @param pdfBuffer Optional PDF rendition, limited to `MAX_PDF_SIZE` bytes.
   * @returns The content hash under which the invoice is stored. If a
   *   duplicate is detected nothing is written and the hash of the matched
   *   invoice is returned instead.
   * @throws Error with message `Invoice storage failed: …` on any failure.
   */
  public async storeInvoice(invoice: IInvoice, pdfBuffer?: Buffer): Promise<string> {
    try {
      // Validate PDF size if provided
      if (pdfBuffer && pdfBuffer.length > this.MAX_PDF_SIZE) {
        throw new Error(`PDF file too large: ${pdfBuffer.length} bytes (max ${this.MAX_PDF_SIZE} bytes)`);
      }

      // Calculate hashes
      const xmlHash = await this.calculateHash(invoice.xmlContent || '');
      const pdfHash = pdfBuffer ? await this.calculateHash(pdfBuffer) : undefined;
      const contentHash = xmlHash; // Primary content hash is XML

      // Check for duplicates
      const duplicateCheck = await this.checkDuplicate(invoice, contentHash);
      if (duplicateCheck.isDuplicate) {
        this.logger.log('warn', `Duplicate invoice detected: ${invoice.invoiceNumber}`);
        return duplicateCheck.matchedContentHash || contentHash;
      }

      // Determine storage path
      const direction = invoice.direction;
      const basePath = path.join(this.exportPath, 'data', 'documents', 'invoices', direction);

      // Create filename with content hash
      const dateStr = invoice.issueDate.toISOString().split('T')[0];
      const sanitizedNumber = invoice.invoiceNumber.replace(/[^a-zA-Z0-9-_]/g, '_');
      const xmlFilename = `${contentHash.substring(0, 8)}_${dateStr}_${sanitizedNumber}.xml`;
      const xmlPath = path.join(basePath, xmlFilename);

      // Store XML
      await plugins.smartfile.memory.toFs(invoice.xmlContent || '', xmlPath);

      // Store PDF if available
      if (pdfBuffer) {
        const pdfFilename = xmlFilename.replace('.xml', '.pdf');
        await plugins.smartfile.memory.toFs(pdfBuffer, path.join(basePath, pdfFilename));

        // Also store PDF/A-3 with embedded XML if supported
        if (invoice.format === 'zugferd' || invoice.format === 'facturx') {
          const pdfA3Filename = xmlFilename.replace('.xml', '_pdfa3.pdf');
          // The PDF should already have embedded XML if it's ZUGFeRD/Factur-X
          await plugins.smartfile.memory.toFs(pdfBuffer, path.join(basePath, pdfA3Filename));
        }
      }

      // Create and store metadata
      const metadata: IInvoiceMetadata = {
        invoiceId: invoice.id,
        invoiceNumber: invoice.invoiceNumber,
        direction: invoice.direction,
        issueDate: invoice.issueDate.toISOString(),
        supplierName: invoice.supplier.name,
        customerName: invoice.customer.name,
        totalAmount: invoice.payableAmount,
        currency: invoice.currencyCode,
        contentHash,
        pdfHash,
        xmlHash,
        journalEntryId: invoice.bookingInfo?.journalEntryId,
        transactionIds: invoice.bookingInfo?.transactionIds,
        validationResult: {
          isValid: invoice.validationResult?.isValid || false,
          errors: this.countErrors(invoice.validationResult),
          warnings: this.countWarnings(invoice.validationResult),
        },
        parserVersion: invoice.metadata?.parserVersion || '5.1.4',
        storedAt: new Date().toISOString(),
        storedBy: invoice.createdBy,
      };

      const metadataPath = path.join(basePath, 'metadata', `${contentHash}.json`);
      await plugins.smartfile.memory.toFs(JSON.stringify(metadata, null, 2), metadataPath);

      // Update registry
      await this.updateRegistry(invoice.id, contentHash, metadata);

      // Cache metadata with LRU management
      this.setCacheEntry(contentHash, metadata);

      this.logger.log('info', `Invoice stored: ${invoice.invoiceNumber} (${contentHash})`);
      return contentHash;
    } catch (error: unknown) {
      this.logger.log('error', `Failed to store invoice: ${error}`);
      // A thrown value is not guaranteed to be an Error, so narrow before reading `.message`.
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Invoice storage failed: ${reason}`);
    }
  }

  /**
   * Retrieve an invoice by content hash
   *
   * Only invoices currently present in the metadata cache can be retrieved.
   * The returned object is a partial reconstruction from the cached metadata
   * plus the stored XML (and PDF, when one exists).
   *
   * @returns The reconstructed invoice, or `null` if it is unknown or loading fails.
   */
  public async retrieveInvoice(contentHash: string): Promise<IInvoice | null> {
    try {
      // Check cache first
      const metadata = this.getCacheEntry(contentHash);
      if (!metadata) {
        this.logger.log('warn', `Invoice not found: ${contentHash}`);
        return null;
      }

      // Load XML content
      const xmlPath = await this.findInvoiceFile(contentHash, '.xml');
      if (!xmlPath) {
        throw new Error(`XML file not found for invoice ${contentHash}`);
      }
      const xmlContent = await plugins.smartfile.fs.toStringSync(xmlPath);

      // Load PDF if exists
      let pdfContent: Buffer | undefined;
      const pdfPath = await this.findInvoiceFile(contentHash, '.pdf');
      if (pdfPath) {
        pdfContent = await plugins.smartfile.fs.toBuffer(pdfPath);
      }

      // Reconstruct invoice object (partial)
      const invoice: Partial<IInvoice> = {
        id: metadata.invoiceId,
        invoiceNumber: metadata.invoiceNumber,
        direction: metadata.direction as IInvoice['direction'],
        issueDate: new Date(metadata.issueDate),
        supplier: {
          name: metadata.supplierName,
          id: '',
          address: { countryCode: 'DE' },
        },
        customer: {
          name: metadata.customerName,
          id: '',
          address: { countryCode: 'DE' },
        },
        payableAmount: metadata.totalAmount,
        currencyCode: metadata.currency,
        contentHash: metadata.contentHash,
        xmlContent,
        pdfContent,
        pdfHash: metadata.pdfHash,
      };

      return invoice as IInvoice;
    } catch (error: unknown) {
      this.logger.log('error', `Failed to retrieve invoice: ${error}`);
      return null;
    }
  }

  /**
   * Check for duplicate invoices
   *
   * Checks, in order: exact content hash (confidence 100), same invoice
   * number + direction + counterparty (95), and same amount (±0.01) + issue
   * date (±7 days) + direction + counterparty (85). Only cached invoices are
   * considered.
   */
  public async checkDuplicate(
    invoice: IInvoice,
    contentHash: string
  ): Promise<IDuplicateCheckResult> {
    // Check by content hash (exact match)
    const existing = this.getCacheEntry(contentHash);
    if (existing) {
      return {
        isDuplicate: true,
        matchedInvoiceId: existing.invoiceId,
        matchedContentHash: contentHash,
        matchedFields: ['contentHash'],
        confidence: 100,
      };
    }

    // Tolerances for the fuzzy amount/date check (loop-invariant)
    const dateTolerance = 7 * 24 * 60 * 60 * 1000; // 7 days
    const amountTolerance = 0.01;

    for (const [hash, metadata] of this.metadataCache.entries()) {
      // Check by invoice number and supplier/customer
      if (
        metadata.invoiceNumber === invoice.invoiceNumber &&
        metadata.direction === invoice.direction &&
        this.isSameParty(metadata, invoice)
      ) {
        return {
          isDuplicate: true,
          matchedInvoiceId: metadata.invoiceId,
          matchedContentHash: hash,
          matchedFields: ['invoiceNumber', invoice.direction === 'inbound' ? 'supplier' : 'customer'],
          confidence: 95,
        };
      }

      // Check by amount and date within tolerance
      if (
        Math.abs(metadata.totalAmount - invoice.payableAmount) < amountTolerance &&
        Math.abs(new Date(metadata.issueDate).getTime() - invoice.issueDate.getTime()) < dateTolerance &&
        metadata.direction === invoice.direction &&
        this.isSameParty(metadata, invoice)
      ) {
        return {
          isDuplicate: true,
          matchedInvoiceId: metadata.invoiceId,
          matchedContentHash: hash,
          matchedFields: ['amount', 'date', 'party'],
          confidence: 85,
        };
      }
    }

    return { isDuplicate: false, confidence: 0 };
  }

  /**
   * Compare the counterparty relevant for the invoice direction: the
   * supplier name for inbound invoices, the customer name for outbound ones.
   */
  private isSameParty(metadata: IInvoiceMetadata, invoice: IInvoice): boolean {
    if (invoice.direction === 'inbound') {
      return metadata.supplierName === invoice.supplier.name;
    }
    if (invoice.direction === 'outbound') {
      return metadata.customerName === invoice.customer.name;
    }
    return false;
  }

  /**
   * Search invoices by filter
   *
   * @returns Matching cached metadata, sorted by issue date descending.
   */
  public async searchInvoices(filter: IInvoiceFilter): Promise<IInvoiceMetadata[]> {
    const results: IInvoiceMetadata[] = [];

    for (const metadata of this.metadataCache.values()) {
      if (this.matchesFilter(metadata, filter)) {
        results.push(metadata);
      }
    }

    // Sort by date descending
    results.sort((a, b) => new Date(b.issueDate).getTime() - new Date(a.issueDate).getTime());

    return results;
  }

  /**
   * Get storage statistics
   *
   * Counts are derived from the metadata cache; `totalSize` is a flat
   * per-invoice estimate, not a measured value.
   */
  public async getStatistics(): Promise<IStorageStats> {
    let totalSize = 0;
    let inboundCount = 0;
    let outboundCount = 0;

    for (const metadata of this.metadataCache.values()) {
      if (metadata.direction === 'inbound') {
        inboundCount++;
      } else {
        outboundCount++;
      }
      // Estimate size (would need actual file sizes in production)
      totalSize += 50000; // Rough estimate
    }

    return {
      totalInvoices: this.metadataCache.size,
      inboundCount,
      outboundCount,
      totalSize,
      duplicatesDetected: 0, // Would track this in production
      lastUpdate: new Date(),
    };
  }

  /**
   * Create EN16931 compliance report
   *
   * Aggregates validation results of all cached invoices and writes them to
   * `data/validation/en16931_compliance.json`.
   */
  public async createComplianceReport(): Promise<void> {
    const report = {
      timestamp: new Date().toISOString(),
      totalInvoices: this.metadataCache.size,
      validInvoices: 0,
      invalidInvoices: 0,
      warnings: 0,
      byFormat: {} as Record<string, number>,
      byDirection: { inbound: 0, outbound: 0 },
      validationErrors: [] as string[],
      complianceLevel: 'EN16931',
      validatorVersion: '5.1.4',
    };

    for (const metadata of this.metadataCache.values()) {
      if (metadata.validationResult.isValid) {
        report.validInvoices++;
      } else {
        report.invalidInvoices++;
      }
      report.warnings += metadata.validationResult.warnings;

      if (metadata.direction === 'inbound') {
        report.byDirection.inbound++;
      } else {
        report.byDirection.outbound++;
      }
    }

    const reportPath = path.join(this.exportPath, 'data', 'validation', 'en16931_compliance.json');
    await plugins.smartfile.memory.toFs(JSON.stringify(report, null, 2), reportPath);
  }

  /**
   * Load registry from disk
   *
   * Entries are applied in file order, so a later line for the same hash
   * supersedes an earlier one. Unparseable or malformed lines are skipped
   * with a warning.
   */
  private async loadRegistry(): Promise<void> {
    try {
      if (await plugins.smartfile.fs.fileExists(this.registryPath)) {
        const content = await plugins.smartfile.fs.toStringSync(this.registryPath);
        const lines = content.split('\n').filter((line) => line.trim());

        for (const line of lines) {
          try {
            const entry: IInvoiceRegistryEntry = JSON.parse(line);
            // JSON.parse output is untrusted; require the fields the cache depends on.
            if (!entry || typeof entry.hash !== 'string' || !entry.metadata || typeof entry.metadata !== 'object') {
              throw new Error('missing hash or metadata');
            }
            this.setCacheEntry(entry.hash, entry.metadata);
          } catch {
            this.logger.log('warn', `Invalid registry entry: ${line}`);
          }
        }

        this.logger.log('info', `Loaded ${this.metadataCache.size} invoices from registry`);
      }
    } catch (error: unknown) {
      this.logger.log('error', `Failed to load registry: ${error}`);
    }
  }

  /**
   * Update registry with new entry
   *
   * Appends one NDJSON line. Failures are logged, not thrown (best effort).
   */
  private async updateRegistry(
    invoiceId: string,
    contentHash: string,
    metadata: IInvoiceMetadata
  ): Promise<void> {
    try {
      const entry: IInvoiceRegistryEntry = {
        id: invoiceId,
        hash: contentHash,
        metadata,
      };

      // Append to NDJSON file
      const line = JSON.stringify(entry) + '\n';
      await plugins.smartfile.fs.ensureDir(path.dirname(this.registryPath));

      // Use native fs append instead of rewriting the whole registry file
      const fs = await import('fs/promises');
      await fs.appendFile(this.registryPath, line, 'utf8');
    } catch (error: unknown) {
      this.logger.log('error', `Failed to update registry: ${error}`);
    }
  }

  /**
   * Find invoice file by hash and extension
   *
   * Stored files are named `<first 8 hash chars>_<date>_<number><ext>`, so the
   * match is anchored to the start of the file name; matching the prefix
   * anywhere in the path could hit a date or invoice-number fragment.
   *
   * @returns Path of the first matching file, or `null` if none is found.
   */
  private async findInvoiceFile(contentHash: string, extension: string): Promise<string | null> {
    const dirs = [
      path.join(this.exportPath, 'data', 'documents', 'invoices', 'inbound'),
      path.join(this.exportPath, 'data', 'documents', 'invoices', 'outbound'),
    ];
    const filePrefix = `${contentHash.substring(0, 8)}_`;

    for (const dir of dirs) {
      const files = await plugins.smartfile.fs.listFileTree(dir, '**/*' + extension);
      for (const file of files) {
        if (path.basename(file).startsWith(filePrefix)) {
          return path.join(dir, file);
        }
      }
    }

    return null;
  }

  /**
   * Calculate SHA-256 hash
   */
  private async calculateHash(data: string | Buffer): Promise<string> {
    if (typeof data === 'string') {
      return await plugins.smarthash.sha256FromString(data);
    }
    return await plugins.smarthash.sha256FromBuffer(data);
  }

  /**
   * Check if metadata matches filter
   *
   * Every filter field that is set must match. Amount bounds are compared
   * against `undefined` rather than truthiness so a bound of 0 is honoured.
   */
  private matchesFilter(metadata: IInvoiceMetadata, filter: IInvoiceFilter): boolean {
    if (filter.direction && metadata.direction !== filter.direction) {
      return false;
    }

    if (filter.dateFrom && new Date(metadata.issueDate) < filter.dateFrom) {
      return false;
    }

    if (filter.dateTo && new Date(metadata.issueDate) > filter.dateTo) {
      return false;
    }

    if (filter.minAmount !== undefined && filter.minAmount !== null && metadata.totalAmount < filter.minAmount) {
      return false;
    }

    if (filter.maxAmount !== undefined && filter.maxAmount !== null && metadata.totalAmount > filter.maxAmount) {
      return false;
    }

    if (filter.invoiceNumber && !metadata.invoiceNumber.includes(filter.invoiceNumber)) {
      return false;
    }

    // Only party names are stored in the metadata, so the id filters are matched against names.
    if (filter.supplierId && !metadata.supplierName.includes(filter.supplierId)) {
      return false;
    }

    if (filter.customerId && !metadata.customerName.includes(filter.customerId)) {
      return false;
    }

    return true;
  }

  /**
   * Count errors in validation result
   */
  private countErrors(validationResult?: IInvoice['validationResult']): number {
    if (!validationResult) return 0;

    return (
      validationResult.syntax.errors.length +
      validationResult.semantic.errors.length +
      validationResult.businessRules.errors.length +
      (validationResult.countrySpecific?.errors.length || 0)
    );
  }

  /**
   * Count warnings in validation result
   */
  private countWarnings(validationResult?: IInvoice['validationResult']): number {
    if (!validationResult) return 0;

    return (
      validationResult.syntax.warnings.length +
      validationResult.semantic.warnings.length +
      validationResult.businessRules.warnings.length +
      (validationResult.countrySpecific?.warnings.length || 0)
    );
  }

  /**
   * Clean up old invoices (for testing only)
   *
   * Removes cached entries whose issue date is older than the cutoff. Only
   * the in-memory cache is affected; files and registry stay untouched.
   *
   * @param olderThanDays Age threshold in days (default 365).
   * @returns Number of entries removed from the cache.
   */
  public async cleanup(olderThanDays: number = 365): Promise<number> {
    const cutoffDate = new Date();
    cutoffDate.setDate(cutoffDate.getDate() - olderThanDays);

    const removedKeys = new Set<string>();
    for (const [hash, metadata] of this.metadataCache.entries()) {
      if (new Date(metadata.issueDate) < cutoffDate) {
        this.metadataCache.delete(hash);
        removedKeys.add(hash);
      }
    }

    // Keep the LRU order in sync with the cache, otherwise stale keys pile up.
    if (removedKeys.size > 0) {
      this.cacheAccessOrder = this.cacheAccessOrder.filter((key) => !removedKeys.has(key));
    }

    this.logger.log('info', `Removed ${removedKeys.size} old invoices from cache`);
    return removedKeys.size;
  }

  /**
   * Set cache entry with LRU eviction
   *
   * Stores the value, marks the key as most recently used and evicts the
   * least recently used entries while the cache exceeds `MAX_CACHE_SIZE`.
   */
  private setCacheEntry(key: string, value: IInvoiceMetadata): void {
    // Move to end (most recently used)
    this.touchCacheEntry(key);
    this.metadataCache.set(key, value);

    // Evict oldest if cache is too large
    while (this.metadataCache.size > this.MAX_CACHE_SIZE) {
      const oldestKey = this.cacheAccessOrder.shift();
      if (oldestKey === undefined) {
        // Access order is exhausted although the cache is still over
        // capacity; stop instead of looping forever.
        break;
      }
      if (this.metadataCache.delete(oldestKey)) {
        this.logger.log('debug', `Evicted invoice from cache: ${oldestKey}`);
      }
    }
  }

  /**
   * Get cache entry and update access order
   */
  private getCacheEntry(key: string): IInvoiceMetadata | undefined {
    const value = this.metadataCache.get(key);
    if (value) {
      // Move to end (most recently used)
      this.touchCacheEntry(key);
    }
    return value;
  }

  /**
   * Update metadata in storage and cache
   *
   * Merges `updates` into the cached metadata, rewrites the metadata JSON
   * file and appends a superseding registry line so the change survives a
   * reload of the registry. Does nothing (besides a warning) if the invoice
   * is not in the cache.
   */
  public async updateMetadata(contentHash: string, updates: Partial<IInvoiceMetadata>): Promise<void> {
    const metadata = this.getCacheEntry(contentHash);
    if (!metadata) {
      this.logger.log('warn', `Cannot update metadata - invoice not found: ${contentHash}`);
      return;
    }

    // Update metadata
    const updatedMetadata: IInvoiceMetadata = { ...metadata, ...updates };
    this.setCacheEntry(contentHash, updatedMetadata);

    // Persist to disk
    const metadataPath = path.join(
      this.exportPath,
      'data',
      'documents',
      'invoices',
      metadata.direction,
      'metadata',
      `${contentHash}.json`
    );

    await plugins.smartfile.memory.toFs(JSON.stringify(updatedMetadata, null, 2), metadataPath);

    // The cache is rebuilt from the registry on initialize(); without a new
    // registry line the update would be lost on the next start.
    await this.updateRegistry(updatedMetadata.invoiceId, contentHash, updatedMetadata);

    this.logger.log('info', `Updated metadata for invoice: ${contentHash}`);
  }
}