// File: einvoice/test/suite/einvoice_encoding/test.enc-07.attribute-encoding.ts
// Repository listing: TypeScript, 460 lines, 18 KiB; history entry dated 2025-05-25 19:45:37 +00:00
import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-07: Attribute Encoding - should handle XML attribute encoding correctly', async (t) => {
// ENC-07: Verify proper encoding of XML attributes including special chars and quotes
// This test ensures attributes are properly encoded across different scenarios
const performanceTracker = new PerformanceTracker('ENC-07: Attribute Encoding');
const corpusLoader = new CorpusLoader();
t.test('Basic attribute encoding', async () => {
  // Loads a small UBL invoice whose attributes hold plain ASCII values and checks
  // that each of those attributes shows up again in the string from getXmlString().
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID schemeID="INVOICE" schemeAgencyID="6">ATTR-BASIC-001</ID>
<IssueDate>2025-01-25</IssueDate>
<DocumentCurrencyCode listID="ISO4217" listAgencyID="6" listVersionID="2001">EUR</DocumentCurrencyCode>
<TaxTotal>
<TaxAmount currencyID="EUR">19.00</TaxAmount>
<TaxSubtotal>
<TaxCategory>
<ID schemeID="UNCL5305" schemeAgencyID="6">S</ID>
<Percent>19</Percent>
<TaxScheme>
<ID schemeID="UN/ECE 5153" schemeAgencyID="6">VAT</ID>
</TaxScheme>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<InvoiceLine>
<ID>1</ID>
<InvoicedQuantity unitCode="C62" unitCodeListID="UNECERec20">10</InvoicedQuantity>
<LineExtensionAmount currencyID="EUR">100.00</LineExtensionAmount>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Every pattern tolerates either quote style and optional whitespace around `=`,
  // so the check does not depend on how the serializer formats attributes.
  const expectedAttributes: RegExp[] = [
    /schemeID\s*=\s*["']INVOICE["']/,
    /schemeAgencyID\s*=\s*["']6["']/,
    /listID\s*=\s*["']ISO4217["']/,
    /listVersionID\s*=\s*["']2001["']/,
    /currencyID\s*=\s*["']EUR["']/,
    /unitCode\s*=\s*["']C62["']/,
    /unitCodeListID\s*=\s*["']UNECERec20["']/
  ];
  for (const pattern of expectedAttributes) {
    expect(serialized).toMatch(pattern);
  }

  // Only reached when all assertions passed; a failing run records no timing.
  performanceTracker.addMeasurement('basic-attributes', performance.now() - t0);
});
t.test('Attributes with special characters', async () => {
  // Feeds attributes containing entity-escaped markup characters (&amp; &lt; &gt; &quot;)
  // plus umlauts and the euro sign, then checks how they appear in the re-serialized XML.
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-SPECIAL-001</ID>
<Note languageID="de-DE" encoding="UTF-8">Rechnung für Bücher &amp; Zeitschriften</Note>
<PaymentMeans>
<PaymentMeansCode name="Überweisung (Bank &amp; SEPA)">30</PaymentMeansCode>
<PaymentID reference="Order &lt;2025-001&gt;">PAY-123</PaymentID>
<PayeeFinancialAccount>
<Name type="IBAN &amp; BIC">DE89 3704 0044 0532 0130 00</Name>
<FinancialInstitutionBranch>
<Name branch="München &quot;Zentrum&quot;">Sparkasse</Name>
</FinancialInstitutionBranch>
</PayeeFinancialAccount>
</PaymentMeans>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason code="95" description="Discount for &gt; 100€ orders">Volume discount</AllowanceChargeReason>
<Amount currencyID="EUR" percentage="5%" calculation="100 * 0.05">5.00</Amount>
</AllowanceCharge>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // The output is expected to keep &amp;, &lt; and &gt; in escaped form. For the
  // `branch` attribute either &quot; or a literal double quote is accepted.
  const expectedAttributes: RegExp[] = [
    /name\s*=\s*["']Überweisung \(Bank &amp; SEPA\)["']/,
    /reference\s*=\s*["']Order &lt;2025-001&gt;["']/,
    /type\s*=\s*["']IBAN &amp; BIC["']/,
    /branch\s*=\s*["']München (&quot;|")Zentrum(&quot;|")["']/,
    /description\s*=\s*["']Discount for &gt; 100€ orders["']/,
    /percentage\s*=\s*["']5%["']/
  ];
  for (const pattern of expectedAttributes) {
    expect(serialized).toMatch(pattern);
  }

  // Only reached when all assertions passed; a failing run records no timing.
  performanceTracker.addMeasurement('special-char-attributes', performance.now() - t0);
});
t.test('Quote handling in attributes', async () => {
  // Exercises attribute values that contain quote characters: double quotes inside
  // single-quoted attributes, apostrophes inside double-quoted attributes, and
  // entity-escaped quotes (&quot; / &apos;).
  const startTime = performance.now();

  // The last <Value> attribute needs both quote characters in one value. Inside a
  // template literal `\'` evaluates to a bare apostrophe, which would terminate the
  // single-quoted attribute early and make the document not well-formed, so the
  // apostrophe is written as the &apos; entity instead.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-QUOTES-001</ID>
<Note title='Single quotes with "double quotes" inside'>Test note</Note>
<AdditionalDocumentReference>
<ID description="Product &quot;Premium&quot; edition">DOC-001</ID>
<DocumentDescription title="User's guide">Manual for "advanced" users</DocumentDescription>
<Attachment>
<ExternalReference>
<URI scheme="http" description='Link to "official" site'>http://example.com/doc?id=123&amp;type="pdf"</URI>
</ExternalReference>
</Attachment>
</AdditionalDocumentReference>
<InvoiceLine>
<Item>
<Name type='"Special" product'>Item with quotes</Name>
<Description note="Contains both 'single' and &quot;double&quot; quotes">Complex quoting test</Description>
<AdditionalItemProperty>
<Name>Quote test</Name>
<Value type="text" format='He said: "It&apos;s working!"'>Quoted value</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
</Invoice>`;

  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();

  // Verify quote handling - the implementation may either escape the quotes or
  // switch the delimiting quote style, so the patterns below use `.*` wherever a
  // quote character (raw or escaped) may appear.
  expect(xmlString).toBeTruthy();

  // Should contain the attribute values somehow
  expect(xmlString).toMatch(/Single quotes with .*double quotes.* inside/);
  expect(xmlString).toMatch(/Product .*Premium.* edition/);
  expect(xmlString).toMatch(/User.*s guide/);
  expect(xmlString).toMatch(/Special.*product/);

  // Only reached when all assertions passed; a failing run records no timing.
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('quote-attributes', elapsed);
});
t.test('International characters in attributes', async () => {
  // Loads an invoice whose attribute values and element text mix Latin, CJK,
  // Devanagari, Greek and currency symbols, and checks that each string is found
  // verbatim in the output of getXmlString().
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-INTL-001</ID>
<Note languageID="multi" region="Europa/歐洲/यूरोप">International attributes</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name tradingName="Société Générale" localName="ソシエテ・ジェネラル">SG Group</Name>
</PartyName>
<PostalAddress>
<StreetName type="Avenue/大道/एवेन्यू">Champs-Élysées</StreetName>
<CityName region="Île-de-France">Paris</CityName>
<Country>
<IdentificationCode listName="ISO 3166-1 α2">FR</IdentificationCode>
<Name language="fr-FR">République française</Name>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note terms="30 días/天/दिन" currency="€/¥/₹">Multi-currency payment</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name category="Bücher/书籍/पुस्तकें">International Books</Name>
<Description author="François Müller (佛朗索瓦·穆勒)">Multilingual content</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Plain substring checks: the characters must come back unescaped and unchanged.
  const expectedFragments: string[] = [
    'Europa/歐洲/यूरोप',
    'Société Générale',
    'ソシエテ・ジェネラル',
    'Avenue/大道/एवेन्यू',
    'Île-de-France',
    'α2', // Greek alpha
    'République française',
    '30 días/天/दिन',
    '€/¥/₹',
    'Bücher/书籍/पुस्तकें',
    '佛朗索瓦·穆勒'
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }

  // Only reached when all assertions passed; a failing run records no timing.
  performanceTracker.addMeasurement('intl-attributes', performance.now() - t0);
});
t.test('Empty and whitespace attributes', async () => {
  // Covers attributes that are empty, whitespace-only, padded with spaces, or that
  // span several lines, and checks what survives in the output of getXmlString().
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-WHITESPACE-001</ID>
<Note title="" language="">Empty attributes</Note>
<DocumentReference>
<ID schemeID=" " schemeAgencyID=" ">REF-001</ID>
<DocumentDescription prefix=" " suffix=" "> Trimmed content </DocumentDescription>
</DocumentReference>
<PaymentMeans>
<PaymentID reference="
multiline
reference
">PAY-001</PaymentID>
<InstructionNote format=" preserved spaces ">Note with spaces</InstructionNote>
</PaymentMeans>
<InvoiceLine>
<LineExtensionAmount currencyID="EUR" decimals="" symbol="€">100.00</LineExtensionAmount>
<Item>
<Description short=" " long=" ">Item description</Description>
</Item>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Empty attributes must still be present; their value may be empty or whitespace only.
  const emptyAttributePatterns: RegExp[] = [
    /title\s*=\s*["'](\s*)["']/,
    /language\s*=\s*["'](\s*)["']/
  ];
  for (const pattern of emptyAttributePatterns) {
    expect(serialized).toMatch(pattern);
  }

  // Whitespace handling may vary, so for these attributes only the presence of
  // the attribute name is checked, not its value.
  const requiredAttributeNames: string[] = ['schemeID=', 'reference='];
  for (const marker of requiredAttributeNames) {
    expect(serialized).toContain(marker);
  }

  // Only reached when all assertions passed; a failing run records no timing.
  performanceTracker.addMeasurement('whitespace-attributes', performance.now() - t0);
});
t.test('Numeric and boolean attribute values', async () => {
  // Checks that attribute values that look like numbers or booleans come back as the
  // same text (e.g. "001" keeps its leading zeros, "19.5" keeps its decimal point).
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-NUMERIC-001</ID>
<AllowanceCharge>
<ChargeIndicator>true</ChargeIndicator>
<SequenceNumeric>1</SequenceNumeric>
<Amount currencyID="EUR" decimals="2" precision="0.01">19.99</Amount>
<BaseAmount currencyID="EUR" percentage="19.5" factor="0.195">100.00</BaseAmount>
</AllowanceCharge>
<TaxTotal>
<TaxAmount currencyID="EUR" rate="19" rateType="percent">19.00</TaxAmount>
<TaxSubtotal>
<TaxableAmount currencyID="EUR" rounded="false">100.00</TaxableAmount>
<TaxCategory>
<ID>S</ID>
<Percent format="decimal">19.0</Percent>
<TaxExemptionReason code="0" active="true">Not exempt</TaxExemptionReason>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<InvoiceLine>
<ID sequence="001" index="0">1</ID>
<InvoicedQuantity unitCode="C62" value="10.0" isInteger="true">10</InvoicedQuantity>
<Price>
<PriceAmount currencyID="EUR" negative="false">10.00</PriceAmount>
<BaseQuantity unitCode="C62" default="1">1</BaseQuantity>
</Price>
</InvoiceLine>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Each pattern pins the exact textual value between the quotes.
  const expectedAttributes: RegExp[] = [
    /decimals\s*=\s*["']2["']/,
    /precision\s*=\s*["']0\.01["']/,
    /percentage\s*=\s*["']19\.5["']/,
    /factor\s*=\s*["']0\.195["']/,
    /rate\s*=\s*["']19["']/,
    /rounded\s*=\s*["']false["']/,
    /active\s*=\s*["']true["']/,
    /sequence\s*=\s*["']001["']/,
    /index\s*=\s*["']0["']/,
    /isInteger\s*=\s*["']true["']/,
    /negative\s*=\s*["']false["']/
  ];
  for (const pattern of expectedAttributes) {
    expect(serialized).toMatch(pattern);
  }

  // Only reached when all assertions passed; a failing run records no timing.
  performanceTracker.addMeasurement('numeric-boolean-attributes', performance.now() - t0);
});
t.test('Namespace-prefixed attributes', async () => {
  // Declares the xsi, xlink and ds prefixes on the root element and uses prefixed
  // attributes further down; each prefixed attribute name must be present in the
  // output of getXmlString().
  const t0 = performance.now();

  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice
xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:ds="http://www.w3.org/2000/09/xmldsig#"
xsi:schemaLocation="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2 Invoice.xsd">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-NS-PREFIX-001</ID>
<ProfileID xsi:type="string">urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
<AdditionalDocumentReference>
<ID>DOC-001</ID>
<Attachment>
<ExternalReference>
<URI xlink:type="simple" xlink:href="http://example.com/doc.pdf" xlink:title="Invoice Documentation">http://example.com/doc.pdf</URI>
</ExternalReference>
<EmbeddedDocumentBinaryObject
mimeCode="application/pdf"
encodingCode="base64"
filename="invoice.pdf"
ds:algorithm="SHA256">
JVBERi0xLjQKJeLjz9MKNCAwIG9iago=
</EmbeddedDocumentBinaryObject>
</Attachment>
</AdditionalDocumentReference>
<Signature>
<ID>SIG-001</ID>
<SignatureMethod ds:Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256">RSA-SHA256</SignatureMethod>
</Signature>
</Invoice>`;

  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();

  // Only the `prefix:name=` part is checked, not the value. `ds:algorithm` and
  // `ds:Algorithm` are distinct attributes because the check is case-sensitive.
  const prefixedAttributeNames: string[] = [
    'xsi:schemaLocation=',
    'xsi:type=',
    'xlink:type=',
    'xlink:href=',
    'xlink:title=',
    'ds:algorithm=',
    'ds:Algorithm='
  ];
  for (const marker of prefixedAttributeNames) {
    expect(serialized).toContain(marker);
  }

  // Only reached when all assertions passed; a failing run records no timing.
  performanceTracker.addMeasurement('ns-prefixed-attributes', performance.now() - t0);
});
t.test('Corpus attribute analysis', async () => {
  // Scans up to 80 `.xml` files returned by the corpus loader with a regex-based
  // attribute matcher, logs aggregate statistics, and asserts that at least one file
  // was processed and at least one attribute was found.
  const startTime = performance.now();
  let processedCount = 0;
  const attributeStats = {
    totalAttributes: 0,
    escapedAttributes: 0,
    unicodeAttributes: 0,
    numericAttributes: 0,
    emptyAttributes: 0,
    commonAttributes: new Map<string, number>()
  };

  // Patterns are built once instead of per attribute.
  // Group 1 is the (optionally prefixed) name; group 2 or 3 is the value. The closing
  // quote must be the same character as the opening one, so a value such as
  // 'say "hi"' or "User's guide" is captured whole instead of being cut at the
  // first quote of the other kind.
  const attributePattern = /\s(\w+(?::\w+)?)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  const escapedPattern = /&(?:amp|lt|gt|quot|apos);/;
  const nonAsciiPattern = /[^\x00-\x7F]/;
  const numericPattern = /^\d+(?:\.\d+)?$/;
  const blankPattern = /^\s*$/;

  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));

  // Analyze attribute usage in a bounded sample of the corpus
  // (slice clamps to the array length, so no explicit Math.min is needed).
  const sample = xmlFiles.slice(0, 80);

  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      // The loader may hand back raw bytes or an already decoded string.
      const xmlString: string = Buffer.isBuffer(content) ? content.toString('utf8') : content;

      for (const match of xmlString.matchAll(attributePattern)) {
        const [, attrName = '', doubleQuoted, singleQuoted] = match;
        const attrValue = doubleQuoted ?? singleQuoted ?? '';

        attributeStats.totalAttributes++;

        // Value contains one of the five predefined XML entities
        if (escapedPattern.test(attrValue)) {
          attributeStats.escapedAttributes++;
        }
        // Value contains at least one non-ASCII character
        if (nonAsciiPattern.test(attrValue)) {
          attributeStats.unicodeAttributes++;
        }
        // Value is an unsigned integer or decimal number
        if (numericPattern.test(attrValue)) {
          attributeStats.numericAttributes++;
        }
        // Value is empty or whitespace only
        if (blankPattern.test(attrValue)) {
          attributeStats.emptyAttributes++;
        }

        attributeStats.commonAttributes.set(
          attrName,
          (attributeStats.commonAttributes.get(attrName) || 0) + 1
        );
      }
      processedCount++;
    } catch (error) {
      // A caught value is `unknown`; narrow it before reading `.message`.
      // A failing file is logged and skipped so the remaining sample is still analysed.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Attribute parsing issue in ${file}:`, reason);
    }
  }

  console.log(`Attribute corpus analysis (${processedCount} files):`);
  console.log(`- Total attributes: ${attributeStats.totalAttributes}`);
  console.log(`- Escaped attributes: ${attributeStats.escapedAttributes}`);
  console.log(`- Unicode attributes: ${attributeStats.unicodeAttributes}`);
  console.log(`- Numeric attributes: ${attributeStats.numericAttributes}`);
  console.log(`- Empty attributes: ${attributeStats.emptyAttributes}`);

  // Most frequent attribute names first
  const topAttributes = Array.from(attributeStats.commonAttributes.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);
  console.log('Top 10 attribute names:', topAttributes);

  expect(processedCount).toBeGreaterThan(0);
  expect(attributeStats.totalAttributes).toBeGreaterThan(0);

  // Only reached when both assertions passed; a failing run records no timing.
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-attributes', elapsed);
});
// Print the collected timings, then enforce the average-duration budget.
// NOTE(review): none of the t.test(...) calls above is awaited, and every sub-test
// awaits an async call before it reaches addMeasurement(). This code therefore runs
// before any sub-test has recorded a measurement, so the summary and the assertion
// below do not reflect the sub-tests. Presumably each t.test(...) should be awaited
// first — confirm that t.test returns a promise before changing it.
performanceTracker.printSummary();
// Attribute operations should be reasonably fast: the average must stay below 120
// (the recorded values are performance.now() differences).
expect(performanceTracker.getAverageTime()).toBeLessThan(120);
});
tap.start();