einvoice/test/suite/einvoice_encoding/test.enc-07.attribute-encoding.ts
2025-05-25 19:45:37 +00:00

460 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { expect, tap } from '@git.zone/tstest/tapbundle';
import * as plugins from '../plugins.js';
import { EInvoice } from '../../../ts/index.js';
import { CorpusLoader } from '../corpus.loader.js';
import { PerformanceTracker } from '../performance.tracker.js';
tap.test('ENC-07: Attribute Encoding - should handle XML attribute encoding correctly', async (t) => {
// ENC-07: Verify proper encoding of XML attributes, including special characters and quotes.
// Each scenario below is registered through t.test(), loads an XML fixture into an EInvoice,
// and inspects the string returned by getXmlString().
// Shared by all scenarios: each one records its elapsed time here via addMeasurement().
const performanceTracker = new PerformanceTracker('ENC-07: Attribute Encoding');
// Used by the corpus analysis scenario to list (getAllFiles) and read (readFile) sample files.
const corpusLoader = new CorpusLoader();
t.test('Basic attribute encoding', async () => {
  // Loads a UBL invoice whose attributes hold plain ASCII values and checks that
  // each name/value pair is still present in the string returned by getXmlString().
  const t0 = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID schemeID="INVOICE" schemeAgencyID="6">ATTR-BASIC-001</ID>
<IssueDate>2025-01-25</IssueDate>
<DocumentCurrencyCode listID="ISO4217" listAgencyID="6" listVersionID="2001">EUR</DocumentCurrencyCode>
<TaxTotal>
<TaxAmount currencyID="EUR">19.00</TaxAmount>
<TaxSubtotal>
<TaxCategory>
<ID schemeID="UNCL5305" schemeAgencyID="6">S</ID>
<Percent>19</Percent>
<TaxScheme>
<ID schemeID="UN/ECE 5153" schemeAgencyID="6">VAT</ID>
</TaxScheme>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<InvoiceLine>
<ID>1</ID>
<InvoicedQuantity unitCode="C62" unitCodeListID="UNECERec20">10</InvoicedQuantity>
<LineExtensionAmount currencyID="EUR">100.00</LineExtensionAmount>
</InvoiceLine>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();
  // Each pattern tolerates either quote style and optional whitespace around '='.
  const expectedAttributes = [
    /schemeID\s*=\s*["']INVOICE["']/,
    /schemeAgencyID\s*=\s*["']6["']/,
    /listID\s*=\s*["']ISO4217["']/,
    /listVersionID\s*=\s*["']2001["']/,
    /currencyID\s*=\s*["']EUR["']/,
    /unitCode\s*=\s*["']C62["']/,
    /unitCodeListID\s*=\s*["']UNECERec20["']/
  ];
  for (const pattern of expectedAttributes) {
    expect(serialized).toMatch(pattern);
  }
  performanceTracker.addMeasurement('basic-attributes', performance.now() - t0);
});
t.test('Attributes with special characters', async () => {
  // Loads an invoice whose attribute values contain XML-significant characters
  // (&, <, >, ") plus non-ASCII text, then checks how those values appear in the
  // string returned by getXmlString().
  const startTime = performance.now();
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-SPECIAL-001</ID>
<Note languageID="de-DE" encoding="UTF-8">Rechnung für Bücher &amp; Zeitschriften</Note>
<PaymentMeans>
<PaymentMeansCode name="Überweisung (Bank &amp; SEPA)">30</PaymentMeansCode>
<PaymentID reference="Order &lt;2025-001&gt;">PAY-123</PaymentID>
<PayeeFinancialAccount>
<Name type="IBAN &amp; BIC">DE89 3704 0044 0532 0130 00</Name>
<FinancialInstitutionBranch>
<Name branch="München &quot;Zentrum&quot;">Sparkasse</Name>
</FinancialInstitutionBranch>
</PayeeFinancialAccount>
</PaymentMeans>
<AllowanceCharge>
<ChargeIndicator>false</ChargeIndicator>
<AllowanceChargeReason code="95" description="Discount for &gt; 100€ orders">Volume discount</AllowanceChargeReason>
<Amount currencyID="EUR" percentage="5%" calculation="100 * 0.05">5.00</Amount>
</AllowanceCharge>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // '&' and '<' must always be escaped inside attribute values.
  expect(xmlString).toMatch(/name\s*=\s*["']Überweisung \(Bank &amp; SEPA\)["']/);
  expect(xmlString).toMatch(/type\s*=\s*["']IBAN &amp; BIC["']/);
  // '>' is legal unescaped in an attribute value (XML 1.0 AttValue only forbids
  // '<' and '&'; Canonical XML emits it literally), so accept either form.
  expect(xmlString).toMatch(/reference\s*=\s*["']Order &lt;2025-001(&gt;|>)["']/);
  expect(xmlString).toMatch(/description\s*=\s*["']Discount for (&gt;|>) 100€ orders["']/);
  // A double quote may stay literal when the serializer switches to single-quote delimiters.
  expect(xmlString).toMatch(/branch\s*=\s*["']München (&quot;|")Zentrum(&quot;|")["']/);
  expect(xmlString).toMatch(/percentage\s*=\s*["']5%["']/);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('special-char-attributes', elapsed);
});
t.test('Quote handling in attributes', async () => {
  // Loads an invoice that mixes single- and double-quoted attribute values, with
  // the opposite quote character (literal or as an entity) inside the value, and
  // checks that the text of those values is still present after getXmlString().
  const startTime = performance.now();
  // NOTE: an apostrophe inside a single-quoted attribute must be written as
  // &apos;. Writing \' here would NOT help: inside a template literal it is just
  // a bare ', which ends the attribute early and makes the fixture malformed XML.
  const xmlContent = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-QUOTES-001</ID>
<Note title='Single quotes with "double quotes" inside'>Test note</Note>
<AdditionalDocumentReference>
<ID description="Product &quot;Premium&quot; edition">DOC-001</ID>
<DocumentDescription title="User's guide">Manual for "advanced" users</DocumentDescription>
<Attachment>
<ExternalReference>
<URI scheme="http" description='Link to "official" site'>http://example.com/doc?id=123&amp;type="pdf"</URI>
</ExternalReference>
</Attachment>
</AdditionalDocumentReference>
<InvoiceLine>
<Item>
<Name type='"Special" product'>Item with quotes</Name>
<Description note="Contains both 'single' and &quot;double&quot; quotes">Complex quoting test</Description>
<AdditionalItemProperty>
<Name>Quote test</Name>
<Value type="text" format='He said: "It&apos;s working!"'>Quoted value</Value>
</AdditionalItemProperty>
</Item>
</InvoiceLine>
</Invoice>`;
  const einvoice = new EInvoice();
  await einvoice.loadFromString(xmlContent);
  const xmlString = einvoice.getXmlString();
  // Verify quote handling - implementation may use different strategies:
  // either escape the quotes or switch the delimiter quote style.
  expect(xmlString).toBeTruthy();
  // The patterns use '.*' where a quote sits so that both the literal character
  // and its entity form are accepted.
  expect(xmlString).toMatch(/Single quotes with .*double quotes.* inside/);
  expect(xmlString).toMatch(/Product .*Premium.* edition/);
  expect(xmlString).toMatch(/User.*s guide/);
  expect(xmlString).toMatch(/Special.*product/);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('quote-attributes', elapsed);
});
t.test('International characters in attributes', async () => {
  // Loads an invoice whose attribute values and element text use several scripts
  // (Latin with diacritics, CJK, Devanagari, Greek, currency symbols) and checks
  // that each sample string appears verbatim in the getXmlString() output.
  const t0 = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-INTL-001</ID>
<Note languageID="multi" region="Europa/歐洲/यूरोप">International attributes</Note>
<AccountingSupplierParty>
<Party>
<PartyName>
<Name tradingName="Société Générale" localName="ソシエテ・ジェネラル">SG Group</Name>
</PartyName>
<PostalAddress>
<StreetName type="Avenue/大道/एवेन्यू">Champs-Élysées</StreetName>
<CityName region="Île-de-France">Paris</CityName>
<Country>
<IdentificationCode listName="ISO 3166-1 α2">FR</IdentificationCode>
<Name language="fr-FR">République française</Name>
</Country>
</PostalAddress>
</Party>
</AccountingSupplierParty>
<PaymentTerms>
<Note terms="30 días/天/दिन" currency="€/¥/₹">Multi-currency payment</Note>
</PaymentTerms>
<InvoiceLine>
<Item>
<Name category="Bücher/书籍/पुस्तकें">International Books</Name>
<Description author="François Müller (佛朗索瓦·穆勒)">Multilingual content</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();
  // Samples that must be found unchanged in the output, in assertion order.
  const expectedFragments = [
    'Europa/歐洲/यूरोप',
    'Société Générale',
    'ソシエテ・ジェネラル',
    'Avenue/大道/एवेन्यू',
    'Île-de-France',
    'α2', // Greek alpha
    'République française',
    '30 días/天/दिन',
    '€/¥/₹',
    'Bücher/书籍/पुस्तकें',
    '佛朗索瓦·穆勒'
  ];
  for (const fragment of expectedFragments) {
    expect(serialized).toContain(fragment);
  }
  performanceTracker.addMeasurement('intl-attributes', performance.now() - t0);
});
t.test('Empty and whitespace attributes', async () => {
  // Loads an invoice with empty, whitespace-only and multi-line attribute values
  // and checks that the attributes themselves are still present afterwards.
  const t0 = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-WHITESPACE-001</ID>
<Note title="" language="">Empty attributes</Note>
<DocumentReference>
<ID schemeID=" " schemeAgencyID=" ">REF-001</ID>
<DocumentDescription prefix=" " suffix=" "> Trimmed content </DocumentDescription>
</DocumentReference>
<PaymentMeans>
<PaymentID reference="
multiline
reference
">PAY-001</PaymentID>
<InstructionNote format=" preserved spaces ">Note with spaces</InstructionNote>
</PaymentMeans>
<InvoiceLine>
<LineExtensionAmount currencyID="EUR" decimals="" symbol="€">100.00</LineExtensionAmount>
<Item>
<Description short=" " long=" ">Item description</Description>
</Item>
</InvoiceLine>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();
  // Empty attributes must survive; the value may be empty or whitespace-only.
  for (const emptyAttr of [/title\s*=\s*["'](\s*)["']/, /language\s*=\s*["'](\s*)["']/]) {
    expect(serialized).toMatch(emptyAttr);
  }
  // Whitespace handling may vary, so only the attribute's presence is checked.
  for (const attrPrefix of ['schemeID=', 'reference=']) {
    expect(serialized).toContain(attrPrefix);
  }
  performanceTracker.addMeasurement('whitespace-attributes', performance.now() - t0);
});
t.test('Numeric and boolean attribute values', async () => {
  // Loads an invoice whose attributes hold numeric-looking and boolean-looking
  // text and checks that each value comes back textually unchanged
  // (e.g. "001" keeps its leading zeros).
  const t0 = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-NUMERIC-001</ID>
<AllowanceCharge>
<ChargeIndicator>true</ChargeIndicator>
<SequenceNumeric>1</SequenceNumeric>
<Amount currencyID="EUR" decimals="2" precision="0.01">19.99</Amount>
<BaseAmount currencyID="EUR" percentage="19.5" factor="0.195">100.00</BaseAmount>
</AllowanceCharge>
<TaxTotal>
<TaxAmount currencyID="EUR" rate="19" rateType="percent">19.00</TaxAmount>
<TaxSubtotal>
<TaxableAmount currencyID="EUR" rounded="false">100.00</TaxableAmount>
<TaxCategory>
<ID>S</ID>
<Percent format="decimal">19.0</Percent>
<TaxExemptionReason code="0" active="true">Not exempt</TaxExemptionReason>
</TaxCategory>
</TaxSubtotal>
</TaxTotal>
<InvoiceLine>
<ID sequence="001" index="0">1</ID>
<InvoicedQuantity unitCode="C62" value="10.0" isInteger="true">10</InvoicedQuantity>
<Price>
<PriceAmount currencyID="EUR" negative="false">10.00</PriceAmount>
<BaseQuantity unitCode="C62" default="1">1</BaseQuantity>
</Price>
</InvoiceLine>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();
  // One pattern per attribute, checked in the order listed.
  const expectedAttributes = [
    /decimals\s*=\s*["']2["']/,
    /precision\s*=\s*["']0\.01["']/,
    /percentage\s*=\s*["']19\.5["']/,
    /factor\s*=\s*["']0\.195["']/,
    /rate\s*=\s*["']19["']/,
    /rounded\s*=\s*["']false["']/,
    /active\s*=\s*["']true["']/,
    /sequence\s*=\s*["']001["']/,
    /index\s*=\s*["']0["']/,
    /isInteger\s*=\s*["']true["']/,
    /negative\s*=\s*["']false["']/
  ];
  for (const pattern of expectedAttributes) {
    expect(serialized).toMatch(pattern);
  }
  performanceTracker.addMeasurement('numeric-boolean-attributes', performance.now() - t0);
});
t.test('Namespace-prefixed attributes', async () => {
  // Loads an invoice that declares the xsi, xlink and ds namespaces and uses
  // prefixed attributes, then checks that each prefixed attribute name is still
  // present in the getXmlString() output (names are case-sensitive).
  const t0 = performance.now();
  const sourceXml = `<?xml version="1.0" encoding="UTF-8"?>
<Invoice
xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:ds="http://www.w3.org/2000/09/xmldsig#"
xsi:schemaLocation="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2 Invoice.xsd">
<UBLVersionID>2.1</UBLVersionID>
<ID>ATTR-NS-PREFIX-001</ID>
<ProfileID xsi:type="string">urn:fdc:peppol.eu:2017:poacc:billing:01:1.0</ProfileID>
<AdditionalDocumentReference>
<ID>DOC-001</ID>
<Attachment>
<ExternalReference>
<URI xlink:type="simple" xlink:href="http://example.com/doc.pdf" xlink:title="Invoice Documentation">http://example.com/doc.pdf</URI>
</ExternalReference>
<EmbeddedDocumentBinaryObject
mimeCode="application/pdf"
encodingCode="base64"
filename="invoice.pdf"
ds:algorithm="SHA256">
JVBERi0xLjQKJeLjz9MKNCAwIG9iago=
</EmbeddedDocumentBinaryObject>
</Attachment>
</AdditionalDocumentReference>
<Signature>
<ID>SIG-001</ID>
<SignatureMethod ds:Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256">RSA-SHA256</SignatureMethod>
</Signature>
</Invoice>`;
  const invoice = new EInvoice();
  await invoice.loadFromString(sourceXml);
  const serialized = invoice.getXmlString();
  // Prefixed attribute names that must still be present, in assertion order.
  const expectedPrefixedAttributes = [
    'xsi:schemaLocation=',
    'xsi:type=',
    'xlink:type=',
    'xlink:href=',
    'xlink:title=',
    'ds:algorithm=',
    'ds:Algorithm='
  ];
  for (const attrPrefix of expectedPrefixedAttributes) {
    expect(serialized).toContain(attrPrefix);
  }
  performanceTracker.addMeasurement('ns-prefixed-attributes', performance.now() - t0);
});
t.test('Corpus attribute analysis', async () => {
  // Scans up to MAX_SAMPLE_FILES corpus files ending in '.xml' with a regular
  // expression (no XML parsing), tallies statistics about the attribute-like
  // `name="value"` / `name='value'` occurrences it finds, logs them, and asserts
  // that at least one file was processed and at least one attribute was seen.
  const startTime = performance.now();
  const MAX_SAMPLE_FILES = 80;
  let processedCount = 0;
  const attributeStats = {
    totalAttributes: 0,
    escapedAttributes: 0,
    unicodeAttributes: 0,
    numericAttributes: 0,
    emptyAttributes: 0,
    commonAttributes: new Map<string, number>()
  };
  // Classifiers applied to the attribute VALUE only (delimiters already stripped).
  const entityPattern = /&(?:amp|lt|gt|quot|apos);/; // one of the five predefined entities
  const nonAsciiPattern = /[^\x00-\x7F]/;
  const numericPattern = /^\d+(?:\.\d+)?$/;
  const blankPattern = /^\s*$/; // empty or whitespace-only
  const files = await corpusLoader.getAllFiles();
  const xmlFiles = files.filter(f => f.endsWith('.xml'));
  // Analyze attribute usage in a bounded sample of the corpus
  const sample = xmlFiles.slice(0, MAX_SAMPLE_FILES);
  for (const file of sample) {
    try {
      const content = await corpusLoader.readFile(file);
      const xmlString: string = Buffer.isBuffer(content) ? content.toString('utf8') : content;
      // The value must be closed by the SAME quote that opened it, so a value
      // like "User's guide" is captured whole instead of being cut at the
      // apostrophe. A fresh /g regex per file keeps lastIndex state isolated.
      const attrPattern = /\s(\w+(?::\w+)?)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
      let match: RegExpExecArray | null;
      while ((match = attrPattern.exec(xmlString)) !== null) {
        const attrName = match[1];
        // Exactly one of the two value groups participates in a match.
        const attrValue = match[2] !== undefined ? match[2] : match[3];
        attributeStats.totalAttributes++;
        if (entityPattern.test(attrValue)) {
          attributeStats.escapedAttributes++;
        }
        if (nonAsciiPattern.test(attrValue)) {
          attributeStats.unicodeAttributes++;
        }
        if (numericPattern.test(attrValue)) {
          attributeStats.numericAttributes++;
        }
        if (blankPattern.test(attrValue)) {
          attributeStats.emptyAttributes++;
        }
        attributeStats.commonAttributes.set(
          attrName,
          (attributeStats.commonAttributes.get(attrName) || 0) + 1
        );
      }
      processedCount++;
    } catch (error) {
      // Best effort: a file that cannot be read is reported and skipped.
      // The caught value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(`Attribute parsing issue in ${file}:`, reason);
    }
  }
  console.log(`Attribute corpus analysis (${processedCount} files):`);
  console.log(`- Total attributes: ${attributeStats.totalAttributes}`);
  console.log(`- Escaped attributes: ${attributeStats.escapedAttributes}`);
  console.log(`- Unicode attributes: ${attributeStats.unicodeAttributes}`);
  console.log(`- Numeric attributes: ${attributeStats.numericAttributes}`);
  console.log(`- Empty attributes: ${attributeStats.emptyAttributes}`);
  // Most frequent attribute names first.
  const topAttributes = Array.from(attributeStats.commonAttributes.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);
  console.log('Top 10 attribute names:', topAttributes);
  expect(processedCount).toBeGreaterThan(0);
  expect(attributeStats.totalAttributes).toBeGreaterThan(0);
  const elapsed = performance.now() - startTime;
  performanceTracker.addMeasurement('corpus-attributes', elapsed);
});
// Print performance summary for the measurements recorded by the scenarios above.
// NOTE(review): the t.test(...) calls above are not awaited in this callback;
// confirm that the scenarios have finished and recorded their measurements
// before the summary and the average below are computed.
performanceTracker.printSummary();
// Performance assertions
// Presumably the mean of the recorded measurements, which are performance.now()
// deltas (milliseconds) - confirm against PerformanceTracker.
const avgTime = performanceTracker.getAverageTime();
expect(avgTime).toBeLessThan(120); // Attribute operations should be reasonably fast
});
// Start the tap runner for the test registered above.
tap.start();