Files
smartsitemap/ts/smartsitemap.classes.xmlrenderer.ts

295 lines
8.3 KiB
TypeScript

import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
// XML namespace URIs used by the renderer below.
// Only NS_SITEMAP is always emitted; the extension namespaces are attached
// to <urlset> on demand by XmlRenderer.detectNamespaces().

// Default namespace (`xmlns`) of both <urlset> and <sitemapindex>.
const NS_SITEMAP = 'http://www.sitemaps.org/schemas/sitemap/0.9';
// Bound to the `image:` prefix when at least one URL carries images.
const NS_IMAGE = 'http://www.google.com/schemas/sitemap-image/1.1';
// Bound to the `video:` prefix when at least one URL carries videos.
const NS_VIDEO = 'http://www.google.com/schemas/sitemap-video/1.1';
// Bound to the `news:` prefix when at least one URL carries a news entry.
const NS_NEWS = 'http://www.google.com/schemas/sitemap-news/0.9';
// Bound to the `xhtml:` prefix when at least one URL carries hreflang alternates.
const NS_XHTML = 'http://www.w3.org/1999/xhtml';
/**
 * Handles all XML generation for sitemaps.
 *
 * Builds plain object trees (attributes use the `@_` key prefix) and hands them
 * to smartxml's `SmartXml.createXmlFromObject()` for serialization. Also takes
 * care of text escaping, namespace detection, date formatting and the optional
 * XSL stylesheet processing instruction.
 *
 * NOTE(review): free-text values are escaped here *before* they are passed to
 * smartxml. Confirm that smartxml does not escape entities a second time,
 * otherwise `&` would end up as `&amp;amp;` in the output.
 */
export class XmlRenderer {
  /**
   * Escape a string for use in XML character data or attribute values.
   * Replaces the 5 XML special characters (`&`, `<`, `>`, `"`, `'`) with
   * their predefined entities.
   *
   * @param str raw text
   * @returns the text with all special characters replaced by entities
   */
  static escapeXml(str: string): string {
    return str
      // `&` has to be handled first so that the ampersands of the entities
      // produced by the following replacements are not escaped again.
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /**
   * Format a date value (Date, ISO string, or Unix timestamp in ms)
   * to an ISO 8601 / W3C Datetime string suitable for sitemaps.
   *
   * @param date value to format
   * @returns `Date#toISOString()` output; a string that cannot be parsed by
   *   `new Date()` is returned unchanged
   * @throws RangeError when given an invalid `Date` instance or a number that
   *   does not map to a valid date (raised by `Date#toISOString()`)
   */
  static formatDate(date: Date | string | number): string {
    if (date instanceof Date) {
      return date.toISOString();
    }
    if (typeof date === 'number') {
      return new Date(date).toISOString();
    }
    // Already a string — validate it parses
    const parsed = new Date(date);
    if (isNaN(parsed.getTime())) {
      return date; // Return as-is if unparseable
    }
    return parsed.toISOString();
  }

  /**
   * Detect which XML namespaces are needed based on URL entries.
   *
   * @param urls URL entries that will be rendered into one `<urlset>`
   * @returns attribute map for `<urlset>`: always contains `@_xmlns`, plus one
   *   `@_xmlns:<prefix>` entry per extension used by at least one URL
   */
  static detectNamespaces(urls: interfaces.ISitemapUrl[]): Record<string, string> {
    const ns: Record<string, string> = {
      '@_xmlns': NS_SITEMAP,
    };
    for (const url of urls) {
      if (url.images && url.images.length > 0) {
        ns['@_xmlns:image'] = NS_IMAGE;
      }
      if (url.videos && url.videos.length > 0) {
        ns['@_xmlns:video'] = NS_VIDEO;
      }
      if (url.news) {
        ns['@_xmlns:news'] = NS_NEWS;
      }
      if (url.alternates && url.alternates.length > 0) {
        ns['@_xmlns:xhtml'] = NS_XHTML;
      }
    }
    return ns;
  }

  /**
   * Render a URL array to sitemap XML string.
   *
   * @param urls entries to render as `<url>` children of `<urlset>`
   * @param options optional defaults (`defaultChangeFreq`, `defaultPriority`)
   *   and the `xslUrl` of a stylesheet to reference
   * @returns the serialized `<urlset>` document
   */
  static renderUrlset(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): string {
    const namespaces = XmlRenderer.detectNamespaces(urls);
    const urlElements = urls.map((url) => XmlRenderer.buildUrlElement(url, options));
    const xmlObj: any = {
      urlset: {
        ...namespaces,
        url: urlElements,
      },
    };
    const smartXml = new plugins.smartxml.SmartXml();
    let xml = smartXml.createXmlFromObject(xmlObj);
    // Insert XSL stylesheet processing instruction if specified
    if (options?.xslUrl) {
      xml = XmlRenderer.insertXslInstruction(xml, options.xslUrl);
    }
    return xml;
  }

  /**
   * Render a sitemap index XML string.
   *
   * @param entries child sitemaps to list (`loc` plus optional `lastmod`)
   * @param options only `xslUrl` is read here
   * @returns the serialized `<sitemapindex>` document
   */
  static renderIndex(entries: interfaces.ISitemapIndexEntry[], options?: interfaces.ISitemapOptions): string {
    const sitemapElements = entries.map((entry) => {
      const el: any = {
        loc: XmlRenderer.escapeXml(entry.loc),
      };
      if (entry.lastmod != null) {
        el.lastmod = XmlRenderer.formatDate(entry.lastmod);
      }
      return el;
    });
    const xmlObj: any = {
      sitemapindex: {
        '@_xmlns': NS_SITEMAP,
        sitemap: sitemapElements,
      },
    };
    const smartXml = new plugins.smartxml.SmartXml();
    let xml = smartXml.createXmlFromObject(xmlObj);
    if (options?.xslUrl) {
      xml = XmlRenderer.insertXslInstruction(xml, options.xslUrl);
    }
    return xml;
  }

  /**
   * Render URLs as plain text (one URL per line, no trailing newline).
   */
  static renderTxt(urls: interfaces.ISitemapUrl[]): string {
    return urls.map((u) => u.loc).join('\n');
  }

  /**
   * Render URLs as JSON, pretty-printed with a 2-space indent.
   */
  static renderJson(urls: interfaces.ISitemapUrl[]): string {
    return JSON.stringify(urls, null, 2);
  }

  /**
   * Build a single <url> element object for use with smartxml.
   *
   * @param url the entry to convert
   * @param options supplies fallback `changefreq` / `priority` values
   */
  private static buildUrlElement(url: interfaces.ISitemapUrl, options?: interfaces.ISitemapOptions): any {
    const el: any = {
      loc: XmlRenderer.escapeXml(url.loc),
    };
    // lastmod
    if (url.lastmod != null) {
      el.lastmod = XmlRenderer.formatDate(url.lastmod);
    }
    // changefreq (use default if not specified)
    const changefreq = url.changefreq ?? options?.defaultChangeFreq;
    if (changefreq) {
      el.changefreq = changefreq;
    }
    // priority (use default if not specified); `!= null` keeps a priority of 0
    const priority = url.priority ?? options?.defaultPriority;
    if (priority != null) {
      el.priority = priority.toFixed(1);
    }
    // Image extension
    if (url.images && url.images.length > 0) {
      el['image:image'] = url.images.map((img) => XmlRenderer.buildImageElement(img));
    }
    // Video extension
    if (url.videos && url.videos.length > 0) {
      el['video:video'] = url.videos.map((vid) => XmlRenderer.buildVideoElement(vid));
    }
    // News extension
    if (url.news) {
      el['news:news'] = XmlRenderer.buildNewsElement(url.news);
    }
    // hreflang alternates
    if (url.alternates && url.alternates.length > 0) {
      el['xhtml:link'] = url.alternates.map((alt) => ({
        '@_rel': 'alternate',
        '@_hreflang': alt.hreflang,
        '@_href': XmlRenderer.escapeXml(alt.href),
      }));
    }
    return el;
  }

  /**
   * Build an <image:image> element object.
   * `image:loc` is always present; the other children are added only when
   * the corresponding field is a non-empty string.
   */
  private static buildImageElement(img: interfaces.ISitemapImage): any {
    const el: any = {
      'image:loc': XmlRenderer.escapeXml(img.loc),
    };
    if (img.caption) {
      el['image:caption'] = XmlRenderer.escapeXml(img.caption);
    }
    if (img.title) {
      el['image:title'] = XmlRenderer.escapeXml(img.title);
    }
    if (img.geoLocation) {
      el['image:geo_location'] = XmlRenderer.escapeXml(img.geoLocation);
    }
    if (img.licenseUrl) {
      el['image:license'] = XmlRenderer.escapeXml(img.licenseUrl);
    }
    return el;
  }

  /**
   * Build a <video:video> element object.
   * Thumbnail, title and description are always emitted; every other child
   * is added only when the corresponding field is set.
   */
  private static buildVideoElement(vid: interfaces.ISitemapVideo): any {
    const el: any = {
      'video:thumbnail_loc': XmlRenderer.escapeXml(vid.thumbnailLoc),
      'video:title': XmlRenderer.escapeXml(vid.title),
      'video:description': XmlRenderer.escapeXml(vid.description),
    };
    if (vid.contentLoc) {
      el['video:content_loc'] = XmlRenderer.escapeXml(vid.contentLoc);
    }
    if (vid.playerLoc) {
      el['video:player_loc'] = XmlRenderer.escapeXml(vid.playerLoc);
    }
    // Numeric fields use `!= null` so that a value of 0 is still rendered.
    if (vid.duration != null) {
      el['video:duration'] = vid.duration;
    }
    if (vid.rating != null) {
      el['video:rating'] = vid.rating;
    }
    if (vid.viewCount != null) {
      el['video:view_count'] = vid.viewCount;
    }
    if (vid.publicationDate != null) {
      el['video:publication_date'] = XmlRenderer.formatDate(vid.publicationDate);
    }
    if (vid.familyFriendly != null) {
      el['video:family_friendly'] = vid.familyFriendly ? 'yes' : 'no';
    }
    if (vid.tags && vid.tags.length > 0) {
      // Tags are free text, so they are escaped like every other text field.
      el['video:tag'] = vid.tags.map((tag) => XmlRenderer.escapeXml(tag));
    }
    if (vid.live != null) {
      el['video:live'] = vid.live ? 'yes' : 'no';
    }
    if (vid.requiresSubscription != null) {
      el['video:requires_subscription'] = vid.requiresSubscription ? 'yes' : 'no';
    }
    return el;
  }

  /**
   * Build a <news:news> element object.
   * Keywords may be given as an array (joined with ", ") or as a ready-made string.
   */
  private static buildNewsElement(news: interfaces.ISitemapNews): any {
    const el: any = {
      'news:publication': {
        'news:name': XmlRenderer.escapeXml(news.publication.name),
        'news:language': news.publication.language,
      },
      'news:publication_date': XmlRenderer.formatDate(news.publicationDate),
      'news:title': XmlRenderer.escapeXml(news.title),
    };
    if (news.keywords) {
      const kw = Array.isArray(news.keywords) ? news.keywords.join(', ') : news.keywords;
      el['news:keywords'] = XmlRenderer.escapeXml(kw);
    }
    return el;
  }

  /**
   * Insert an XSL stylesheet processing instruction after the XML declaration.
   *
   * @param xml serialized document
   * @param xslUrl stylesheet URL; escaped before being placed in `href`
   * @returns the document with the instruction on the line after the
   *   declaration; unchanged if the exact declaration
   *   `<?xml version="1.0" encoding="UTF-8"?>` is not found
   */
  private static insertXslInstruction(xml: string, xslUrl: string): string {
    const declaration = '<?xml version="1.0" encoding="UTF-8"?>';
    const pi = `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(xslUrl)}"?>`;
    // A replacer function is used instead of a replacement string so that `$`
    // sequences in the URL (`$$`, `$&`, `$'`, ...) are inserted literally
    // rather than being interpreted as replacement patterns.
    return xml.replace(declaration, () => `${declaration}\n${pi}`);
  }
}