275 lines
9.2 KiB
TypeScript
275 lines
9.2 KiB
TypeScript
import * as plugins from './smartsitemap.plugins.js';
|
|
import type * as interfaces from './interfaces/index.js';
|
|
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
|
|
import { SitemapValidator } from './smartsitemap.classes.validator.js';
|
|
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
|
|
import { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
|
|
import type { SitemapStream } from './smartsitemap.classes.sitemapstream.js';
|
|
|
|
/**
 * Chainable builder for creating standard XML sitemaps (<urlset>).
 * Every synchronous mutating method returns `this` for fluent chaining;
 * the async import methods resolve to `this`.
 *
 * Supports all sitemap extensions (images, videos, news, hreflang),
 * auto-splitting at 50K URLs, multiple output formats, and validation.
 */
export class UrlsetBuilder {
  /** URL entries collected so far, in insertion order. */
  protected urls: interfaces.ISitemapUrl[] = [];

  /** Effective options: the constructor defaults overlaid with caller-supplied values. */
  protected options: interfaces.ISitemapOptions;

  /**
   * @param options Optional overrides. Unspecified keys default to
   *   `prettyPrint: true`, `maxUrlsPerSitemap: 50000`, `validate: true`.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    this.options = {
      prettyPrint: true,
      maxUrlsPerSitemap: 50000,
      validate: true,
      ...options,
    };
  }

  /**
   * Append entries one at a time.
   *
   * `Array.prototype.push(...items)` hands every element over as a separate
   * call argument, which throws `RangeError` once the list grows to a few
   * hundred thousand entries. A plain loop has no such ceiling.
   */
  protected appendUrls(items: Iterable<interfaces.ISitemapUrl>): void {
    // Snapshot only in the degenerate self-append case so the loop terminates.
    const source = items === this.urls ? [...this.urls] : items;
    for (const item of source) {
      this.urls.push(item);
    }
  }

  // ──────────────────────────────────────────────
  // Adding URLs
  // ──────────────────────────────────────────────

  /** Add a single, fully specified URL entry. */
  add(url: interfaces.ISitemapUrl): this {
    this.urls.push(url);
    return this;
  }

  /**
   * Add a URL by its `loc` string.
   *
   * @param loc The URL location.
   * @param lastmod Optional last-modified value; the `lastmod` key is only
   *   set on the entry when this is neither `null` nor `undefined`.
   */
  addUrl(loc: string, lastmod?: Date | string | number): this {
    const entry: interfaces.ISitemapUrl = { loc };
    if (lastmod != null) {
      entry.lastmod = lastmod;
    }
    this.urls.push(entry);
    return this;
  }

  /** Add multiple URL entries. Safe for arbitrarily large arrays. */
  addUrls(urls: interfaces.ISitemapUrl[]): this {
    this.appendUrls(urls);
    return this;
  }

  /** Add one entry per string in `locs`, each carrying only `loc`. */
  addFromArray(locs: string[]): this {
    for (const loc of locs) {
      this.urls.push({ loc });
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Bulk operations
  // ──────────────────────────────────────────────

  /** Append every URL currently held by another builder (taken via `getUrls()`). */
  merge(other: UrlsetBuilder): this {
    this.appendUrls(other.getUrls());
    return this;
  }

  /** Keep only the entries for which `predicate` returns true. */
  filter(predicate: (url: interfaces.ISitemapUrl) => boolean): this {
    this.urls = this.urls.filter(predicate);
    return this;
  }

  /** Replace each entry with the result of `transform`. */
  map(transform: (url: interfaces.ISitemapUrl) => interfaces.ISitemapUrl): this {
    this.urls = this.urls.map(transform);
    return this;
  }

  /**
   * Sort the entries in place.
   *
   * @param compareFn Optional comparator; when omitted, entries are ordered
   *   by `loc` using `String.prototype.localeCompare`.
   */
  sort(compareFn?: (a: interfaces.ISitemapUrl, b: interfaces.ISitemapUrl) => number): this {
    this.urls.sort(compareFn ?? ((a, b) => a.loc.localeCompare(b.loc)));
    return this;
  }

  /** Drop entries whose `loc` was already seen; the first occurrence wins. */
  dedupe(): this {
    const seen = new Set<string>();
    this.urls = this.urls.filter((url) => {
      if (seen.has(url.loc)) {
        return false;
      }
      seen.add(url.loc);
      return true;
    });
    return this;
  }

  // ──────────────────────────────────────────────
  // Defaults
  // ──────────────────────────────────────────────

  /** Store the default changefreq (`options.defaultChangeFreq`) for URLs that don't specify one. */
  setDefaultChangeFreq(freq: interfaces.TChangeFreq): this {
    this.options.defaultChangeFreq = freq;
    return this;
  }

  /** Store the default priority (`options.defaultPriority`) for URLs that don't specify one. */
  setDefaultPriority(priority: number): this {
    this.options.defaultPriority = priority;
    return this;
  }

  /** Store the XSL stylesheet URL (`options.xslUrl`) for browser rendering. */
  setXslUrl(url: string): this {
    this.options.xslUrl = url;
    return this;
  }

  // ──────────────────────────────────────────────
  // Import sources (async, return Promise<this>)
  // ──────────────────────────────────────────────

  /** Append the entries that `FeedImporter.fromUrl` produces for an RSS/Atom feed URL. */
  async importFromFeedUrl(feedUrl: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const imported = await FeedImporter.fromUrl(feedUrl, options);
    this.appendUrls(imported);
    return this;
  }

  /** Append the entries that `FeedImporter.fromString` produces for an RSS/Atom XML string. */
  async importFromFeedString(feedXml: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const imported = await FeedImporter.fromString(feedXml, options);
    this.appendUrls(imported);
    return this;
  }

  /** Append the entries that `YamlImporter.parseConfig` produces for a YAML config string. */
  async importFromYaml(yamlString: string): Promise<this> {
    const imported = await YamlImporter.parseConfig(yamlString);
    this.appendUrls(imported);
    return this;
  }

  /**
   * Import from a @tsclass/tsclass IArticle array.
   *
   * Each article becomes one entry with `loc` taken from `article.url`.
   * `lastmod` is a `Date` built from `article.timestamp` when that value is
   * truthy, and `undefined` otherwise.
   */
  importFromArticles(articles: plugins.tsclass.content.IArticle[]): this {
    for (const article of articles) {
      const entry: interfaces.ISitemapUrl = {
        loc: article.url,
        lastmod: article.timestamp ? new Date(article.timestamp) : undefined,
      };
      this.urls.push(entry);
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Output
  // ──────────────────────────────────────────────

  /** Export as a sitemap XML string (rendered by `XmlRenderer.renderUrlset`). */
  toXml(): string {
    return XmlRenderer.renderUrlset(this.urls, this.options);
  }

  /** Export as plain text (one URL per line). */
  toTxt(): string {
    return XmlRenderer.renderTxt(this.urls);
  }

  /** Export as a JSON string. */
  toJson(): string {
    return XmlRenderer.renderJson(this.urls);
  }

  /** Export the XML output of `toXml()` as a gzip-compressed buffer. */
  async toGzipBuffer(): Promise<Buffer> {
    const xml = this.toXml();
    const gzip = plugins.promisify(plugins.zlib.gzip);
    return gzip(Buffer.from(xml, 'utf-8')) as Promise<Buffer>;
  }

  /**
   * Export with automatic index splitting.
   *
   * When the URL count fits into one sitemap, the result holds a single
   * `sitemap.xml` and no index. Otherwise the URLs are cut into consecutive
   * chunks named `sitemap-1.xml`, `sitemap-2.xml`, … and an index referencing
   * them is rendered. Index locations are prefixed with `options.baseUrl`
   * (one trailing slash removed) when it is set, else they are bare filenames.
   *
   * The chunk size is `options.maxUrlsPerSitemap`, rounded down and capped at
   * 50,000. A value that is not a finite number of at least 1 (for example
   * `0`, a negative number or `NaN`) falls back to 50,000, because such a
   * chunk size would otherwise never advance the split loop or would yield
   * empty chunks.
   */
  toSitemapSet(): interfaces.ISitemapSet {
    const protocolMax = 50000;
    const requested = Math.floor(this.options.maxUrlsPerSitemap ?? protocolMax);
    const maxUrls =
      Number.isFinite(requested) && requested >= 1 ? Math.min(requested, protocolMax) : protocolMax;

    if (this.urls.length <= maxUrls) {
      return {
        needsIndex: false,
        indexXml: null,
        sitemaps: [{ filename: 'sitemap.xml', xml: this.toXml() }],
      };
    }

    const baseUrl = this.options.baseUrl || '';
    const locPrefix = baseUrl ? `${baseUrl.replace(/\/$/, '')}/` : '';
    const sitemaps: Array<{ filename: string; xml: string }> = [];
    const indexEntries: Array<{ loc: string; lastmod?: string }> = [];

    // maxUrls is guaranteed >= 1 here, so `start` strictly increases.
    for (let start = 0, part = 1; start < this.urls.length; start += maxUrls, part++) {
      const filename = `sitemap-${part}.xml`;
      const chunk = this.urls.slice(start, start + maxUrls);
      sitemaps.push({ filename, xml: XmlRenderer.renderUrlset(chunk, this.options) });
      indexEntries.push({ loc: `${locPrefix}${filename}` });
    }

    const indexXml = XmlRenderer.renderIndex(indexEntries, this.options);

    return {
      needsIndex: true,
      indexXml,
      sitemaps,
    };
  }

  /**
   * Create a Node.js Readable stream for large sitemaps.
   *
   * The stream is returned immediately; the URLs held by this builder at the
   * next tick are then pushed into it, followed by `finish()`.
   */
  toStream(): SitemapStream {
    // Lazy import to avoid circular dependency issues at module level.
    // NOTE(review): a bare `require` only exists when this file is emitted as
    // CommonJS. The `.js` import specifiers suggest an ESM build, where this
    // line would throw a ReferenceError — confirm the module target.
    const { SitemapStream: SitemapStreamClass } = require('./smartsitemap.classes.sitemapstream.js');
    const stream = new SitemapStreamClass(this.options);
    // Defer the pushes so the caller can attach listeners first.
    process.nextTick(() => {
      for (const url of this.urls) {
        stream.pushUrl(url);
      }
      stream.finish();
    });
    return stream;
  }

  // ──────────────────────────────────────────────
  // Inspection
  // ──────────────────────────────────────────────

  /** Get a shallow copy of the URL array; the entries themselves are shared. */
  getUrls(): interfaces.ISitemapUrl[] {
    return [...this.urls];
  }

  /** Get the number of URLs. */
  get count(): number {
    return this.urls.length;
  }

  /** Validate this sitemap via `SitemapValidator.validateUrlset`. */
  validate(): interfaces.IValidationResult {
    return SitemapValidator.validateUrlset(this.urls, this.options);
  }

  /** Get statistics about this sitemap via `SitemapValidator.computeStats`. */
  stats(): interfaces.ISitemapStats {
    return SitemapValidator.computeStats(this.urls, this.options);
  }

  /** Get a shallow copy of the options for this builder. */
  getOptions(): interfaces.ISitemapOptions {
    return { ...this.options };
  }
}
|