BREAKING CHANGE(api): redesign smartsitemap around builder-based sitemap creation, parsing, validation, and import utilities

This commit is contained in:
2026-03-20 14:03:33 +00:00
parent 61f6bcebd4
commit 4e707347dd
22 changed files with 4843 additions and 2196 deletions

View File

@@ -1,5 +1,12 @@
# Changelog
## 2026-03-20 - 4.0.0 - BREAKING CHANGE(api)
redesign smartsitemap around builder-based sitemap creation, parsing, validation, and import utilities
- Replace the previous SmartSitemap instance API and legacy sitemap classes with exported builder, parser, renderer, validator, stream, feed importer, and YAML importer modules
- Add support for sitemap indexes, news/image/video/hreflang extensions, auto-splitting, gzip and stream output, and structured validation and statistics
- Expand public TypeScript interfaces and package metadata to reflect the broader sitemap feature set
## 2025-11-19 - 2.0.4 - fix(smartsitemap)
Update CI configuration, bump dependencies, and apply small code cleanups

View File

@@ -1,34 +1,45 @@
{
"gitzone": {
"@git.zone/cli": {
"projectType": "npm",
"module": {
"githost": "code.foss.global",
"gitscope": "push.rocks",
"gitrepo": "smartsitemap",
"description": "A module for generating and managing sitemaps, supporting dynamic sitemap generation from feeds.",
"description": "A comprehensive TypeScript sitemap library with builder API, supporting standard, news, image, video, and hreflang sitemaps with auto-splitting, streaming, validation, and RSS feed integration.",
"npmPackagename": "@push.rocks/smartsitemap",
"license": "MIT",
"projectDomain": "push.rocks",
"keywords": [
"sitemap",
"sitemap generator",
"RSS feeds",
"news sitemap",
"sitemap index",
"XML sitemap",
"website indexing",
"search engine optimization",
"news sitemap",
"image sitemap",
"video sitemap",
"hreflang",
"RSS feeds",
"SEO",
"web crawling",
"dynamic sitemap creation",
"search engine optimization",
"builder API",
"streaming",
"validation",
"TypeScript",
"node.js"
]
},
"release": {
"registries": [
"https://verdaccio.lossless.digital",
"https://registry.npmjs.org"
],
"accessLevel": "public"
}
},
"npmci": {
"npmGlobalTools": [],
"npmAccessLevel": "public"
},
"tsdoc": {
"@git.zone/tsdoc": {
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
},
"@ship.zone/szci": {
"npmGlobalTools": []
}
}
}

View File

@@ -1,8 +1,8 @@
{
"name": "@push.rocks/smartsitemap",
"version": "2.0.4",
"version": "3.0.0",
"private": false,
"description": "A module for generating and managing sitemaps, supporting dynamic sitemap generation from feeds.",
"description": "A comprehensive TypeScript sitemap library with builder API, supporting standard, news, image, video, and hreflang sitemaps with auto-splitting, streaming, validation, and RSS feed integration.",
"main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts",
"type": "module",
@@ -14,21 +14,20 @@
"buildDocs": "tsdoc"
},
"devDependencies": {
"@git.zone/tsbuild": "^3.1.0",
"@git.zone/tsbundle": "^2.0.8",
"@git.zone/tsrun": "^2.0.0",
"@git.zone/tstest": "^2.8.2",
"@git.zone/tsbuild": "^4.3.0",
"@git.zone/tsbundle": "^2.9.1",
"@git.zone/tsrun": "^2.0.1",
"@git.zone/tstest": "^3.5.0",
"@push.rocks/smartenv": "^6.0.0",
"@push.rocks/tapbundle": "^6.0.3",
"@types/node": "^20.8.7"
"@types/node": "^22.0.0"
},
"dependencies": {
"@push.rocks/smartcache": "^1.0.16",
"@push.rocks/smartfeed": "^1.0.11",
"@push.rocks/smartxml": "^2.0.0",
"@push.rocks/smartyaml": "^3.0.4",
"@push.rocks/webrequest": "^4.0.1",
"@tsclass/tsclass": "^9.3.0"
"@push.rocks/webrequest": "^4.0.5",
"@tsclass/tsclass": "^9.5.0"
},
"browserslist": [
"last 1 chrome versions"
@@ -46,15 +45,20 @@
"readme.md"
],
"keywords": [
"sitemap",
"sitemap generator",
"RSS feeds",
"news sitemap",
"sitemap index",
"XML sitemap",
"website indexing",
"search engine optimization",
"news sitemap",
"image sitemap",
"video sitemap",
"hreflang",
"RSS feeds",
"SEO",
"web crawling",
"dynamic sitemap creation",
"search engine optimization",
"builder API",
"streaming",
"validation",
"TypeScript",
"node.js"
],

3006
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

1127
readme.md

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,484 @@
import { expect, tap } from '@push.rocks/tapbundle';
import * as smartsitemap from '../ts/index.js';
let testSmartsitemap: smartsitemap.SmartSitemap;
// ──────────────────────────────────────────────
// Basic builder tests
// ──────────────────────────────────────────────
tap.test('should create an instance of Smartsitemap', async () => {
testSmartsitemap = new smartsitemap.SmartSitemap();
expect(testSmartsitemap).toBeInstanceOf(smartsitemap.SmartSitemap);
tap.test('SmartSitemap.create() should return a UrlsetBuilder', async () => {
const builder = smartsitemap.SmartSitemap.create();
expect(builder).toBeInstanceOf(smartsitemap.UrlsetBuilder);
});
tap.test('should create a sitemap from feed', async () => {
const sitemapString = await testSmartsitemap.createSitemapNewsFromFeedUrl(
'https://coffee.link/rss/',
);
console.log(sitemapString);
tap.test('should create a basic sitemap with addUrl()', async () => {
const xml = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/')
.addUrl('https://example.com/about')
.addUrl('https://example.com/blog')
.toXml();
expect(xml).toInclude('<?xml version="1.0" encoding="UTF-8"?>');
expect(xml).toInclude('<urlset');
expect(xml).toInclude('xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"');
expect(xml).toInclude('<loc>https://example.com/</loc>');
expect(xml).toInclude('<loc>https://example.com/about</loc>');
expect(xml).toInclude('<loc>https://example.com/blog</loc>');
});
tap.test('should parse a sitemap', async () => {
const result = await testSmartsitemap.parseSitemapUrl(
'https://www.theverge.com/sitemaps/google_news',
);
// console.log(result.urlset.url);
tap.test('should apply default changefreq and priority', async () => {
const xml = smartsitemap.SmartSitemap.create()
.setDefaultChangeFreq('weekly')
.setDefaultPriority(0.5)
.addUrl('https://example.com/')
.toXml();
expect(xml).toInclude('<changefreq>weekly</changefreq>');
expect(xml).toInclude('<priority>0.5</priority>');
});
tap.start();
tap.test('should support per-URL changefreq and priority override', async () => {
const xml = smartsitemap.SmartSitemap.create()
.setDefaultChangeFreq('weekly')
.setDefaultPriority(0.5)
.add({
loc: 'https://example.com/',
changefreq: 'daily',
priority: 1.0,
})
.toXml();
expect(xml).toInclude('<changefreq>daily</changefreq>');
expect(xml).toInclude('<priority>1.0</priority>');
expect(xml).not.toInclude('<changefreq>weekly</changefreq>');
});
// lastmod may be supplied as a Date, a date string, or a Unix timestamp in ms.
// Every one of the three URLs must end up with its own <lastmod> element;
// a single substring match alone would also pass if only the Date form worked.
tap.test('should support lastmod as Date, string, and number', async () => {
  const date = new Date('2025-06-15T00:00:00.000Z');
  const xml = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/a', date)
    .addUrl('https://example.com/b', '2025-06-15')
    .addUrl('https://example.com/c', date.getTime())
    .toXml();
  // Date input is rendered as a full ISO timestamp.
  expect(xml).toInclude('<lastmod>2025-06-15T00:00:00.000Z</lastmod>');
  // One <lastmod> per URL, each on the expected day regardless of input form.
  const lastmodTags = xml.match(/<lastmod>2025-06-15[^<]*<\/lastmod>/g) ?? [];
  expect(lastmodTags.length).toEqual(3);
});
// ──────────────────────────────────────────────
// URL escaping
// ──────────────────────────────────────────────
tap.test('should escape XML special characters in URLs', async () => {
const xml = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/search?q=foo&bar=baz')
.toXml();
expect(xml).toInclude('&amp;');
expect(xml).not.toInclude('&bar=baz');
});
// ──────────────────────────────────────────────
// Image extension
// ──────────────────────────────────────────────
tap.test('should generate image sitemap extension', async () => {
const xml = smartsitemap.SmartSitemap.create()
.add({
loc: 'https://example.com/gallery',
images: [
{ loc: 'https://example.com/img/photo1.jpg', title: 'Photo 1' },
{ loc: 'https://example.com/img/photo2.jpg', caption: 'A nice photo' },
],
})
.toXml();
expect(xml).toInclude('xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"');
expect(xml).toInclude('<image:image>');
expect(xml).toInclude('<image:loc>https://example.com/img/photo1.jpg</image:loc>');
expect(xml).toInclude('<image:title>Photo 1</image:title>');
expect(xml).toInclude('<image:caption>A nice photo</image:caption>');
});
// ──────────────────────────────────────────────
// Video extension
// ──────────────────────────────────────────────
tap.test('should generate video sitemap extension', async () => {
const xml = smartsitemap.SmartSitemap.create()
.add({
loc: 'https://example.com/video-page',
videos: [
{
thumbnailLoc: 'https://example.com/thumb.jpg',
title: 'My Video',
description: 'A great video about testing.',
contentLoc: 'https://example.com/video.mp4',
duration: 120,
rating: 4.5,
},
],
})
.toXml();
expect(xml).toInclude('xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"');
expect(xml).toInclude('<video:video>');
expect(xml).toInclude('<video:thumbnail_loc>https://example.com/thumb.jpg</video:thumbnail_loc>');
expect(xml).toInclude('<video:title>My Video</video:title>');
expect(xml).toInclude('<video:duration>120</video:duration>');
expect(xml).toInclude('<video:rating>4.5</video:rating>');
});
// ──────────────────────────────────────────────
// News extension
// ──────────────────────────────────────────────
tap.test('should generate news sitemap', async () => {
const xml = smartsitemap.SmartSitemap.createNews({
publicationName: 'The Daily Test',
publicationLanguage: 'en',
})
.addNewsUrl(
'https://example.com/news/article-1',
'Breaking: Tests Pass!',
new Date('2025-06-15T12:00:00Z'),
['testing', 'CI'],
)
.toXml();
expect(xml).toInclude('xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"');
expect(xml).toInclude('<news:news>');
expect(xml).toInclude('<news:name>The Daily Test</news:name>');
expect(xml).toInclude('<news:language>en</news:language>');
expect(xml).toInclude('<news:title>Breaking: Tests Pass!</news:title>');
expect(xml).toInclude('<news:keywords>testing, CI</news:keywords>');
expect(xml).toInclude('<news:publication_date>');
});
// ──────────────────────────────────────────────
// hreflang alternates
// ──────────────────────────────────────────────
tap.test('should generate hreflang alternate links', async () => {
const xml = smartsitemap.SmartSitemap.create()
.add({
loc: 'https://example.com/page',
alternates: [
{ hreflang: 'en', href: 'https://example.com/page' },
{ hreflang: 'de', href: 'https://example.com/de/page' },
{ hreflang: 'x-default', href: 'https://example.com/page' },
],
})
.toXml();
expect(xml).toInclude('xmlns:xhtml="http://www.w3.org/1999/xhtml"');
expect(xml).toInclude('xhtml:link');
});
// ──────────────────────────────────────────────
// XSL stylesheet
// ──────────────────────────────────────────────
tap.test('should include XSL stylesheet processing instruction', async () => {
const xml = smartsitemap.SmartSitemap.create()
.setXslUrl('/sitemap.xsl')
.addUrl('https://example.com/')
.toXml();
expect(xml).toInclude('<?xml-stylesheet type="text/xsl" href="/sitemap.xsl"?>');
});
// ──────────────────────────────────────────────
// Builder operations: fromUrls, merge, dedupe, filter, sort
// ──────────────────────────────────────────────
tap.test('SmartSitemap.fromUrls() should create builder from string array', async () => {
const builder = smartsitemap.SmartSitemap.fromUrls([
'https://example.com/a',
'https://example.com/b',
]);
expect(builder.count).toEqual(2);
});
tap.test('should merge two builders', async () => {
const a = smartsitemap.SmartSitemap.create().addUrl('https://example.com/a');
const b = smartsitemap.SmartSitemap.create().addUrl('https://example.com/b');
const merged = smartsitemap.SmartSitemap.create().merge(a).merge(b);
expect(merged.count).toEqual(2);
});
tap.test('should deduplicate URLs', async () => {
const builder = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/a')
.addUrl('https://example.com/a')
.addUrl('https://example.com/b')
.dedupe();
expect(builder.count).toEqual(2);
});
tap.test('should filter URLs', async () => {
const builder = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/keep')
.addUrl('https://example.com/remove')
.filter((url) => !url.loc.includes('remove'));
expect(builder.count).toEqual(1);
expect(builder.getUrls()[0].loc).toEqual('https://example.com/keep');
});
tap.test('should sort URLs', async () => {
const builder = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/z')
.addUrl('https://example.com/a')
.addUrl('https://example.com/m')
.sort();
const urls = builder.getUrls();
expect(urls[0].loc).toEqual('https://example.com/a');
expect(urls[1].loc).toEqual('https://example.com/m');
expect(urls[2].loc).toEqual('https://example.com/z');
});
// ──────────────────────────────────────────────
// Output formats
// ──────────────────────────────────────────────
tap.test('should export as TXT format', async () => {
const txt = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/a')
.addUrl('https://example.com/b')
.toTxt();
expect(txt).toEqual('https://example.com/a\nhttps://example.com/b');
});
tap.test('should export as JSON format', async () => {
const json = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/a')
.toJson();
const parsed = JSON.parse(json);
expect(parsed).toBeArray();
expect(parsed[0].loc).toEqual('https://example.com/a');
});
tap.test('should export as gzip buffer', async () => {
const buffer = await smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/')
.toGzipBuffer();
expect(buffer).toBeInstanceOf(Buffer);
expect(buffer.length).toBeGreaterThan(0);
});
// ──────────────────────────────────────────────
// Auto-splitting (sitemap index)
// ──────────────────────────────────────────────
tap.test('should auto-split into sitemap index when URLs exceed max', async () => {
const builder = smartsitemap.SmartSitemap.create({
maxUrlsPerSitemap: 3,
baseUrl: 'https://example.com',
});
for (let i = 0; i < 7; i++) {
builder.addUrl(`https://example.com/page-${i}`);
}
const set = builder.toSitemapSet();
expect(set.needsIndex).toBeTrue();
expect(set.indexXml).toBeTruthy();
expect(set.sitemaps.length).toEqual(3); // ceil(7/3) = 3
expect(set.indexXml).toInclude('<sitemapindex');
expect(set.indexXml).toInclude('sitemap-1.xml');
expect(set.indexXml).toInclude('sitemap-2.xml');
expect(set.indexXml).toInclude('sitemap-3.xml');
});
tap.test('should not create index when URLs fit in one sitemap', async () => {
const builder = smartsitemap.SmartSitemap.create({ maxUrlsPerSitemap: 100 });
builder.addUrl('https://example.com/a');
builder.addUrl('https://example.com/b');
const set = builder.toSitemapSet();
expect(set.needsIndex).toBeFalse();
expect(set.indexXml).toBeNull();
expect(set.sitemaps.length).toEqual(1);
});
// ──────────────────────────────────────────────
// Sitemap Index Builder
// ──────────────────────────────────────────────
tap.test('SitemapIndexBuilder should generate valid index XML', async () => {
const xml = smartsitemap.SmartSitemap.createIndex()
.addSitemap('https://example.com/sitemap-1.xml')
.addSitemap('https://example.com/sitemap-2.xml', new Date('2025-06-15'))
.toXml();
expect(xml).toInclude('<sitemapindex');
expect(xml).toInclude('xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"');
expect(xml).toInclude('<loc>https://example.com/sitemap-1.xml</loc>');
expect(xml).toInclude('<loc>https://example.com/sitemap-2.xml</loc>');
expect(xml).toInclude('<lastmod>2025-06-15');
});
// ──────────────────────────────────────────────
// Validation
// ──────────────────────────────────────────────
tap.test('should validate URLs and detect errors', async () => {
const result = smartsitemap.SmartSitemap.create()
.addUrl('not-a-valid-url')
.add({ loc: 'https://example.com/', priority: 1.5 })
.validate();
expect(result.valid).toBeFalse();
expect(result.errors.length).toBeGreaterThan(0);
const locError = result.errors.find((e) => e.url === 'not-a-valid-url');
expect(locError).toBeTruthy();
const priorityError = result.errors.find((e) => e.field === 'priority');
expect(priorityError).toBeTruthy();
});
tap.test('should pass validation for valid sitemap', async () => {
const result = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/')
.add({ loc: 'https://example.com/about', priority: 0.8, changefreq: 'weekly' })
.validate();
expect(result.valid).toBeTrue();
expect(result.errors.length).toEqual(0);
});
// ──────────────────────────────────────────────
// Statistics
// ──────────────────────────────────────────────
tap.test('should compute stats', async () => {
const stats = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/')
.add({
loc: 'https://example.com/gallery',
images: [{ loc: 'https://example.com/img/1.jpg' }, { loc: 'https://example.com/img/2.jpg' }],
})
.add({
loc: 'https://example.com/news',
news: {
publication: { name: 'Test', language: 'en' },
publicationDate: new Date(),
title: 'Article',
},
})
.stats();
expect(stats.urlCount).toEqual(3);
expect(stats.imageCount).toEqual(2);
expect(stats.newsCount).toEqual(1);
expect(stats.estimatedSizeBytes).toBeGreaterThan(0);
});
// ──────────────────────────────────────────────
// YAML import
// ──────────────────────────────────────────────
tap.test('should import from YAML config', async () => {
const yaml = `
baseUrl: https://example.com
defaults:
priority: 0.5
urls:
daily:
- /
- /blog
monthly:
- /about
- /contact
yearly:
- /privacy
`;
const builder = await smartsitemap.SmartSitemap.fromYaml(yaml);
expect(builder.count).toEqual(5);
const xml = builder.toXml();
expect(xml).toInclude('https://example.com/');
expect(xml).toInclude('<changefreq>daily</changefreq>');
expect(xml).toInclude('<changefreq>monthly</changefreq>');
expect(xml).toInclude('<changefreq>yearly</changefreq>');
});
// ──────────────────────────────────────────────
// SitemapStream
// ──────────────────────────────────────────────
// Pushes two URLs through SitemapStream, collects every emitted chunk and
// checks that the concatenated output is a complete urlset document.
tap.test('should stream sitemap XML', async () => {
  const stream = new smartsitemap.SitemapStream();
  const chunks: string[] = [];

  stream.on('data', (chunk: string) => {
    chunks.push(chunk);
  });

  // Settle on either outcome: without the 'error' branch a failing stream
  // would leave this promise pending and the test would only end by timeout.
  const done = new Promise<void>((resolve, reject) => {
    stream.on('end', resolve);
    stream.on('error', reject);
  });

  stream.pushUrl({ loc: 'https://example.com/' });
  stream.pushUrl({ loc: 'https://example.com/about' });
  stream.finish();

  await done;

  const xml = chunks.join('');
  expect(xml).toInclude('<?xml version="1.0" encoding="UTF-8"?>');
  expect(xml).toInclude('<urlset');
  expect(xml).toInclude('<loc>https://example.com/</loc>');
  expect(xml).toInclude('<loc>https://example.com/about</loc>');
  expect(xml).toInclude('</urlset>');
  expect(stream.count).toEqual(2);
});
// ──────────────────────────────────────────────
// Sitemap parsing
// ──────────────────────────────────────────────
tap.test('should parse a urlset sitemap XML', async () => {
const xml = smartsitemap.SmartSitemap.create()
.addUrl('https://example.com/', new Date('2025-06-15'))
.add({ loc: 'https://example.com/about', changefreq: 'monthly' })
.toXml();
const parsed = await smartsitemap.SmartSitemap.parse(xml);
expect(parsed.type).toEqual('urlset');
expect(parsed.urls.length).toEqual(2);
expect(parsed.urls[0].loc).toEqual('https://example.com/');
});
tap.test('should detect sitemap type', async () => {
expect(smartsitemap.SitemapParser.detectType('<urlset>')).toEqual('urlset');
expect(smartsitemap.SitemapParser.detectType('<sitemapindex>')).toEqual('sitemapindex');
expect(smartsitemap.SitemapParser.detectType('<html>')).toEqual('unknown');
});
// ──────────────────────────────────────────────
// XmlRenderer utility
// ──────────────────────────────────────────────
tap.test('XmlRenderer.escapeXml should escape special characters', async () => {
expect(smartsitemap.XmlRenderer.escapeXml('a&b')).toEqual('a&amp;b');
expect(smartsitemap.XmlRenderer.escapeXml('<tag>')).toEqual('&lt;tag&gt;');
expect(smartsitemap.XmlRenderer.escapeXml('"quoted"')).toEqual('&quot;quoted&quot;');
});
tap.test('XmlRenderer.formatDate should handle all date types', async () => {
const iso = smartsitemap.XmlRenderer.formatDate(new Date('2025-06-15T00:00:00.000Z'));
expect(iso).toEqual('2025-06-15T00:00:00.000Z');
const fromString = smartsitemap.XmlRenderer.formatDate('2025-06-15');
expect(fromString).toInclude('2025-06-15');
const fromNumber = smartsitemap.XmlRenderer.formatDate(new Date('2025-06-15').getTime());
expect(fromNumber).toInclude('2025-06-15');
});
export default tap.start();

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartsitemap',
version: '2.0.4',
description: 'A module for generating and managing sitemaps, supporting dynamic sitemap generation from feeds.'
version: '4.0.0',
description: 'A comprehensive TypeScript sitemap library with builder API, supporting standard, news, image, video, and hreflang sitemaps with auto-splitting, streaming, validation, and RSS feed integration.'
}

View File

@@ -1,3 +1,44 @@
export * from './smartsitemap.classes.smartsitemap.js';
export * from './smartsitemap.classes.sitemapnews.js';
export * from './smartsitemap.classes.sitemapwebsite.js';
// Main facade
export { SmartSitemap } from './smartsitemap.classes.smartsitemap.js';
// Builders
export { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
export { NewsSitemapBuilder } from './smartsitemap.classes.newsbuilder.js';
export { SitemapIndexBuilder } from './smartsitemap.classes.indexbuilder.js';
// Parser
export { SitemapParser } from './smartsitemap.classes.sitemapparser.js';
// Stream
export { SitemapStream } from './smartsitemap.classes.sitemapstream.js';
// Utilities
export { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
export { SitemapValidator } from './smartsitemap.classes.validator.js';
// Feed & YAML importers
export { FeedImporter } from './smartsitemap.classes.feedimporter.js';
export { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
// All interfaces and types
export type {
TChangeFreq,
TOutputFormat,
ISitemapUrl,
ISitemapImage,
ISitemapVideo,
ISitemapNews,
ISitemapAlternate,
ISitemapIndexEntry,
ISitemapOptions,
INewsSitemapOptions,
IFeedImportOptions,
IFeedItem,
ISitemapYamlConfig,
IParsedSitemap,
IValidationError,
IValidationWarning,
IValidationResult,
ISitemapStats,
ISitemapSet,
} from './interfaces/index.js';

View File

@@ -1,42 +1,277 @@
export interface ISitemapYaml {
daily: string[];
// ============================================================
// CORE TYPES
// ============================================================
/**
* Change frequency values per the sitemap protocol specification.
* Note: Google ignores changefreq, but other search engines may use it.
*/
export type TChangeFreq =
| 'always'
| 'hourly'
| 'daily'
| 'weekly'
| 'monthly'
| 'yearly'
| 'never';
/** Supported output formats */
export type TOutputFormat = 'xml' | 'txt' | 'json';
// ============================================================
// URL ENTRY — the core unit of a sitemap
// ============================================================
/**
* A single URL entry in a sitemap, supporting all standard extensions.
*/
export interface ISitemapUrl {
/** Absolute URL of the page (required, max 2048 chars) */
loc: string;
/** Last modification date — accepts Date, ISO string, or Unix timestamp (ms) */
lastmod?: Date | string | number;
/** How frequently the page changes */
changefreq?: TChangeFreq;
/** Priority relative to other URLs on your site, 0.0 to 1.0 */
priority?: number;
/** Image sitemap extension entries */
images?: ISitemapImage[];
/** Video sitemap extension entries */
videos?: ISitemapVideo[];
/** News sitemap extension */
news?: ISitemapNews;
/** Alternate language versions (hreflang) */
alternates?: ISitemapAlternate[];
}
export interface IRssItem {
[key: string]: any;
link?: string;
guid?: string;
// ============================================================
// SITEMAP EXTENSIONS
// ============================================================
export interface ISitemapImage {
/** URL of the image (required) */
loc: string;
/** Caption for the image */
caption?: string;
/** Title of the image */
title?: string;
pubDate?: string;
creator?: string;
content?: string;
isoDate?: string;
categories?: string[];
contentSnippet?: string;
enclosure?: any;
/** Geographic location (e.g. "New York, USA") */
geoLocation?: string;
/** URL to the image license */
licenseUrl?: string;
}
export interface IParsedSiteMap {
urlset: {
url:
| {
loc: string;
lastmod: string;
changefreq: string;
}
| {
loc: string;
lastmod: string;
changefreq: string;
}[]
| {
loc: string;
'news:news': {
'news:publication': [];
'news:keywords': string;
'news:publication_date': string;
'news:title': string;
};
}[];
};
/**
 * A single video entry of the video sitemap extension.
 * Attached to a URL through `ISitemapUrl.videos`.
 */
export interface ISitemapVideo {
/** URL to the video thumbnail (required) */
thumbnailLoc: string;
/** Title of the video (required) */
title: string;
/** Description of the video, max 2048 chars (required) */
description: string;
/** URL of the actual video media file */
contentLoc?: string;
/** URL of the embeddable player — at least one of contentLoc or playerLoc required */
playerLoc?: string;
/** Duration in seconds (1 to 28800) */
duration?: number;
/** Rating 0.0 to 5.0 */
rating?: number;
/** Number of views */
viewCount?: number;
/** Publication date */
publicationDate?: Date | string;
/** Whether the video is family friendly (default true) */
familyFriendly?: boolean;
/** Tags for the video (max 32) */
tags?: string[];
/** Whether this is a live stream */
live?: boolean;
/** Whether a subscription is required to view */
requiresSubscription?: boolean;
}
export interface ISitemapNews {
/** Publication information */
publication: {
/** Publication name (e.g. "The New York Times") */
name: string;
/** Language code (ISO 639, e.g. "en", "de", "zh-cn") */
language: string;
};
/** Publication date of the article */
publicationDate: Date | string | number;
/** Article title */
title: string;
/** Keywords (array or comma-separated string) */
keywords?: string[] | string;
}
export interface ISitemapAlternate {
/** Language code (ISO 639) or 'x-default' for the default version */
hreflang: string;
/** URL for this language version */
href: string;
}
// ============================================================
// SITEMAP INDEX
// ============================================================
export interface ISitemapIndexEntry {
/** URL to the sitemap file */
loc: string;
/** Last modification date of the referenced sitemap */
lastmod?: Date | string | number;
}
// ============================================================
// CONFIGURATION
// ============================================================
/**
 * General sitemap configuration. All fields are optional;
 * `INewsSitemapOptions` extends this with publication settings.
 */
export interface ISitemapOptions {
/** Base URL for the website (used to resolve relative URLs and for auto-split filenames) */
baseUrl?: string;
/** XSL stylesheet URL for browser-viewable sitemaps */
xslUrl?: string;
/** Default changefreq for URLs that don't specify one */
defaultChangeFreq?: TChangeFreq;
/** Default priority for URLs that don't specify one (0.0 to 1.0) */
defaultPriority?: number;
/** Whether to pretty-print XML output (default: true) */
prettyPrint?: boolean;
/** Maximum URLs per sitemap file before auto-splitting (default: 50000, max: 50000) */
maxUrlsPerSitemap?: number;
/** Enable gzip compression for toGzipBuffer() */
gzip?: boolean;
/** Whether to validate URLs and fields (default: true) */
validate?: boolean;
}
export interface INewsSitemapOptions extends ISitemapOptions {
/** Publication name — required for news sitemaps */
publicationName: string;
/** Publication language (default: 'en') */
publicationLanguage?: string;
}
export interface IFeedImportOptions {
/** Publication name for news sitemap mapping */
publicationName?: string;
/** Publication language for news sitemap mapping */
publicationLanguage?: string;
/** Only include items newer than this date */
newerThan?: Date | number;
/** Maximum number of items to import */
limit?: number;
/** Custom mapping function from feed item to sitemap URL (return null to skip) */
mapItem?: (item: IFeedItem) => ISitemapUrl | null;
}
/** Shape of a parsed RSS/Atom feed item */
export interface IFeedItem {
title?: string;
link?: string;
pubDate?: string;
author?: string;
content?: string;
contentSnippet?: string;
isoDate?: string;
id?: string;
categories?: string[];
enclosure?: {
url?: string;
type?: string;
length?: string;
};
[key: string]: any;
}
// ============================================================
// YAML CONFIG
// ============================================================
/**
* Enhanced YAML configuration format for defining sitemaps declaratively.
* Supports per-frequency URL groups, default settings, and feed imports.
*/
export interface ISitemapYamlConfig {
/** Base URL to prepend to relative paths */
baseUrl?: string;
/** Default values for all URLs */
defaults?: {
changefreq?: TChangeFreq;
priority?: number;
};
/** URL groups organized by change frequency */
urls?: { [K in TChangeFreq]?: string[] };
/** RSS/Atom feeds to import */
feeds?: Array<{
url: string;
type: 'news' | 'standard';
publicationName?: string;
publicationLanguage?: string;
}>;
}
// ============================================================
// PARSED SITEMAP (bidirectional)
// ============================================================
export interface IParsedSitemap {
/** Whether this is a urlset or a sitemap index */
type: 'urlset' | 'sitemapindex';
/** Parsed URL entries (populated when type is 'urlset') */
urls: ISitemapUrl[];
/** Parsed index entries (populated when type is 'sitemapindex') */
sitemaps: ISitemapIndexEntry[];
}
// ============================================================
// VALIDATION
// ============================================================
/** A single problem reported as an error by sitemap validation. */
export interface IValidationError {
/** Name of the offending field (e.g. 'priority') */
field: string;
/** Human-readable description of the problem */
message: string;
/** The `loc` of the URL entry the error refers to, when it relates to a specific entry */
url?: string;
}
/**
 * A single issue reported as a warning by sitemap validation.
 * Same shape as `IValidationError`.
 * NOTE(review): the criteria separating warnings from errors live in the validator — confirm there.
 */
export interface IValidationWarning {
/** Name of the field the warning concerns */
field: string;
/** Human-readable description of the issue */
message: string;
/** presumably the `loc` of the affected URL entry, mirroring IValidationError.url — verify against the validator */
url?: string;
}
/** Outcome of validating a sitemap: verdict, findings and statistics. */
export interface IValidationResult {
/** Overall verdict; presumably true exactly when `errors` is empty — confirm against the validator */
valid: boolean;
/** All errors found */
errors: IValidationError[];
/** All warnings found */
warnings: IValidationWarning[];
/** Statistics for the validated sitemap */
stats: ISitemapStats;
}
// ============================================================
// STATISTICS
// ============================================================
/** Aggregate figures describing the contents of a sitemap. */
export interface ISitemapStats {
/** Number of URL entries */
urlCount: number;
/** Total number of image entries across all URLs */
imageCount: number;
/** presumably the total number of video entries across all URLs — TODO confirm */
videoCount: number;
/** Number of URL entries carrying a news extension */
newsCount: number;
/** presumably the total number of hreflang alternates across all URLs — TODO confirm */
alternateCount: number;
/** Estimated size of the output in bytes (estimation method not visible here) */
estimatedSizeBytes: number;
/** presumably true when the URL count exceeds maxUrlsPerSitemap, as for ISitemapSet.needsIndex — TODO confirm */
needsIndex: boolean;
}
// ============================================================
// AUTO-SPLIT OUTPUT
// ============================================================
export interface ISitemapSet {
/** Whether the URL count exceeded maxUrlsPerSitemap */
needsIndex: boolean;
/** The sitemap index XML (null if all URLs fit in one sitemap) */
indexXml: string | null;
/** Individual sitemap chunks */
sitemaps: Array<{ filename: string; xml: string }>;
}

View File

@@ -0,0 +1,159 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
/**
 * Turns RSS/Atom feeds into sitemap URL entries.
 *
 * Feeds are parsed with smartfeed; the resulting items are filtered
 * (date threshold, limit) and then mapped either by a caller-supplied
 * `mapItem` callback or by the built-in standard/news mapping.
 */
export class FeedImporter {
  /**
   * Fetch a feed by URL and return standard sitemap URL entries.
   */
  static async fromUrl(
    feedUrl: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const parser = new plugins.smartfeed.Smartfeed();
    const { items } = await parser.parseFeedFromUrl(feedUrl);
    return FeedImporter.mapItems(items, options);
  }

  /**
   * Parse a feed XML string and return standard sitemap URL entries.
   */
  static async fromString(
    feedXml: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const parser = new plugins.smartfeed.Smartfeed();
    const { items } = await parser.parseFeedFromString(feedXml);
    return FeedImporter.mapItems(items, options);
  }

  /**
   * Fetch a feed by URL and return news sitemap URL entries.
   * The publication language falls back to 'en' when not given.
   */
  static async fromUrlAsNews(
    feedUrl: string,
    publicationName: string,
    publicationLanguage?: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const parser = new plugins.smartfeed.Smartfeed();
    const { items } = await parser.parseFeedFromUrl(feedUrl);
    const language = publicationLanguage ?? 'en';
    return FeedImporter.mapItemsAsNews(items, publicationName, language, options);
  }

  /**
   * Parse a feed XML string and return news sitemap URL entries.
   * The publication language falls back to 'en' when not given.
   */
  static async fromStringAsNews(
    feedXml: string,
    publicationName: string,
    publicationLanguage?: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const parser = new plugins.smartfeed.Smartfeed();
    const { items } = await parser.parseFeedFromString(feedXml);
    const language = publicationLanguage ?? 'en';
    return FeedImporter.mapItemsAsNews(items, publicationName, language, options);
  }

  /**
   * Map parsed feed items to standard sitemap URLs.
   * Items without a link are skipped; `lastmod` is set only when the item has an isoDate.
   */
  private static mapItems(
    items: any[],
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] {
    const candidates = FeedImporter.filterItems(items, options);

    const customMapped = FeedImporter.applyCustomMapper(candidates, options);
    if (customMapped) {
      return customMapped;
    }

    const entries: interfaces.ISitemapUrl[] = [];
    for (const feedItem of candidates) {
      if (!feedItem.link) continue;
      const entry: interfaces.ISitemapUrl = { loc: feedItem.link };
      if (feedItem.isoDate) {
        entry.lastmod = feedItem.isoDate;
      }
      entries.push(entry);
    }
    return entries;
  }

  /**
   * Map parsed feed items to news sitemap URLs.
   * A caller-supplied `mapItem` takes precedence over the news mapping.
   * Items without an isoDate get the current time as publication date.
   */
  private static mapItemsAsNews(
    items: any[],
    publicationName: string,
    publicationLanguage: string,
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] {
    const candidates = FeedImporter.filterItems(items, options);

    const customMapped = FeedImporter.applyCustomMapper(candidates, options);
    if (customMapped) {
      return customMapped;
    }

    const entries: interfaces.ISitemapUrl[] = [];
    for (const feedItem of candidates) {
      if (!feedItem.link) continue;
      const entry: interfaces.ISitemapUrl = {
        loc: feedItem.link,
        news: {
          publication: {
            name: publicationName,
            language: publicationLanguage,
          },
          publicationDate: feedItem.isoDate || new Date().toISOString(),
          title: feedItem.title || '',
          keywords: feedItem.categories,
        },
      };
      if (feedItem.isoDate) {
        entry.lastmod = feedItem.isoDate;
      }
      entries.push(entry);
    }
    return entries;
  }

  /**
   * Run the caller-supplied `mapItem` callback over the items.
   * Returns null when no callback is configured; otherwise the list of
   * truthy results (falsy results mean "skip this item").
   */
  private static applyCustomMapper(
    items: any[],
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] | null {
    if (!options?.mapItem) {
      return null;
    }
    const collected: interfaces.ISitemapUrl[] = [];
    for (const feedItem of items) {
      const mapped = options.mapItem(feedItem as interfaces.IFeedItem);
      if (mapped) collected.push(mapped);
    }
    return collected;
  }

  /**
   * Apply the date threshold first, then the limit.
   * Items without an isoDate always pass the date filter; a limit that is
   * missing or not greater than zero means "no limit".
   */
  private static filterItems(items: any[], options?: interfaces.IFeedImportOptions): any[] {
    let kept: any[] = Array.from(items);

    if (options?.newerThan != null) {
      const cutoff =
        options.newerThan instanceof Date ? options.newerThan.getTime() : options.newerThan;
      kept = kept.filter(
        (feedItem: any) => !feedItem.isoDate || new Date(feedItem.isoDate).getTime() >= cutoff,
      );
    }

    if (options?.limit != null && options.limit > 0) {
      kept = kept.slice(0, options.limit);
    }

    return kept;
  }
}

View File

@@ -0,0 +1,82 @@
import type * as interfaces from './interfaces/index.js';
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
/**
 * Builder for sitemap index files (<sitemapindex>).
 * Used when you have multiple sitemaps that need to be referenced from a single index.
 * Every mutating method returns `this` for fluent chaining.
 */
export class SitemapIndexBuilder {
  private entries: interfaces.ISitemapIndexEntry[] = [];
  private options: interfaces.ISitemapOptions;

  /**
   * @param options Rendering options forwarded to the XML renderer; defaults to `{}`.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    this.options = options ?? {};
  }

  /** Add a sitemap index entry */
  add(entry: interfaces.ISitemapIndexEntry): this {
    this.entries.push(entry);
    return this;
  }

  /** Add a sitemap by URL, optionally with lastmod */
  addSitemap(loc: string, lastmod?: Date | string | number): this {
    const entry: interfaces.ISitemapIndexEntry = { loc };
    if (lastmod != null) {
      entry.lastmod = lastmod;
    }
    this.entries.push(entry);
    return this;
  }

  /** Add multiple sitemap entries */
  addSitemaps(entries: interfaces.ISitemapIndexEntry[]): this {
    this.entries.push(...entries);
    return this;
  }

  /**
   * Build an index and individual sitemaps from a UrlsetBuilder that needs splitting.
   * The builder's URLs are divided into chunks of maxUrlsPerSitemap (capped at 50000).
   * A missing, non-finite, or non-positive maxUrlsPerSitemap falls back to 50000 so
   * the chunking loop always advances.
   *
   * @param builder Source builder whose URLs and options are read.
   * @param baseUrl Public base URL under which the chunk files will be served;
   *                one trailing slash is stripped before file names are appended.
   * @returns The index builder plus one UrlsetBuilder per chunk
   *          (chunk i is referenced as `sitemap-<i>.xml`, 1-based).
   */
  static fromBuilder(
    builder: UrlsetBuilder,
    baseUrl: string,
  ): { index: SitemapIndexBuilder; sitemaps: UrlsetBuilder[] } {
    const urls = builder.getUrls();
    const options = builder.getOptions();
    const maxUrls = SitemapIndexBuilder.resolveChunkSize(options.maxUrlsPerSitemap);
    // Loop-invariant: normalise the base once instead of per chunk.
    const normalizedBase = baseUrl.replace(/\/$/, '');

    const index = new SitemapIndexBuilder(options);
    const sitemaps: UrlsetBuilder[] = [];

    for (let i = 0; i < urls.length; i += maxUrls) {
      const chunk = urls.slice(i, i + maxUrls);
      const chunkBuilder = new UrlsetBuilder(options);
      chunkBuilder.addUrls(chunk);
      sitemaps.push(chunkBuilder);
      const filename = `sitemap-${sitemaps.length}.xml`;
      index.addSitemap(`${normalizedBase}/${filename}`);
    }

    return { index, sitemaps };
  }

  /** Export as sitemap index XML string */
  toXml(): string {
    return XmlRenderer.renderIndex(this.entries, this.options);
  }

  /** Get all entries (shallow copy, so callers cannot mutate the internal list) */
  getEntries(): interfaces.ISitemapIndexEntry[] {
    return [...this.entries];
  }

  /** Get the number of sitemaps in this index */
  get count(): number {
    return this.entries.length;
  }

  /**
   * Turn a requested chunk size into a safe positive integer no larger than 50000.
   * Guards against 0, negative values and NaN, which would otherwise make the
   * chunking loop spin forever or silently drop URLs.
   */
  private static resolveChunkSize(requested?: number): number {
    const protocolLimit = 50000;
    if (requested == null || !Number.isFinite(requested) || requested < 1) {
      return protocolLimit;
    }
    return Math.min(Math.floor(requested), protocolLimit);
  }
}

View File

@@ -0,0 +1,95 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
/**
 * UrlsetBuilder specialised for Google News sitemaps.
 * Remembers the publication name and language once, and stamps them onto
 * every news entry it creates. All regular builder methods are inherited.
 */
export class NewsSitemapBuilder extends UrlsetBuilder {
  private publicationName: string;
  private publicationLanguage: string;

  /**
   * @param options News sitemap options; `publicationLanguage` defaults to 'en'.
   */
  constructor(options: interfaces.INewsSitemapOptions) {
    super(options);
    const { publicationName, publicationLanguage } = options;
    this.publicationName = publicationName;
    this.publicationLanguage = publicationLanguage ?? 'en';
  }

  /**
   * Add one news article.
   * Publication name and language are taken from the constructor options.
   */
  addNewsUrl(
    loc: string,
    title: string,
    publicationDate: Date | string | number,
    keywords?: string[] | string,
  ): this {
    const publication = {
      name: this.publicationName,
      language: this.publicationLanguage,
    };
    this.add({
      loc,
      news: { publication, publicationDate, title, keywords },
    });
    return this;
  }

  /**
   * Fetch an RSS/Atom feed by URL and add its items as news entries.
   * Publication name/language in `options` override the builder's own values.
   */
  async importFromFeedUrl(feedUrl: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const name = options?.publicationName ?? this.publicationName;
    const language = options?.publicationLanguage ?? this.publicationLanguage;
    const newsUrls = await FeedImporter.fromUrlAsNews(feedUrl, name, language, options);
    this.addUrls(newsUrls);
    return this;
  }

  /**
   * Parse an RSS/Atom feed string and add its items as news entries.
   * Publication name/language in `options` override the builder's own values.
   */
  async importFromFeedString(feedXml: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const name = options?.publicationName ?? this.publicationName;
    const language = options?.publicationLanguage ?? this.publicationLanguage;
    const newsUrls = await FeedImporter.fromStringAsNews(feedXml, name, language, options);
    this.addUrls(newsUrls);
    return this;
  }

  /**
   * Add news entries for @tsclass/tsclass IArticle objects.
   * Articles without a timestamp get no lastmod and use the current time
   * as their publication date.
   */
  importFromArticles(articles: plugins.tsclass.content.IArticle[]): this {
    articles.forEach((article) => {
      const hasTimestamp = Boolean(article.timestamp);
      this.add({
        loc: article.url,
        lastmod: hasTimestamp ? new Date(article.timestamp) : undefined,
        news: {
          publication: {
            name: this.publicationName,
            language: this.publicationLanguage,
          },
          publicationDate: hasTimestamp ? new Date(article.timestamp) : new Date(),
          title: article.title || '',
          keywords: article.tags,
        },
      });
    });
    return this;
  }
}

View File

@@ -1,79 +0,0 @@
import * as plugins from './smartsitemap.plugins.js';
import * as interfaces from './interfaces/index.js';
/**
 * Legacy news sitemap generator.
 * Collects RSS-style items (from feeds or article objects) and renders them
 * as a Google News <urlset> document.
 */
export class SitemapNews {
  /** Items accumulated so far; each becomes one <url> on export. */
  public rssItems: interfaces.IRssItem[] = [];

  constructor(optionsArg: {}) {}

  /** Parse a feed XML string and append its items. */
  public async readAndAddFromRssFeedString(feedStringArg: string) {
    const smartfeedInstance = new plugins.smartfeed.Smartfeed();
    const parsedFeed = await smartfeedInstance.parseFeedFromString(feedStringArg);
    this.rssItems = this.rssItems.concat(parsedFeed.items);
  }

  /** Fetch a feed by URL and append its items. */
  public async readAndAddFromRssFeedUrl(urlArg: string) {
    const smartfeedInstance = new plugins.smartfeed.Smartfeed();
    const parsedFeed = await smartfeedInstance.parseFeedFromUrl(urlArg);
    this.rssItems = this.rssItems.concat(parsedFeed.items);
  }

  /**
   * Convert article objects to RSS-style items and append them.
   * The item's isoDate is the article's timestamp when it has one,
   * otherwise the current time.
   */
  public async readAndParseArticles(
    articleArrayArg: plugins.tsclass.content.IArticle[],
  ) {
    const rssItemArray = articleArrayArg.map(
      (articleArg): interfaces.IRssItem => {
        const published = articleArg.timestamp
          ? new Date(articleArg.timestamp)
          : new Date();
        return {
          title: articleArg.title,
          content: articleArg.content,
          isoDate: published.toISOString(),
          link: articleArg.url,
        };
      },
    );
    this.rssItems = this.rssItems.concat(rssItemArray);
  }

  /**
   * Render all collected items as a news sitemap XML string.
   * Publication name/language and keywords are fixed placeholder values.
   */
  public exportSitemapXml() {
    const urls: {
      loc: string;
      'news:news': {
        'news:publication': {
          'news:name': string;
          'news:language': string;
        };
        'news:publication_date': string;
        'news:keywords': string;
        'news:title': string;
      };
    }[] = this.rssItems.map((itemArg) => ({
      loc: itemArg.link,
      'news:news': {
        'news:publication': {
          'news:language': 'en',
          'news:name': 'some name',
        },
        'news:keywords': '',
        'news:publication_date': itemArg.isoDate,
        'news:title': itemArg.title,
      },
    }));

    const sitemapObject: any = {
      urlset: {
        '@_xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        '@_xmlns:news': 'http://www.google.com/schemas/sitemap-news/0.9',
        url: urls,
      },
    };
    const smartxmlInstance = new plugins.smartxml.SmartXml();
    const sitemapString = smartxmlInstance.createXmlFromObject(sitemapObject);
    return sitemapString;
  }
}

View File

@@ -0,0 +1,251 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
/**
 * Parses existing sitemap XML into structured data.
 * Handles both <urlset> sitemaps and <sitemapindex> files.
 *
 * Numeric elements (<priority>, <video:duration>, <video:rating>,
 * <video:view_count>) are only copied into the result when they parse to a
 * finite number; empty or malformed values are omitted instead of becoming NaN.
 */
export class SitemapParser {
  /**
   * Parse a sitemap XML string into structured data.
   */
  static async parse(xml: string): Promise<interfaces.IParsedSitemap> {
    const smartXml = new plugins.smartxml.SmartXml();
    const parsed = smartXml.parseXmlToObject(xml);

    // The parser returns ordered format (preserveOrder: true)
    // We need to walk the structure to extract urls or sitemap entries
    return SitemapParser.processOrderedParsed(parsed);
  }

  /**
   * Fetch and parse a sitemap from a URL.
   */
  static async parseUrl(url: string): Promise<interfaces.IParsedSitemap> {
    const response = await plugins.webrequest.webrequest(url);
    const xml = await response.text();
    return SitemapParser.parse(xml);
  }

  /**
   * Parse a sitemap XML and return a pre-populated UrlsetBuilder.
   * Only URL entries are transferred; a sitemap index yields an empty builder.
   */
  static async toBuilder(xml: string, options?: interfaces.ISitemapOptions): Promise<UrlsetBuilder> {
    const parsed = await SitemapParser.parse(xml);
    const builder = new UrlsetBuilder(options);
    builder.addUrls(parsed.urls);
    return builder;
  }

  /**
   * Detect whether XML is a urlset or sitemapindex without full parsing.
   * This is a plain substring check for the opening tag.
   */
  static detectType(xml: string): 'urlset' | 'sitemapindex' | 'unknown' {
    if (xml.includes('<urlset')) return 'urlset';
    if (xml.includes('<sitemapindex')) return 'sitemapindex';
    return 'unknown';
  }

  /**
   * Process the ordered-format output from smartxml's parseXmlToObject.
   * The ordered format uses arrays of objects where each object has a single key.
   * When neither root element is found the result is an empty 'urlset'.
   */
  private static processOrderedParsed(parsed: any[]): interfaces.IParsedSitemap {
    const result: interfaces.IParsedSitemap = {
      type: 'urlset',
      urls: [],
      sitemaps: [],
    };

    if (!Array.isArray(parsed)) {
      return result;
    }

    for (const node of parsed) {
      if (node.urlset) {
        result.type = 'urlset';
        result.urls = SitemapParser.extractUrls(node.urlset);
      } else if (node.sitemapindex) {
        result.type = 'sitemapindex';
        result.sitemaps = SitemapParser.extractIndexEntries(node.sitemapindex);
      }
    }

    return result;
  }

  /**
   * Extract URL entries from an ordered-format urlset.
   * Entries without a <loc> are dropped.
   */
  private static extractUrls(urlsetNodes: any[]): interfaces.ISitemapUrl[] {
    const urls: interfaces.ISitemapUrl[] = [];
    if (!Array.isArray(urlsetNodes)) return urls;

    for (const node of urlsetNodes) {
      if (node.url) {
        const urlData = SitemapParser.extractUrlData(node.url);
        if (urlData) urls.push(urlData);
      }
    }

    return urls;
  }

  /**
   * Extract a single URL entry from ordered-format nodes.
   * Returns null when the entry has no (or an empty) <loc>.
   */
  private static extractUrlData(urlNodes: any[]): interfaces.ISitemapUrl | null {
    if (!Array.isArray(urlNodes)) return null;

    const url: interfaces.ISitemapUrl = { loc: '' };

    for (const node of urlNodes) {
      if (node.loc) {
        url.loc = SitemapParser.extractText(node.loc);
      } else if (node.lastmod) {
        url.lastmod = SitemapParser.extractText(node.lastmod);
      } else if (node.changefreq) {
        url.changefreq = SitemapParser.extractText(node.changefreq) as interfaces.TChangeFreq;
      } else if (node.priority) {
        // Skip empty/malformed priorities rather than storing NaN.
        const priority = SitemapParser.toFiniteFloat(SitemapParser.extractText(node.priority));
        if (priority !== undefined) url.priority = priority;
      } else if (node['image:image']) {
        if (!url.images) url.images = [];
        url.images.push(SitemapParser.extractImageData(node['image:image']));
      } else if (node['video:video']) {
        if (!url.videos) url.videos = [];
        url.videos.push(SitemapParser.extractVideoData(node['video:video']));
      } else if (node['news:news']) {
        url.news = SitemapParser.extractNewsData(node['news:news']);
      } else if (node['xhtml:link']) {
        if (!url.alternates) url.alternates = [];
        // Attributes of the element are stored under ':@' next to the tag key.
        const attrs = node[':@'] || {};
        if (attrs['@_hreflang'] && attrs['@_href']) {
          url.alternates.push({
            hreflang: attrs['@_hreflang'],
            href: attrs['@_href'],
          });
        }
      }
    }

    return url.loc ? url : null;
  }

  /**
   * Extract image data from ordered-format nodes.
   */
  private static extractImageData(nodes: any[]): interfaces.ISitemapImage {
    const img: interfaces.ISitemapImage = { loc: '' };
    if (!Array.isArray(nodes)) return img;

    for (const node of nodes) {
      if (node['image:loc']) img.loc = SitemapParser.extractText(node['image:loc']);
      else if (node['image:caption']) img.caption = SitemapParser.extractText(node['image:caption']);
      else if (node['image:title']) img.title = SitemapParser.extractText(node['image:title']);
      else if (node['image:geo_location']) img.geoLocation = SitemapParser.extractText(node['image:geo_location']);
      else if (node['image:license']) img.licenseUrl = SitemapParser.extractText(node['image:license']);
    }

    return img;
  }

  /**
   * Extract video data from ordered-format nodes.
   * Numeric fields are set only when they parse to a finite number;
   * boolean fields are true only for the literal text 'yes'.
   */
  private static extractVideoData(nodes: any[]): interfaces.ISitemapVideo {
    const vid: interfaces.ISitemapVideo = { thumbnailLoc: '', title: '', description: '' };
    if (!Array.isArray(nodes)) return vid;

    for (const node of nodes) {
      if (node['video:thumbnail_loc']) vid.thumbnailLoc = SitemapParser.extractText(node['video:thumbnail_loc']);
      else if (node['video:title']) vid.title = SitemapParser.extractText(node['video:title']);
      else if (node['video:description']) vid.description = SitemapParser.extractText(node['video:description']);
      else if (node['video:content_loc']) vid.contentLoc = SitemapParser.extractText(node['video:content_loc']);
      else if (node['video:player_loc']) vid.playerLoc = SitemapParser.extractText(node['video:player_loc']);
      else if (node['video:duration']) {
        const duration = SitemapParser.toFiniteInt(SitemapParser.extractText(node['video:duration']));
        if (duration !== undefined) vid.duration = duration;
      } else if (node['video:rating']) {
        const rating = SitemapParser.toFiniteFloat(SitemapParser.extractText(node['video:rating']));
        if (rating !== undefined) vid.rating = rating;
      } else if (node['video:view_count']) {
        const viewCount = SitemapParser.toFiniteInt(SitemapParser.extractText(node['video:view_count']));
        if (viewCount !== undefined) vid.viewCount = viewCount;
      } else if (node['video:publication_date']) vid.publicationDate = SitemapParser.extractText(node['video:publication_date']);
      else if (node['video:family_friendly']) vid.familyFriendly = SitemapParser.extractText(node['video:family_friendly']) === 'yes';
      else if (node['video:live']) vid.live = SitemapParser.extractText(node['video:live']) === 'yes';
      else if (node['video:requires_subscription']) vid.requiresSubscription = SitemapParser.extractText(node['video:requires_subscription']) === 'yes';
      else if (node['video:tag']) {
        if (!vid.tags) vid.tags = [];
        vid.tags.push(SitemapParser.extractText(node['video:tag']));
      }
    }

    return vid;
  }

  /**
   * Extract news data from ordered-format nodes.
   */
  private static extractNewsData(nodes: any[]): interfaces.ISitemapNews {
    const news: interfaces.ISitemapNews = {
      publication: { name: '', language: '' },
      publicationDate: '',
      title: '',
    };
    if (!Array.isArray(nodes)) return news;

    for (const node of nodes) {
      if (node['news:publication']) {
        const pubNodes = node['news:publication'];
        if (Array.isArray(pubNodes)) {
          for (const pNode of pubNodes) {
            if (pNode['news:name']) news.publication.name = SitemapParser.extractText(pNode['news:name']);
            else if (pNode['news:language']) news.publication.language = SitemapParser.extractText(pNode['news:language']);
          }
        }
      } else if (node['news:publication_date']) {
        news.publicationDate = SitemapParser.extractText(node['news:publication_date']);
      } else if (node['news:title']) {
        news.title = SitemapParser.extractText(node['news:title']);
      } else if (node['news:keywords']) {
        news.keywords = SitemapParser.extractText(node['news:keywords']);
      }
    }

    return news;
  }

  /**
   * Extract sitemap index entries from ordered-format nodes.
   * Entries without a <loc> are dropped.
   */
  private static extractIndexEntries(indexNodes: any[]): interfaces.ISitemapIndexEntry[] {
    const entries: interfaces.ISitemapIndexEntry[] = [];
    if (!Array.isArray(indexNodes)) return entries;

    for (const node of indexNodes) {
      if (node.sitemap) {
        const entry: interfaces.ISitemapIndexEntry = { loc: '' };
        if (Array.isArray(node.sitemap)) {
          for (const sNode of node.sitemap) {
            if (sNode.loc) entry.loc = SitemapParser.extractText(sNode.loc);
            else if (sNode.lastmod) entry.lastmod = SitemapParser.extractText(sNode.lastmod);
          }
        }
        if (entry.loc) entries.push(entry);
      }
    }

    return entries;
  }

  /**
   * Extract text content from an ordered-format node.
   * In ordered format, text is stored as [{ '#text': 'value' }].
   * Returns '' when no text node is present.
   */
  private static extractText(nodes: any): string {
    if (typeof nodes === 'string') return nodes;
    if (typeof nodes === 'number') return String(nodes);
    if (Array.isArray(nodes)) {
      for (const n of nodes) {
        if (n['#text'] != null) return String(n['#text']);
      }
    }
    return '';
  }

  /** Parse a decimal number; undefined when the text is not a finite number. */
  private static toFiniteFloat(text: string): number | undefined {
    const value = parseFloat(text);
    return Number.isFinite(value) ? value : undefined;
  }

  /** Parse a base-10 integer; undefined when the text is not a finite number. */
  private static toFiniteInt(text: string): number | undefined {
    const value = parseInt(text, 10);
    return Number.isFinite(value) ? value : undefined;
  }
}

View File

@@ -0,0 +1,168 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
/**
 * A Node.js Readable stream that generates sitemap XML incrementally.
 * Suitable for very large sitemaps (millions of URLs) that cannot be held in memory.
 *
 * The <urlset> header is written together with the first URL, so it can only
 * declare the extension namespaces that URL needs. When a later URL needs a
 * namespace the header did not declare, the declaration is emitted on that
 * URL's own <url> element so every prefix used in the output is bound.
 *
 * NOTE(review): video entries register the video namespace but are not
 * rendered by this stream, and image geo-location/license are skipped too
 * ("simplified inline rendering") — confirm whether that is intended.
 *
 * Usage:
 *   const stream = new SitemapStream();
 *   stream.pipe(createWriteStream('sitemap.xml'));
 *   stream.pushUrl({ loc: 'https://example.com/' });
 *   stream.pushUrl({ loc: 'https://example.com/about' });
 *   stream.finish();
 */
export class SitemapStream extends plugins.Readable {
  /** Extension prefixes and their namespace URIs, in header declaration order. */
  private static readonly EXTENSION_NAMESPACES: Readonly<Record<string, string>> = {
    image: 'http://www.google.com/schemas/sitemap-image/1.1',
    video: 'http://www.google.com/schemas/sitemap-video/1.1',
    news: 'http://www.google.com/schemas/sitemap-news/0.9',
    xhtml: 'http://www.w3.org/1999/xhtml',
  };

  private options: interfaces.ISitemapOptions;
  private urlCount = 0;
  private headerWritten = false;
  private finished = false;
  /** Every extension prefix seen so far. */
  private namespaces: Set<string> = new Set();
  /** Prefixes that were actually declared on the <urlset> element. */
  private headerNamespaces: Set<string> = new Set();

  /**
   * @param options Sitemap options; `prettyPrint` defaults to true.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    super({ encoding: 'utf-8' });
    this.options = {
      prettyPrint: true,
      ...options,
    };
  }

  /**
   * Push a URL entry into the stream.
   * The URL is immediately rendered to XML and pushed to the readable buffer.
   *
   * @returns The value returned by Readable#push for the rendered chunk.
   * @throws Error when called after finish().
   */
  pushUrl(url: interfaces.ISitemapUrl): boolean {
    if (this.finished) {
      throw new Error('Cannot push URLs after calling finish()');
    }

    // Detect needed namespaces
    const needed: string[] = [];
    if (url.images?.length) needed.push('image');
    if (url.videos?.length) needed.push('video');
    if (url.news) needed.push('news');
    if (url.alternates?.length) needed.push('xhtml');
    for (const prefix of needed) this.namespaces.add(prefix);

    // Write header on first URL
    if (!this.headerWritten) {
      this.writeHeader();
    }

    // Prefixes this URL needs but the header could not foresee are declared locally.
    const localDeclarations = needed
      .filter((prefix) => !this.headerNamespaces.has(prefix))
      .map((prefix) => ` xmlns:${prefix}="${SitemapStream.EXTENSION_NAMESPACES[prefix]}"`)
      .join('');

    // Build URL element XML using XmlRenderer internals
    const indent = this.options.prettyPrint !== false ? ' ' : '';
    const nl = this.options.prettyPrint !== false ? '\n' : '';

    let urlXml = `${indent}<url${localDeclarations}>${nl}`;
    urlXml += `${indent}${indent}<loc>${XmlRenderer.escapeXml(url.loc)}</loc>${nl}`;

    if (url.lastmod != null) {
      urlXml += `${indent}${indent}<lastmod>${XmlRenderer.formatDate(url.lastmod)}</lastmod>${nl}`;
    }

    const changefreq = url.changefreq ?? this.options.defaultChangeFreq;
    if (changefreq) {
      urlXml += `${indent}${indent}<changefreq>${XmlRenderer.escapeXml(changefreq)}</changefreq>${nl}`;
    }

    const priority = url.priority ?? this.options.defaultPriority;
    if (priority != null) {
      urlXml += `${indent}${indent}<priority>${priority.toFixed(1)}</priority>${nl}`;
    }

    // Extensions (simplified inline rendering for streaming)
    if (url.images) {
      for (const img of url.images) {
        urlXml += `${indent}${indent}<image:image>${nl}`;
        urlXml += `${indent}${indent}${indent}<image:loc>${XmlRenderer.escapeXml(img.loc)}</image:loc>${nl}`;
        if (img.caption) urlXml += `${indent}${indent}${indent}<image:caption>${XmlRenderer.escapeXml(img.caption)}</image:caption>${nl}`;
        if (img.title) urlXml += `${indent}${indent}${indent}<image:title>${XmlRenderer.escapeXml(img.title)}</image:title>${nl}`;
        urlXml += `${indent}${indent}</image:image>${nl}`;
      }
    }

    if (url.news) {
      urlXml += `${indent}${indent}<news:news>${nl}`;
      urlXml += `${indent}${indent}${indent}<news:publication>${nl}`;
      urlXml += `${indent}${indent}${indent}${indent}<news:name>${XmlRenderer.escapeXml(url.news.publication.name)}</news:name>${nl}`;
      urlXml += `${indent}${indent}${indent}${indent}<news:language>${XmlRenderer.escapeXml(url.news.publication.language)}</news:language>${nl}`;
      urlXml += `${indent}${indent}${indent}</news:publication>${nl}`;
      urlXml += `${indent}${indent}${indent}<news:publication_date>${XmlRenderer.formatDate(url.news.publicationDate)}</news:publication_date>${nl}`;
      urlXml += `${indent}${indent}${indent}<news:title>${XmlRenderer.escapeXml(url.news.title)}</news:title>${nl}`;
      if (url.news.keywords) {
        const kw = Array.isArray(url.news.keywords) ? url.news.keywords.join(', ') : url.news.keywords;
        urlXml += `${indent}${indent}${indent}<news:keywords>${XmlRenderer.escapeXml(kw)}</news:keywords>${nl}`;
      }
      urlXml += `${indent}${indent}</news:news>${nl}`;
    }

    if (url.alternates) {
      for (const alt of url.alternates) {
        // Both attribute values are escaped so caller-supplied text cannot break out of the attribute.
        urlXml += `${indent}${indent}<xhtml:link rel="alternate" hreflang="${XmlRenderer.escapeXml(alt.hreflang)}" href="${XmlRenderer.escapeXml(alt.href)}"/>${nl}`;
      }
    }

    urlXml += `${indent}</url>${nl}`;

    this.urlCount++;
    return this.push(urlXml);
  }

  /**
   * Signal that no more URLs will be added.
   * Writes the closing tag and ends the stream. Calling it again is a no-op.
   */
  finish(): void {
    if (this.finished) return;
    this.finished = true;

    if (!this.headerWritten) {
      // Empty sitemap
      this.writeHeader();
    }

    this.push('</urlset>\n');
    this.push(null); // signal end of stream
  }

  /** Get the number of URLs written so far */
  get count(): number {
    return this.urlCount;
  }

  // Required by Readable
  _read(): void {
    // Data is pushed via pushUrl(), not pulled
  }

  /**
   * Write the XML header and opening urlset tag.
   * Namespace declarations are based on what's been detected so far; the
   * declared set is remembered so later URLs can declare anything missing.
   */
  private writeHeader(): void {
    this.headerWritten = true;
    this.headerNamespaces = new Set(this.namespaces);
    const nl = this.options.prettyPrint !== false ? '\n' : '';

    let header = `<?xml version="1.0" encoding="UTF-8"?>${nl}`;

    if (this.options.xslUrl) {
      header += `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(this.options.xslUrl)}"?>${nl}`;
    }

    header += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';
    for (const [prefix, uri] of Object.entries(SitemapStream.EXTENSION_NAMESPACES)) {
      if (this.headerNamespaces.has(prefix)) {
        header += `${nl} xmlns:${prefix}="${uri}"`;
      }
    }
    header += `>${nl}`;

    this.push(header);
  }
}

View File

@@ -1,47 +0,0 @@
import * as plugins from './smartsitemap.plugins.js';
/**
 * Update-frequency values accepted for a URL in SitemapWebsite;
 * the chosen value is emitted as the entry's `changefreq`.
 */
export type TUpdateFrequency =
  | 'never'
  | 'daily'
  | 'weekly'
  | 'monthly'
  | 'yearly';
/** Description of one URL to include in a SitemapWebsite sitemap. */
export interface IUrlInfo {
  /** Address of the page; emitted as `loc` */
  url: string;
  /** Last-modified time, passed to `new Date(...)` and emitted as an ISO `lastmod` */
  timestamp: number;
  /** Optional update frequency; SitemapWebsite falls back to 'weekly' when omitted */
  frequency?: TUpdateFrequency;
}
/**
 * Legacy plain-website sitemap generator.
 * Collects URL descriptions and renders them as a <urlset> document.
 */
export class SitemapWebsite {
  /** URL descriptions collected so far, in insertion order. */
  urlInfos: IUrlInfo[] = [];

  constructor() {}

  /** Register one more URL for the sitemap. */
  public addUrl(urlInfoArg: IUrlInfo) {
    this.urlInfos.push(urlInfoArg);
  }

  /**
   * Render all registered URLs as sitemap XML.
   * Each entry gets an ISO `lastmod` derived from its timestamp and a
   * `changefreq` that defaults to 'weekly'.
   */
  public exportSitemapXml() {
    const entries: {
      loc: string;
      lastmod: string;
      changefreq: TUpdateFrequency;
    }[] = this.urlInfos.map((info) => {
      const modified = new Date(info.timestamp);
      return {
        loc: info.url,
        lastmod: modified.toISOString(),
        changefreq: info.frequency || 'weekly',
      };
    });

    const document: any = {
      urlset: {
        '@_xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        url: entries,
      },
    };

    return new plugins.smartxml.SmartXml().createXmlFromObject(document);
  }
}

View File

@@ -1,92 +1,112 @@
import { SitemapNews } from './smartsitemap.classes.sitemapnews.js';
import {
type IUrlInfo,
SitemapWebsite,
} from './smartsitemap.classes.sitemapwebsite.js';
import * as plugins from './smartsitemap.plugins.js';
import * as interfaces from './interfaces/index.js';
import type * as interfaces from './interfaces/index.js';
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
import { NewsSitemapBuilder } from './smartsitemap.classes.newsbuilder.js';
import { SitemapIndexBuilder } from './smartsitemap.classes.indexbuilder.js';
import { SitemapParser } from './smartsitemap.classes.sitemapparser.js';
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
import { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
import { SitemapValidator } from './smartsitemap.classes.validator.js';
/**
* Main entry point for @push.rocks/smartsitemap.
* Provides static factory methods for creating, parsing, and validating sitemaps.
*
* @example Simple sitemap
* ```typescript
* const xml = SmartSitemap.create()
* .addUrl('https://example.com/')
* .addUrl('https://example.com/about')
* .toXml();
* ```
*
* @example News sitemap from RSS feed
* ```typescript
* const builder = SmartSitemap.createNews({ publicationName: 'My Pub' });
* await builder.importFromFeedUrl('https://example.com/rss/');
* const xml = builder.toXml();
* ```
*/
export class SmartSitemap {
constructor() {}
// ──────────────────────────────────────────────
// Static Factory Methods
// ──────────────────────────────────────────────
/**
* creates a sitemap for news from feedurl
*/
public async createSitemapNewsFromFeedUrl(
feedUrlArg: string,
): Promise<string> {
const sitemapNewsInstance = new SitemapNews({});
await sitemapNewsInstance.readAndAddFromRssFeedUrl(feedUrlArg);
return sitemapNewsInstance.exportSitemapXml();
/** Create a standard sitemap builder */
static create(options?: interfaces.ISitemapOptions): UrlsetBuilder {
return new UrlsetBuilder(options);
}
/**
* creates a sitemap for news from feedxmlstring
*/
public async createSitemapNewsFromAFeedStringArg(
feedStringArg: string,
): Promise<string> {
const sitemapNewsInstance = new SitemapNews({});
await sitemapNewsInstance.readAndAddFromRssFeedString(feedStringArg);
return sitemapNewsInstance.exportSitemapXml();
/** Create a news sitemap builder */
static createNews(options: interfaces.INewsSitemapOptions): NewsSitemapBuilder {
return new NewsSitemapBuilder(options);
}
/**
* creates a sitemap for news from an array of articles
*/
public async createSitemapNewsFromArticleArray(
articleArrayArg: plugins.tsclass.content.IArticle[],
): Promise<string> {
const sitemapNewsInstance = new SitemapNews({});
await sitemapNewsInstance.readAndParseArticles(articleArrayArg);
return sitemapNewsInstance.exportSitemapXml();
/** Create a sitemap index builder */
static createIndex(options?: interfaces.ISitemapOptions): SitemapIndexBuilder {
return new SitemapIndexBuilder(options);
}
/**
* creates a normal sitemap from a list of urls
*/
public async createSitemapFromYmlString(yamlString: string): Promise<string> {
const yamlObject: interfaces.ISitemapYaml =
await plugins.smartyaml.yamlStringToObject(yamlString);
const sitemapWebsite = new SitemapWebsite();
for (const urlArg of yamlObject.daily) {
sitemapWebsite.addUrl({
url: urlArg,
timestamp: Date.now() - 10000,
frequency: 'daily',
});
}
return sitemapWebsite.exportSitemapXml();
/** Parse a sitemap XML string into structured data */
static async parse(xml: string): Promise<interfaces.IParsedSitemap> {
return SitemapParser.parse(xml);
}
/**
* creates a normal sitemap from a list of urls
*/
public async createSitemapFromUrlInfoArray(urlInfosArg: IUrlInfo[]) {
const sitemapWebsite = new SitemapWebsite();
for (const urlInfo of urlInfosArg) {
sitemapWebsite.addUrl(urlInfo);
}
return sitemapWebsite.exportSitemapXml();
/** Fetch and parse a sitemap from a URL */
static async parseUrl(url: string): Promise<interfaces.IParsedSitemap> {
return SitemapParser.parseUrl(url);
}
/**
* parses a sitemap url
*/
public async parseSitemapUrl(urlArg: string) {
const response = await plugins.webrequest.webrequest(urlArg);
const sitemapXml = await response.text();
const parsedSitemap = await this.parseSitemap(sitemapXml);
return parsedSitemap;
/** Create a UrlsetBuilder populated from an RSS/Atom feed URL */
static async fromFeedUrl(
feedUrl: string,
options?: interfaces.IFeedImportOptions,
): Promise<UrlsetBuilder> {
const urls = await FeedImporter.fromUrl(feedUrl, options);
const builder = new UrlsetBuilder();
builder.addUrls(urls);
return builder;
}
/**
* parses a sitemap
*/
public async parseSitemap(
sitemapXmlArg: string,
): Promise<interfaces.IParsedSiteMap> {
return new plugins.smartxml.SmartXml().parseXmlToObject(sitemapXmlArg);
/** Create a UrlsetBuilder populated from an RSS/Atom feed string */
static async fromFeedString(
feedXml: string,
options?: interfaces.IFeedImportOptions,
): Promise<UrlsetBuilder> {
const urls = await FeedImporter.fromString(feedXml, options);
const builder = new UrlsetBuilder();
builder.addUrls(urls);
return builder;
}
/** Create a UrlsetBuilder populated from a YAML config string */
static async fromYaml(yamlString: string): Promise<UrlsetBuilder> {
const urls = await YamlImporter.parseConfig(yamlString);
const builder = new UrlsetBuilder();
builder.addUrls(urls);
return builder;
}
/** Create a NewsSitemapBuilder populated from @tsclass/tsclass IArticle array */
static fromArticles(
articles: plugins.tsclass.content.IArticle[],
options: interfaces.INewsSitemapOptions,
): NewsSitemapBuilder {
const builder = new NewsSitemapBuilder(options);
builder.importFromArticles(articles);
return builder;
}
/** Create a UrlsetBuilder from a simple URL string array */
static fromUrls(urls: string[], options?: interfaces.ISitemapOptions): UrlsetBuilder {
const builder = new UrlsetBuilder(options);
builder.addFromArray(urls);
return builder;
}
/**
 * Parse a sitemap XML string and validate the URL entries found in it.
 *
 * Only the `urls` of the parse result are handed to the validator, and no
 * sitemap options are supplied, so the validator's defaults apply.
 *
 * @param xml sitemap document text, handed to SitemapParser.parse
 * @returns the result of SitemapValidator.validateUrlset for the parsed URLs
 */
static async validate(xml: string): Promise<interfaces.IValidationResult> {
  const { urls } = await SitemapParser.parse(xml);
  return SitemapValidator.validateUrlset(urls);
}
}

View File

@@ -0,0 +1,274 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
import { SitemapValidator } from './smartsitemap.classes.validator.js';
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
import { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
import { SitemapStream } from './smartsitemap.classes.sitemapstream.js';
/**
 * Chainable builder for creating standard XML sitemaps (<urlset>).
 * Every mutating method returns `this` for fluent chaining.
 *
 * Supports all sitemap extensions (images, videos, news, hreflang),
 * auto-splitting at 50K URLs, multiple output formats, and validation.
 */
export class UrlsetBuilder {
  /** URL entries collected so far, in the order they were added. */
  protected urls: interfaces.ISitemapUrl[] = [];
  /** Builder defaults merged with the options passed to the constructor. */
  protected options: interfaces.ISitemapOptions;

  /**
   * @param options optional overrides; anything not supplied falls back to
   *   prettyPrint=true, maxUrlsPerSitemap=50000, validate=true
   */
  constructor(options?: interfaces.ISitemapOptions) {
    this.options = {
      prettyPrint: true,
      maxUrlsPerSitemap: 50000,
      validate: true,
      ...options,
    };
  }

  // ──────────────────────────────────────────────
  // Adding URLs
  // ──────────────────────────────────────────────

  /** Add a single URL with full options */
  add(url: interfaces.ISitemapUrl): this {
    this.urls.push(url);
    return this;
  }

  /** Add a URL by loc string, optionally with lastmod */
  addUrl(loc: string, lastmod?: Date | string | number): this {
    const url: interfaces.ISitemapUrl = { loc };
    if (lastmod != null) {
      url.lastmod = lastmod;
    }
    this.urls.push(url);
    return this;
  }

  /** Add multiple URL objects */
  addUrls(urls: interfaces.ISitemapUrl[]): this {
    appendAll(this.urls, urls);
    return this;
  }

  /** Add URLs from a plain string array */
  addFromArray(locs: string[]): this {
    for (const loc of locs) {
      this.urls.push({ loc });
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Bulk operations
  // ──────────────────────────────────────────────

  /** Merge all URLs from another UrlsetBuilder */
  merge(other: UrlsetBuilder): this {
    // getUrls() hands back a copy, so merging a builder into itself is safe.
    appendAll(this.urls, other.getUrls());
    return this;
  }

  /** Filter URLs by predicate (in-place) */
  filter(predicate: (url: interfaces.ISitemapUrl) => boolean): this {
    this.urls = this.urls.filter(predicate);
    return this;
  }

  /** Transform URLs (in-place) */
  map(transform: (url: interfaces.ISitemapUrl) => interfaces.ISitemapUrl): this {
    this.urls = this.urls.map(transform);
    return this;
  }

  /** Sort URLs (in-place); without a comparator, entries are ordered by `loc` via localeCompare */
  sort(compareFn?: (a: interfaces.ISitemapUrl, b: interfaces.ISitemapUrl) => number): this {
    this.urls.sort(compareFn ?? ((a, b) => a.loc.localeCompare(b.loc)));
    return this;
  }

  /** Remove duplicate URLs by loc, keeping the first occurrence of each */
  dedupe(): this {
    const seen = new Set<string>();
    this.urls = this.urls.filter((url) => {
      if (seen.has(url.loc)) return false;
      seen.add(url.loc);
      return true;
    });
    return this;
  }

  // ──────────────────────────────────────────────
  // Defaults
  // ──────────────────────────────────────────────

  /** Set default changefreq for URLs that don't specify one */
  setDefaultChangeFreq(freq: interfaces.TChangeFreq): this {
    this.options.defaultChangeFreq = freq;
    return this;
  }

  /** Set default priority for URLs that don't specify one */
  setDefaultPriority(priority: number): this {
    this.options.defaultPriority = priority;
    return this;
  }

  /** Set XSL stylesheet URL for browser rendering */
  setXslUrl(url: string): this {
    this.options.xslUrl = url;
    return this;
  }

  // ──────────────────────────────────────────────
  // Import sources (async, return Promise<this>)
  // ──────────────────────────────────────────────

  /** Import URLs from an RSS/Atom feed URL */
  async importFromFeedUrl(feedUrl: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const imported = await FeedImporter.fromUrl(feedUrl, options);
    appendAll(this.urls, imported);
    return this;
  }

  /** Import URLs from an RSS/Atom feed XML string */
  async importFromFeedString(feedXml: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const imported = await FeedImporter.fromString(feedXml, options);
    appendAll(this.urls, imported);
    return this;
  }

  /** Import URLs from a YAML config string */
  async importFromYaml(yamlString: string): Promise<this> {
    const imported = await YamlImporter.parseConfig(yamlString);
    appendAll(this.urls, imported);
    return this;
  }

  /**
   * Import from @tsclass/tsclass IArticle array.
   * Each article contributes its `url` as loc; a truthy `timestamp` is
   * converted to a Date and used as lastmod.
   */
  importFromArticles(articles: plugins.tsclass.content.IArticle[]): this {
    for (const article of articles) {
      const url: interfaces.ISitemapUrl = {
        loc: article.url,
        lastmod: article.timestamp ? new Date(article.timestamp) : undefined,
      };
      this.urls.push(url);
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Output
  // ──────────────────────────────────────────────

  /** Export as sitemap XML string */
  toXml(): string {
    return XmlRenderer.renderUrlset(this.urls, this.options);
  }

  /** Export as plain text (one URL per line) */
  toTxt(): string {
    return XmlRenderer.renderTxt(this.urls);
  }

  /** Export as JSON string */
  toJson(): string {
    return XmlRenderer.renderJson(this.urls);
  }

  /** Export as gzipped XML buffer */
  async toGzipBuffer(): Promise<Buffer> {
    const xml = this.toXml();
    const gzip = plugins.promisify(plugins.zlib.gzip);
    return gzip(Buffer.from(xml, 'utf-8')) as Promise<Buffer>;
  }

  /**
   * Export with automatic index splitting.
   * If URL count exceeds maxUrlsPerSitemap, returns a sitemap index
   * plus individual sitemap chunks.
   *
   * The chunk size is the configured maxUrlsPerSitemap rounded down to a whole
   * number and capped at 50000. A missing, non-finite or < 1 value falls back
   * to 50000 (a step of 0 would otherwise never terminate the split loop).
   * Index entries are prefixed with `options.baseUrl` when it is set;
   * otherwise the bare filename is used as loc.
   */
  toSitemapSet(): interfaces.ISitemapSet {
    const maxUrls = resolveChunkSize(this.options.maxUrlsPerSitemap);

    if (this.urls.length <= maxUrls) {
      return {
        needsIndex: false,
        indexXml: null,
        sitemaps: [{ filename: 'sitemap.xml', xml: this.toXml() }],
      };
    }

    const baseUrl = this.options.baseUrl || '';
    // Drop one trailing slash so that joining never yields a double slash.
    const prefix = baseUrl ? `${baseUrl.replace(/\/$/, '')}/` : '';
    const sitemaps: Array<{ filename: string; xml: string }> = [];
    const indexEntries: Array<{ loc: string; lastmod?: string }> = [];

    for (let start = 0; start < this.urls.length; start += maxUrls) {
      const filename = `sitemap-${sitemaps.length + 1}.xml`;
      const chunk = this.urls.slice(start, start + maxUrls);
      sitemaps.push({ filename, xml: XmlRenderer.renderUrlset(chunk, this.options) });
      indexEntries.push({ loc: `${prefix}${filename}` });
    }

    return {
      needsIndex: true,
      indexXml: XmlRenderer.renderIndex(indexEntries, this.options),
      sitemaps,
    };
  }

  /**
   * Create a Node.js Readable stream for large sitemaps.
   *
   * The stream is returned immediately; the URLs held by the builder at the
   * next tick are pushed into it and the stream is then finished.
   */
  toStream(): SitemapStream {
    // Uses the statically imported class: this file is an ES module (see its
    // `import` statements), where a bare `require()` is not defined at runtime.
    const stream = new SitemapStream(this.options);
    // Defer feeding so the caller can attach listeners/pipes first.
    process.nextTick(() => {
      for (const url of this.urls) {
        stream.pushUrl(url);
      }
      stream.finish();
    });
    return stream;
  }

  // ──────────────────────────────────────────────
  // Inspection
  // ──────────────────────────────────────────────

  /** Get a shallow copy of the URL array */
  getUrls(): interfaces.ISitemapUrl[] {
    return [...this.urls];
  }

  /** Get the number of URLs */
  get count(): number {
    return this.urls.length;
  }

  /** Validate this sitemap against the protocol specification */
  validate(): interfaces.IValidationResult {
    return SitemapValidator.validateUrlset(this.urls, this.options);
  }

  /** Get statistics about this sitemap */
  stats(): interfaces.ISitemapStats {
    return SitemapValidator.computeStats(this.urls, this.options);
  }

  /** Get a shallow copy of the options for this builder */
  getOptions(): interfaces.ISitemapOptions {
    return { ...this.options };
  }
}

/** Upper bound on URLs per sitemap file used when splitting. */
const PROTOCOL_MAX_URLS_PER_SITEMAP = 50000;

/** Turn a configured per-sitemap limit into a usable chunk size (integer in 1..50000). */
function resolveChunkSize(configured: number | undefined): number {
  if (configured == null || !Number.isFinite(configured) || configured < 1) {
    return PROTOCOL_MAX_URLS_PER_SITEMAP;
  }
  return Math.min(Math.floor(configured), PROTOCOL_MAX_URLS_PER_SITEMAP);
}

/** Append items one by one; `push(...items)` can exceed the argument limit for very large arrays. */
function appendAll<T>(target: T[], items: Iterable<T>): void {
  for (const item of items) {
    target.push(item);
  }
}

View File

@@ -0,0 +1,289 @@
import type * as interfaces from './interfaces/index.js';
/** changefreq values accepted by validateUrl. */
const VALID_CHANGEFREQS: interfaces.TChangeFreq[] = [
  'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never',
];
const MAX_URL_LENGTH = 2048;
const MAX_URLS_PER_SITEMAP = 50000;
const MAX_SITEMAP_SIZE_BYTES = 52_428_800; // 50 MB
const MAX_IMAGES_PER_URL = 1000;
const MAX_VIDEO_TAGS = 32;
const MAX_VIDEO_DURATION = 28800;
const MAX_VIDEO_DESCRIPTION_LENGTH = 2048;

/**
 * Validates sitemap URLs and fields against the sitemap protocol specification.
 */
export class SitemapValidator {
  /**
   * Validate a single URL entry.
   *
   * Checks loc (presence, length, parseability), priority, changefreq,
   * lastmod, and the image / video / news / alternate extensions.
   *
   * @returns one error per violated rule; empty when the entry is valid
   */
  static validateUrl(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];

    // loc is required
    if (!url.loc) {
      errors.push({ field: 'loc', message: 'URL loc is required', url: url.loc });
    } else {
      errors.push(...SitemapValidator.validateUrlString(url.loc));
    }

    // priority range; the negated form also rejects NaN, which fails both comparisons
    if (url.priority != null && !(url.priority >= 0 && url.priority <= 1)) {
      errors.push({
        field: 'priority',
        message: 'Priority must be between 0.0 and 1.0',
        url: url.loc,
      });
    }

    // changefreq
    if (url.changefreq && !VALID_CHANGEFREQS.includes(url.changefreq)) {
      errors.push({
        field: 'changefreq',
        message: `Invalid changefreq "${url.changefreq}". Must be one of: ${VALID_CHANGEFREQS.join(', ')}`,
        url: url.loc,
      });
    }

    // lastmod date validation
    if (url.lastmod != null) {
      const date = url.lastmod instanceof Date ? url.lastmod : new Date(url.lastmod);
      if (isNaN(date.getTime())) {
        errors.push({
          field: 'lastmod',
          message: `Invalid lastmod date: "${url.lastmod}"`,
          url: url.loc,
        });
      }
    }

    errors.push(
      ...SitemapValidator.validateImages(url),
      ...SitemapValidator.validateVideos(url),
      ...SitemapValidator.validateNews(url),
      ...SitemapValidator.validateAlternates(url),
    );
    return errors;
  }

  /**
   * Validate an entire URL array.
   *
   * Errors come from validateUrl on every entry. Warnings are raised for
   * repeated locs and for exceeding the effective per-sitemap URL limit.
   * `valid` is true exactly when there are no errors (warnings do not count).
   */
  static validateUrlset(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): interfaces.IValidationResult {
    const errors: interfaces.IValidationError[] = [];
    const warnings: interfaces.IValidationWarning[] = [];

    for (const url of urls) {
      errors.push(...SitemapValidator.validateUrl(url));
    }

    // Check for duplicates. Entries without a loc are already reported as
    // errors above, so they are not additionally flagged as duplicates.
    const locs = new Set<string>();
    for (const url of urls) {
      if (!url.loc) continue;
      if (locs.has(url.loc)) {
        warnings.push({
          field: 'loc',
          message: `Duplicate URL: "${url.loc}"`,
          url: url.loc,
        });
      }
      locs.add(url.loc);
    }

    // Size limit warnings
    const maxUrls = SitemapValidator.resolveMaxUrls(options);
    if (urls.length > maxUrls) {
      warnings.push({
        field: 'urlset',
        message: `URL count (${urls.length}) exceeds maximum of ${maxUrls} per sitemap. Use toSitemapSet() for auto-splitting.`,
      });
    }

    const stats = SitemapValidator.computeStats(urls, options);
    return {
      valid: errors.length === 0,
      errors,
      warnings,
      stats,
    };
  }

  /**
   * Validate a URL string for proper format.
   * Reports a length error above MAX_URL_LENGTH characters and a format
   * error when the WHATWG `URL` constructor rejects the string.
   */
  static validateUrlString(url: string): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];
    if (url.length > MAX_URL_LENGTH) {
      errors.push({
        field: 'loc',
        message: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,
        url,
      });
    }
    try {
      new URL(url);
    } catch {
      errors.push({
        field: 'loc',
        message: `Invalid URL: "${url}"`,
        url,
      });
    }
    return errors;
  }

  /**
   * Compute statistics for a set of URLs.
   *
   * `estimatedSizeBytes` is a heuristic built from fixed per-item byte
   * weights, not a measurement of rendered XML. `needsIndex` is true when
   * the URL count exceeds the effective per-sitemap limit.
   */
  static computeStats(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): interfaces.ISitemapStats {
    let imageCount = 0;
    let videoCount = 0;
    let newsCount = 0;
    let alternateCount = 0;
    for (const url of urls) {
      if (url.images) imageCount += url.images.length;
      if (url.videos) videoCount += url.videos.length;
      if (url.news) newsCount++;
      if (url.alternates) alternateCount += url.alternates.length;
    }

    // Rough estimate: ~200 bytes per basic URL entry, more for extensions
    const estimatedSizeBytes =
      200 + // XML header + urlset tags
      urls.length * 200 + // base URL entries
      imageCount * 150 +
      videoCount * 400 +
      newsCount * 300 +
      alternateCount * 100;

    return {
      urlCount: urls.length,
      imageCount,
      videoCount,
      newsCount,
      alternateCount,
      estimatedSizeBytes,
      needsIndex: urls.length > SitemapValidator.resolveMaxUrls(options),
    };
  }

  /**
   * Check size limits for a URL set.
   * `withinLimits` requires both the URL count and the estimated byte size
   * to be within their maxima.
   */
  static checkSizeLimits(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): {
    withinLimits: boolean;
    urlCount: number;
    maxUrls: number;
    estimatedSizeBytes: number;
    maxSizeBytes: number;
  } {
    const maxUrls = SitemapValidator.resolveMaxUrls(options);
    const stats = SitemapValidator.computeStats(urls, options);
    return {
      withinLimits: urls.length <= maxUrls && stats.estimatedSizeBytes <= MAX_SITEMAP_SIZE_BYTES,
      urlCount: urls.length,
      maxUrls,
      estimatedSizeBytes: stats.estimatedSizeBytes,
      maxSizeBytes: MAX_SITEMAP_SIZE_BYTES,
    };
  }

  /**
   * Effective per-sitemap URL limit: the configured maxUrlsPerSitemap, but
   * never above MAX_URLS_PER_SITEMAP. Missing, non-finite or < 1 values fall
   * back to MAX_URLS_PER_SITEMAP. Shared by every limit check in this class so
   * warnings, stats and size checks agree with each other.
   */
  private static resolveMaxUrls(options?: interfaces.ISitemapOptions): number {
    const configured = options?.maxUrlsPerSitemap;
    if (configured == null || !Number.isFinite(configured) || configured < 1) {
      return MAX_URLS_PER_SITEMAP;
    }
    return Math.min(configured, MAX_URLS_PER_SITEMAP);
  }

  /** Image extension: at most MAX_IMAGES_PER_URL images, each with a loc. */
  private static validateImages(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];
    if (!url.images) return errors;

    if (url.images.length > MAX_IMAGES_PER_URL) {
      errors.push({
        field: 'images',
        message: `Maximum ${MAX_IMAGES_PER_URL} images per URL, got ${url.images.length}`,
        url: url.loc,
      });
    }
    for (const img of url.images) {
      if (!img.loc) {
        errors.push({ field: 'image:loc', message: 'Image loc is required', url: url.loc });
      }
    }
    return errors;
  }

  /** Video extension: required fields plus description/duration/rating/tag limits. */
  private static validateVideos(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];
    if (!url.videos) return errors;

    for (const vid of url.videos) {
      if (!vid.thumbnailLoc) {
        errors.push({ field: 'video:thumbnail_loc', message: 'Video thumbnail_loc is required', url: url.loc });
      }
      if (!vid.title) {
        errors.push({ field: 'video:title', message: 'Video title is required', url: url.loc });
      }
      if (!vid.description) {
        errors.push({ field: 'video:description', message: 'Video description is required', url: url.loc });
      }
      if (vid.description && vid.description.length > MAX_VIDEO_DESCRIPTION_LENGTH) {
        errors.push({
          field: 'video:description',
          message: `Video description exceeds ${MAX_VIDEO_DESCRIPTION_LENGTH} chars`,
          url: url.loc,
        });
      }
      if (!vid.contentLoc && !vid.playerLoc) {
        errors.push({
          field: 'video:content_loc',
          message: 'Video must have at least one of contentLoc or playerLoc',
          url: url.loc,
        });
      }
      if (vid.duration != null && (vid.duration < 1 || vid.duration > MAX_VIDEO_DURATION)) {
        errors.push({
          field: 'video:duration',
          message: `Video duration must be 1-${MAX_VIDEO_DURATION} seconds`,
          url: url.loc,
        });
      }
      if (vid.rating != null && (vid.rating < 0 || vid.rating > 5)) {
        errors.push({
          field: 'video:rating',
          message: 'Video rating must be 0.0-5.0',
          url: url.loc,
        });
      }
      if (vid.tags && vid.tags.length > MAX_VIDEO_TAGS) {
        errors.push({
          field: 'video:tag',
          message: `Maximum ${MAX_VIDEO_TAGS} video tags, got ${vid.tags.length}`,
          url: url.loc,
        });
      }
    }
    return errors;
  }

  /** News extension: publication name/language, title and publication date are required. */
  private static validateNews(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];
    if (!url.news) return errors;

    if (!url.news.publication?.name) {
      errors.push({ field: 'news:publication:name', message: 'News publication name is required', url: url.loc });
    }
    if (!url.news.publication?.language) {
      errors.push({ field: 'news:publication:language', message: 'News publication language is required', url: url.loc });
    }
    if (!url.news.title) {
      errors.push({ field: 'news:title', message: 'News title is required', url: url.loc });
    }
    if (url.news.publicationDate == null) {
      errors.push({ field: 'news:publication_date', message: 'News publication date is required', url: url.loc });
    }
    return errors;
  }

  /** hreflang alternates: every alternate needs both hreflang and href. */
  private static validateAlternates(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];
    if (!url.alternates) return errors;

    for (const alt of url.alternates) {
      if (!alt.hreflang) {
        errors.push({ field: 'xhtml:link:hreflang', message: 'Alternate hreflang is required', url: url.loc });
      }
      if (!alt.href) {
        errors.push({ field: 'xhtml:link:href', message: 'Alternate href is required', url: url.loc });
      }
    }
    return errors;
  }
}

View File

@@ -0,0 +1,294 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
// Sitemap XML namespace constants
const NS_SITEMAP = 'http://www.sitemaps.org/schemas/sitemap/0.9';
const NS_IMAGE = 'http://www.google.com/schemas/sitemap-image/1.1';
const NS_VIDEO = 'http://www.google.com/schemas/sitemap-video/1.1';
const NS_NEWS = 'http://www.google.com/schemas/sitemap-news/0.9';
const NS_XHTML = 'http://www.w3.org/1999/xhtml';

/**
 * Handles all XML generation for sitemaps.
 * Supports proper escaping, namespace detection, date formatting,
 * XSL stylesheet references, and pretty printing.
 */
export class XmlRenderer {
  /**
   * Escape a string for use in XML content.
   * Handles the 5 XML special characters.
   *
   * NOTE(review): values are escaped here before being handed to
   * smartxml's createXmlFromObject — confirm that smartxml does not escape
   * entities a second time.
   */
  static escapeXml(str: string): string {
    return str
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /**
   * Format a date value (Date, ISO string, or Unix timestamp in ms)
   * to W3C Datetime format suitable for sitemaps.
   *
   * Strings that `Date` cannot parse are returned unchanged. An invalid Date
   * object or a non-finite number makes `toISOString()` throw a RangeError.
   */
  static formatDate(date: Date | string | number): string {
    if (date instanceof Date) {
      return date.toISOString();
    }
    if (typeof date === 'number') {
      return new Date(date).toISOString();
    }
    // Already a string — validate it parses
    const parsed = new Date(date);
    if (isNaN(parsed.getTime())) {
      return date; // Return as-is if unparseable
    }
    return parsed.toISOString();
  }

  /**
   * Detect which XML namespaces are needed based on URL entries.
   * The base sitemap namespace is always present; extension namespaces are
   * added as soon as any entry uses the corresponding extension.
   */
  static detectNamespaces(urls: interfaces.ISitemapUrl[]): Record<string, string> {
    const ns: Record<string, string> = {
      '@_xmlns': NS_SITEMAP,
    };
    for (const url of urls) {
      if (url.images && url.images.length > 0) {
        ns['@_xmlns:image'] = NS_IMAGE;
      }
      if (url.videos && url.videos.length > 0) {
        ns['@_xmlns:video'] = NS_VIDEO;
      }
      if (url.news) {
        ns['@_xmlns:news'] = NS_NEWS;
      }
      if (url.alternates && url.alternates.length > 0) {
        ns['@_xmlns:xhtml'] = NS_XHTML;
      }
    }
    return ns;
  }

  /**
   * Render a URL array to sitemap XML string.
   * When `options.xslUrl` is set, an xml-stylesheet processing instruction
   * is added to the output.
   */
  static renderUrlset(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): string {
    const namespaces = XmlRenderer.detectNamespaces(urls);
    const urlElements = urls.map((url) => XmlRenderer.buildUrlElement(url, options));
    const xmlObj: any = {
      urlset: {
        ...namespaces,
        url: urlElements,
      },
    };
    const smartXml = new plugins.smartxml.SmartXml();
    let xml = smartXml.createXmlFromObject(xmlObj);

    // Insert XSL stylesheet processing instruction if specified
    if (options?.xslUrl) {
      xml = XmlRenderer.insertXslInstruction(xml, options.xslUrl);
    }
    return xml;
  }

  /**
   * Render a sitemap index XML string.
   * When `options.xslUrl` is set, an xml-stylesheet processing instruction
   * is added to the output.
   */
  static renderIndex(entries: interfaces.ISitemapIndexEntry[], options?: interfaces.ISitemapOptions): string {
    const sitemapElements = entries.map((entry) => {
      const el: any = {
        loc: XmlRenderer.escapeXml(entry.loc),
      };
      if (entry.lastmod != null) {
        el.lastmod = XmlRenderer.formatDate(entry.lastmod);
      }
      return el;
    });
    const xmlObj: any = {
      sitemapindex: {
        '@_xmlns': NS_SITEMAP,
        sitemap: sitemapElements,
      },
    };
    const smartXml = new plugins.smartxml.SmartXml();
    let xml = smartXml.createXmlFromObject(xmlObj);
    if (options?.xslUrl) {
      xml = XmlRenderer.insertXslInstruction(xml, options.xslUrl);
    }
    return xml;
  }

  /**
   * Render URLs as plain text (one URL per line).
   */
  static renderTxt(urls: interfaces.ISitemapUrl[]): string {
    return urls.map((u) => u.loc).join('\n');
  }

  /**
   * Render URLs as JSON.
   */
  static renderJson(urls: interfaces.ISitemapUrl[]): string {
    return JSON.stringify(urls, null, 2);
  }

  /**
   * Build a single <url> element object for use with smartxml.
   * changefreq and priority fall back to the option defaults when the entry
   * does not specify them; priority is written with one decimal place.
   */
  private static buildUrlElement(url: interfaces.ISitemapUrl, options?: interfaces.ISitemapOptions): any {
    const el: any = {
      loc: XmlRenderer.escapeXml(url.loc),
    };

    // lastmod
    if (url.lastmod != null) {
      el.lastmod = XmlRenderer.formatDate(url.lastmod);
    }

    // changefreq (use default if not specified)
    const changefreq = url.changefreq ?? options?.defaultChangeFreq;
    if (changefreq) {
      el.changefreq = changefreq;
    }

    // priority (use default if not specified)
    const priority = url.priority ?? options?.defaultPriority;
    if (priority != null) {
      el.priority = priority.toFixed(1);
    }

    // Image extension
    if (url.images && url.images.length > 0) {
      el['image:image'] = url.images.map((img) => XmlRenderer.buildImageElement(img));
    }

    // Video extension
    if (url.videos && url.videos.length > 0) {
      el['video:video'] = url.videos.map((vid) => XmlRenderer.buildVideoElement(vid));
    }

    // News extension
    if (url.news) {
      el['news:news'] = XmlRenderer.buildNewsElement(url.news);
    }

    // hreflang alternates
    if (url.alternates && url.alternates.length > 0) {
      el['xhtml:link'] = url.alternates.map((alt) => ({
        '@_rel': 'alternate',
        // Escaped like every other caller-supplied text value in this class.
        '@_hreflang': XmlRenderer.escapeXml(alt.hreflang),
        '@_href': XmlRenderer.escapeXml(alt.href),
      }));
    }
    return el;
  }

  /**
   * Build an <image:image> element object.
   */
  private static buildImageElement(img: interfaces.ISitemapImage): any {
    const el: any = {
      'image:loc': XmlRenderer.escapeXml(img.loc),
    };
    if (img.caption) {
      el['image:caption'] = XmlRenderer.escapeXml(img.caption);
    }
    if (img.title) {
      el['image:title'] = XmlRenderer.escapeXml(img.title);
    }
    if (img.geoLocation) {
      el['image:geo_location'] = XmlRenderer.escapeXml(img.geoLocation);
    }
    if (img.licenseUrl) {
      el['image:license'] = XmlRenderer.escapeXml(img.licenseUrl);
    }
    return el;
  }

  /**
   * Build a <video:video> element object.
   * Boolean flags are written as 'yes' / 'no'.
   */
  private static buildVideoElement(vid: interfaces.ISitemapVideo): any {
    const el: any = {
      'video:thumbnail_loc': XmlRenderer.escapeXml(vid.thumbnailLoc),
      'video:title': XmlRenderer.escapeXml(vid.title),
      'video:description': XmlRenderer.escapeXml(vid.description),
    };
    if (vid.contentLoc) {
      el['video:content_loc'] = XmlRenderer.escapeXml(vid.contentLoc);
    }
    if (vid.playerLoc) {
      el['video:player_loc'] = XmlRenderer.escapeXml(vid.playerLoc);
    }
    if (vid.duration != null) {
      el['video:duration'] = vid.duration;
    }
    if (vid.rating != null) {
      el['video:rating'] = vid.rating;
    }
    if (vid.viewCount != null) {
      el['video:view_count'] = vid.viewCount;
    }
    if (vid.publicationDate != null) {
      el['video:publication_date'] = XmlRenderer.formatDate(vid.publicationDate);
    }
    if (vid.familyFriendly != null) {
      el['video:family_friendly'] = vid.familyFriendly ? 'yes' : 'no';
    }
    if (vid.tags && vid.tags.length > 0) {
      // Tags are free text, so they get the same escaping as title/description.
      el['video:tag'] = vid.tags.map((tag) => XmlRenderer.escapeXml(tag));
    }
    if (vid.live != null) {
      el['video:live'] = vid.live ? 'yes' : 'no';
    }
    if (vid.requiresSubscription != null) {
      el['video:requires_subscription'] = vid.requiresSubscription ? 'yes' : 'no';
    }
    return el;
  }

  /**
   * Build a <news:news> element object.
   * Keyword arrays are joined with ', ' into a single string.
   */
  private static buildNewsElement(news: interfaces.ISitemapNews): any {
    const el: any = {
      'news:publication': {
        'news:name': XmlRenderer.escapeXml(news.publication.name),
        'news:language': XmlRenderer.escapeXml(news.publication.language),
      },
      'news:publication_date': XmlRenderer.formatDate(news.publicationDate),
      'news:title': XmlRenderer.escapeXml(news.title),
    };
    if (news.keywords) {
      const kw = Array.isArray(news.keywords) ? news.keywords.join(', ') : news.keywords;
      el['news:keywords'] = XmlRenderer.escapeXml(kw);
    }
    return el;
  }

  /**
   * Insert an XSL stylesheet processing instruction after the XML declaration.
   * If the document does not start with an XML declaration, the instruction
   * is placed at the very beginning instead of being dropped.
   */
  private static insertXslInstruction(xml: string, xslUrl: string): string {
    const pi = `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(xslUrl)}"?>`;
    const xmlDeclaration = /^<\?xml\s[^>]*\?>/;
    if (!xmlDeclaration.test(xml)) {
      return `${pi}\n${xml}`;
    }
    // A replacer function is used so that `$` sequences in the URL (e.g. `$$`,
    // `$&`) are inserted literally rather than treated as replacement patterns.
    return xml.replace(xmlDeclaration, (declaration) => `${declaration}\n${pi}`);
  }
}

View File

@@ -0,0 +1,61 @@
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
/**
 * Imports sitemap configuration from YAML format.
 * Supports the enhanced YAML schema with per-frequency URL groups,
 * default settings, and feed imports.
 */
export class YamlImporter {
  /**
   * Parse a YAML config string and return ISitemapUrl entries.
   *
   * - `urls.<changefreq>` lists: each entry becomes a URL with that
   *   changefreq and `defaults.priority`. Entries starting with `http://` or
   *   `https://` are used as-is; everything else is joined onto `baseUrl`
   *   (one trailing slash on `baseUrl` is dropped, a leading slash is added
   *   to the path when missing). Entries that are neither strings nor
   *   numbers are skipped.
   * - `feeds`: each feed is fetched through FeedImporter; feeds of type
   *   'news' are imported as news entries (publication name defaults to
   *   'Unknown', language to 'en').
   *
   * An empty document yields an empty array.
   */
  static async parseConfig(yamlString: string): Promise<interfaces.ISitemapUrl[]> {
    const parsed = (await plugins.smartyaml.yamlStringToObject(yamlString)) as
      | interfaces.ISitemapYamlConfig
      | null
      | undefined;
    const urls: interfaces.ISitemapUrl[] = [];
    // An empty YAML document parses to null/undefined — nothing to import.
    if (parsed == null) {
      return urls;
    }
    const config = parsed;
    const baseUrl = config.baseUrl?.replace(/\/$/, '') ?? '';

    // Process URL groups by frequency
    if (config.urls) {
      const frequencies: interfaces.TChangeFreq[] = [
        'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never',
      ];
      // Require the full scheme separator so relative paths that merely begin
      // with "http" (e.g. "http-status/") are not mistaken for absolute URLs.
      const absoluteUrl = /^https?:\/\//i;
      for (const freq of frequencies) {
        const urlList: unknown = config.urls[freq];
        if (!Array.isArray(urlList)) continue;
        for (const entry of urlList) {
          if (typeof entry !== 'string' && typeof entry !== 'number') continue;
          const path = String(entry);
          const loc = absoluteUrl.test(path)
            ? path
            : `${baseUrl}${path.startsWith('/') ? '' : '/'}${path}`;
          urls.push({
            loc,
            changefreq: freq,
            priority: config.defaults?.priority,
          });
        }
      }
    }

    // Process feed imports
    if (config.feeds && Array.isArray(config.feeds)) {
      // Dynamic import to avoid circular deps at module load time
      const { FeedImporter } = await import('./smartsitemap.classes.feedimporter.js');
      for (const feedConfig of config.feeds) {
        if (feedConfig.type === 'news') {
          const newsUrls = await FeedImporter.fromUrlAsNews(
            feedConfig.url,
            feedConfig.publicationName ?? 'Unknown',
            feedConfig.publicationLanguage ?? 'en',
          );
          urls.push(...newsUrls);
        } else {
          const standardUrls = await FeedImporter.fromUrl(feedConfig.url);
          urls.push(...standardUrls);
        }
      }
    }
    return urls;
  }
}

View File

@@ -1,11 +1,17 @@
// node built-ins
import * as zlib from 'zlib';
import { promisify } from 'util';
import { Readable } from 'stream';
export { zlib, promisify, Readable };
// pushrocks scope
import * as smartcache from '@push.rocks/smartcache';
import * as smartfeed from '@push.rocks/smartfeed';
import * as smartxml from '@push.rocks/smartxml';
import * as smartyaml from '@push.rocks/smartyaml';
import * as webrequest from '@push.rocks/webrequest';
export { smartcache, smartfeed, smartxml, smartyaml, webrequest };
export { smartfeed, smartxml, smartyaml, webrequest };
// tsclass
import * as tsclass from '@tsclass/tsclass';