BREAKING CHANGE(api): redesign smartsitemap around builder-based sitemap creation, parsing, validation, and import utilities

This commit is contained in:
2026-03-20 14:03:33 +00:00
parent 61f6bcebd4
commit 4e707347dd
22 changed files with 4843 additions and 2196 deletions

View File

@@ -1,25 +1,484 @@
import { expect, tap } from '@push.rocks/tapbundle';
import * as smartsitemap from '../ts/index.js';
let testSmartsitemap: smartsitemap.SmartSitemap;
// ──────────────────────────────────────────────
// Basic builder tests
// ──────────────────────────────────────────────
tap.test('should create an instance of Smartsitemap', async () => {
testSmartsitemap = new smartsitemap.SmartSitemap();
expect(testSmartsitemap).toBeInstanceOf(smartsitemap.SmartSitemap);
tap.test('SmartSitemap.create() should return a UrlsetBuilder', async () => {
  // The static factory, called without options, must hand back a UrlsetBuilder.
  const created = smartsitemap.SmartSitemap.create();
  expect(created).toBeInstanceOf(smartsitemap.UrlsetBuilder);
});
tap.test('should create a sitemap from feed', async () => {
const sitemapString = await testSmartsitemap.createSitemapNewsFromFeedUrl(
'https://coffee.link/rss/',
);
console.log(sitemapString);
tap.test('should create a basic sitemap with addUrl()', async () => {
  const sitemapXml = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/')
    .addUrl('https://example.com/about')
    .addUrl('https://example.com/blog')
    .toXml();

  // XML declaration, namespaced root element, then one <loc> per added URL.
  const expectedFragments = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset',
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    '<loc>https://example.com/</loc>',
    '<loc>https://example.com/about</loc>',
    '<loc>https://example.com/blog</loc>',
  ];
  for (const fragment of expectedFragments) {
    expect(sitemapXml).toInclude(fragment);
  }
});
tap.test('should parse a sitemap', async () => {
const result = await testSmartsitemap.parseSitemapUrl(
'https://www.theverge.com/sitemaps/google_news',
);
// console.log(result.urlset.url);
tap.test('should apply default changefreq and priority', async () => {
  // Configure the defaults first, then add a URL that specifies neither value.
  const builderWithDefaults = smartsitemap.SmartSitemap.create()
    .setDefaultChangeFreq('weekly')
    .setDefaultPriority(0.5);
  const renderedXml = builderWithDefaults.addUrl('https://example.com/').toXml();

  for (const fragment of ['<changefreq>weekly</changefreq>', '<priority>0.5</priority>']) {
    expect(renderedXml).toInclude(fragment);
  }
});
tap.start();
tap.test('should support per-URL changefreq and priority override', async () => {
  // Builder-level defaults are set, but the single entry supplies its own values.
  const xml = smartsitemap.SmartSitemap.create()
    .setDefaultChangeFreq('weekly')
    .setDefaultPriority(0.5)
    .add({
      loc: 'https://example.com/',
      changefreq: 'daily',
      priority: 1.0,
    })
    .toXml();

  // The per-URL values must be rendered...
  expect(xml).toInclude('<changefreq>daily</changefreq>');
  expect(xml).toInclude('<priority>1.0</priority>');
  // ...and neither default may leak through for the overridden entry.
  expect(xml).not.toInclude('<changefreq>weekly</changefreq>');
  expect(xml).not.toInclude('<priority>0.5</priority>');
});
tap.test('should support lastmod as Date, string, and number', async () => {
  const date = new Date('2025-06-15T00:00:00.000Z');
  // The same instant is supplied in the three accepted forms.
  const xml = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/a', date)
    .addUrl('https://example.com/b', '2025-06-15')
    .addUrl('https://example.com/c', date.getTime())
    .toXml();

  // The Date form renders as its full ISO timestamp.
  expect(xml).toInclude('<lastmod>2025-06-15T00:00:00.000Z</lastmod>');

  // Every URL was given a lastmod, so none of the three forms may be dropped.
  const lastmodCount = xml.split('<lastmod>').length - 1;
  expect(lastmodCount).toEqual(3);
});
// ──────────────────────────────────────────────
// URL escaping
// ──────────────────────────────────────────────
tap.test('should escape XML special characters in URLs', async () => {
  // The query string contains a raw ampersand, which is not legal inside XML text.
  const xml = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/search?q=foo&bar=baz')
    .toXml();

  expect(xml).toInclude('&amp;');
  expect(xml).not.toInclude('&bar=baz');
  // Pin the whole element so an unrelated '&amp;' elsewhere cannot satisfy the test.
  expect(xml).toInclude('<loc>https://example.com/search?q=foo&amp;bar=baz</loc>');
});
// ──────────────────────────────────────────────
// Image extension
// ──────────────────────────────────────────────
tap.test('should generate image sitemap extension', async () => {
  // One page carrying two images: the first has a title, the second a caption.
  const galleryXml = smartsitemap.SmartSitemap.create()
    .add({
      loc: 'https://example.com/gallery',
      images: [
        { loc: 'https://example.com/img/photo1.jpg', title: 'Photo 1' },
        { loc: 'https://example.com/img/photo2.jpg', caption: 'A nice photo' },
      ],
    })
    .toXml();

  const expectedFragments = [
    'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"',
    '<image:image>',
    '<image:loc>https://example.com/img/photo1.jpg</image:loc>',
    '<image:title>Photo 1</image:title>',
    '<image:caption>A nice photo</image:caption>',
  ];
  for (const fragment of expectedFragments) {
    expect(galleryXml).toInclude(fragment);
  }
});
// ──────────────────────────────────────────────
// Video extension
// ──────────────────────────────────────────────
tap.test('should generate video sitemap extension', async () => {
  // A single page with one fully described video entry.
  const videoXml = smartsitemap.SmartSitemap.create()
    .add({
      loc: 'https://example.com/video-page',
      videos: [
        {
          thumbnailLoc: 'https://example.com/thumb.jpg',
          title: 'My Video',
          description: 'A great video about testing.',
          contentLoc: 'https://example.com/video.mp4',
          duration: 120,
          rating: 4.5,
        },
      ],
    })
    .toXml();

  // Namespace declaration plus the video child elements checked by this test.
  const expectedFragments = [
    'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"',
    '<video:video>',
    '<video:thumbnail_loc>https://example.com/thumb.jpg</video:thumbnail_loc>',
    '<video:title>My Video</video:title>',
    '<video:duration>120</video:duration>',
    '<video:rating>4.5</video:rating>',
  ];
  for (const fragment of expectedFragments) {
    expect(videoXml).toInclude(fragment);
  }
});
// ──────────────────────────────────────────────
// News extension
// ──────────────────────────────────────────────
tap.test('should generate news sitemap', async () => {
  // Publication details are set once on the news builder; the article adds
  // its URL, title, publication date and keywords.
  const newsBuilder = smartsitemap.SmartSitemap.createNews({
    publicationName: 'The Daily Test',
    publicationLanguage: 'en',
  });
  const newsXml = newsBuilder
    .addNewsUrl(
      'https://example.com/news/article-1',
      'Breaking: Tests Pass!',
      new Date('2025-06-15T12:00:00Z'),
      ['testing', 'CI'],
    )
    .toXml();

  const expectedFragments = [
    'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"',
    '<news:news>',
    '<news:name>The Daily Test</news:name>',
    '<news:language>en</news:language>',
    '<news:title>Breaking: Tests Pass!</news:title>',
    '<news:keywords>testing, CI</news:keywords>',
    '<news:publication_date>',
  ];
  for (const fragment of expectedFragments) {
    expect(newsXml).toInclude(fragment);
  }
});
// ──────────────────────────────────────────────
// hreflang alternates
// ──────────────────────────────────────────────
tap.test('should generate hreflang alternate links', async () => {
  // One page with English, German and x-default alternates.
  const localizedXml = smartsitemap.SmartSitemap.create()
    .add({
      loc: 'https://example.com/page',
      alternates: [
        { hreflang: 'en', href: 'https://example.com/page' },
        { hreflang: 'de', href: 'https://example.com/de/page' },
        { hreflang: 'x-default', href: 'https://example.com/page' },
      ],
    })
    .toXml();

  // Only the namespace declaration and the presence of xhtml:link are checked.
  for (const fragment of ['xmlns:xhtml="http://www.w3.org/1999/xhtml"', 'xhtml:link']) {
    expect(localizedXml).toInclude(fragment);
  }
});
// ──────────────────────────────────────────────
// XSL stylesheet
// ──────────────────────────────────────────────
tap.test('should include XSL stylesheet processing instruction', async () => {
  // Setting an XSL URL should surface as an xml-stylesheet processing instruction.
  const styledBuilder = smartsitemap.SmartSitemap.create().setXslUrl('/sitemap.xsl');
  const styledXml = styledBuilder.addUrl('https://example.com/').toXml();
  expect(styledXml).toInclude('<?xml-stylesheet type="text/xsl" href="/sitemap.xsl"?>');
});
// ──────────────────────────────────────────────
// Builder operations: fromUrls, merge, dedupe, filter, sort
// ──────────────────────────────────────────────
tap.test('SmartSitemap.fromUrls() should create builder from string array', async () => {
  // Two URLs in, two entries counted.
  const fromList = smartsitemap.SmartSitemap.fromUrls(['https://example.com/a', 'https://example.com/b']);
  expect(fromList.count).toEqual(2);
});
tap.test('should merge two builders', async () => {
  // Two single-URL builders are merged, one after the other, into a fresh builder.
  const first = smartsitemap.SmartSitemap.create().addUrl('https://example.com/a');
  const second = smartsitemap.SmartSitemap.create().addUrl('https://example.com/b');
  const withFirst = smartsitemap.SmartSitemap.create().merge(first);
  const combined = withFirst.merge(second);
  expect(combined.count).toEqual(2);
});
tap.test('should deduplicate URLs', async () => {
  // '/a' is added twice; after dedupe() only two distinct entries should remain.
  const withDuplicates = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/a')
    .addUrl('https://example.com/a')
    .addUrl('https://example.com/b');
  const deduped = withDuplicates.dedupe();
  expect(deduped.count).toEqual(2);
});
tap.test('should filter URLs', async () => {
  // The predicate keeps every entry whose loc does not contain 'remove'.
  const remaining = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/keep')
    .addUrl('https://example.com/remove')
    .filter((entry) => entry.loc.includes('remove') === false);

  expect(remaining.count).toEqual(1);
  const survivors = remaining.getUrls();
  expect(survivors[0].loc).toEqual('https://example.com/keep');
});
tap.test('should sort URLs', async () => {
  // URLs are added out of order; sort() without arguments should order them by loc.
  const sortedBuilder = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/z')
    .addUrl('https://example.com/a')
    .addUrl('https://example.com/m')
    .sort();

  const sortedUrls = sortedBuilder.getUrls();
  const expectedOrder = ['https://example.com/a', 'https://example.com/m', 'https://example.com/z'];
  expectedOrder.forEach((expectedLoc, position) => {
    expect(sortedUrls[position].loc).toEqual(expectedLoc);
  });
});
// ──────────────────────────────────────────────
// Output formats
// ──────────────────────────────────────────────
tap.test('should export as TXT format', async () => {
  // Expected plain-text form: one URL per line, no trailing newline.
  const twoUrlBuilder = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/a')
    .addUrl('https://example.com/b');
  const plainText = twoUrlBuilder.toTxt();
  expect(plainText).toEqual('https://example.com/a\nhttps://example.com/b');
});
tap.test('should export as JSON format', async () => {
  // toJson() returns a string; it must parse to an array of URL entries.
  const serialized = smartsitemap.SmartSitemap.create().addUrl('https://example.com/a').toJson();
  const entries = JSON.parse(serialized);
  expect(entries).toBeArray();
  expect(entries[0].loc).toEqual('https://example.com/a');
});
tap.test('should export as gzip buffer', async () => {
  // toGzipBuffer() is awaited and expected to yield a non-empty Buffer.
  const buffer = await smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/')
    .toGzipBuffer();

  expect(buffer).toBeInstanceOf(Buffer);
  expect(buffer.length).toBeGreaterThan(0);
  // A gzip member starts with the magic bytes 0x1f 0x8b; checking them ensures
  // the payload is actually gzip-framed rather than just any non-empty Buffer.
  expect(buffer[0]).toEqual(0x1f);
  expect(buffer[1]).toEqual(0x8b);
});
// ──────────────────────────────────────────────
// Auto-splitting (sitemap index)
// ──────────────────────────────────────────────
tap.test('should auto-split into sitemap index when URLs exceed max', async () => {
  const splittingBuilder = smartsitemap.SmartSitemap.create({
    maxUrlsPerSitemap: 3,
    baseUrl: 'https://example.com',
  });
  // Seven URLs with a cap of three per sitemap.
  Array.from({ length: 7 }, (_, pageIndex) => pageIndex).forEach((pageIndex) => {
    splittingBuilder.addUrl(`https://example.com/page-${pageIndex}`);
  });

  const sitemapSet = splittingBuilder.toSitemapSet();
  expect(sitemapSet.needsIndex).toBeTrue();
  expect(sitemapSet.indexXml).toBeTruthy();
  expect(sitemapSet.sitemaps.length).toEqual(3); // ceil(7/3) = 3

  // The index must be a <sitemapindex> that references each numbered child file.
  const expectedIndexFragments = ['<sitemapindex', 'sitemap-1.xml', 'sitemap-2.xml', 'sitemap-3.xml'];
  for (const fragment of expectedIndexFragments) {
    expect(sitemapSet.indexXml).toInclude(fragment);
  }
});
tap.test('should not create index when URLs fit in one sitemap', async () => {
  // Two URLs against a cap of 100: a single sitemap and no index expected.
  const smallBuilder = smartsitemap.SmartSitemap.create({ maxUrlsPerSitemap: 100 });
  for (const pageUrl of ['https://example.com/a', 'https://example.com/b']) {
    smallBuilder.addUrl(pageUrl);
  }

  const sitemapSet = smallBuilder.toSitemapSet();
  expect(sitemapSet.needsIndex).toBeFalse();
  expect(sitemapSet.indexXml).toBeNull();
  expect(sitemapSet.sitemaps.length).toEqual(1);
});
// ──────────────────────────────────────────────
// Sitemap Index Builder
// ──────────────────────────────────────────────
tap.test('SitemapIndexBuilder should generate valid index XML', async () => {
  // Two child sitemaps; only the second is given a lastmod date.
  const indexBuilder = smartsitemap.SmartSitemap.createIndex()
    .addSitemap('https://example.com/sitemap-1.xml')
    .addSitemap('https://example.com/sitemap-2.xml', new Date('2025-06-15'));
  const indexXml = indexBuilder.toXml();

  const expectedFragments = [
    '<sitemapindex',
    'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    '<loc>https://example.com/sitemap-1.xml</loc>',
    '<loc>https://example.com/sitemap-2.xml</loc>',
    '<lastmod>2025-06-15',
  ];
  for (const fragment of expectedFragments) {
    expect(indexXml).toInclude(fragment);
  }
});
// ──────────────────────────────────────────────
// Validation
// ──────────────────────────────────────────────
tap.test('should validate URLs and detect errors', async () => {
  // Two deliberately bad entries: a loc that is not a URL, and a priority of 1.5.
  const invalidBuilder = smartsitemap.SmartSitemap.create()
    .addUrl('not-a-valid-url')
    .add({ loc: 'https://example.com/', priority: 1.5 });
  const report = invalidBuilder.validate();

  expect(report.valid).toBeFalse();
  expect(report.errors.length).toBeGreaterThan(0);

  // One error must reference the bad URL...
  const badLocError = report.errors.find((issue) => issue.url === 'not-a-valid-url');
  expect(badLocError).toBeTruthy();

  // ...and one must be attributed to the priority field.
  const badPriorityError = report.errors.find((issue) => issue.field === 'priority');
  expect(badPriorityError).toBeTruthy();
});
tap.test('should pass validation for valid sitemap', async () => {
  // Both entries are well-formed, so validation should report no errors at all.
  const validBuilder = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/')
    .add({ loc: 'https://example.com/about', priority: 0.8, changefreq: 'weekly' });
  const report = validBuilder.validate();

  expect(report.valid).toBeTrue();
  expect(report.errors.length).toEqual(0);
});
// ──────────────────────────────────────────────
// Statistics
// ──────────────────────────────────────────────
tap.test('should compute stats', async () => {
  // Three entries: a plain URL, a page with two images, and a news article.
  const mixedBuilder = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/')
    .add({
      loc: 'https://example.com/gallery',
      images: [{ loc: 'https://example.com/img/1.jpg' }, { loc: 'https://example.com/img/2.jpg' }],
    })
    .add({
      loc: 'https://example.com/news',
      news: {
        publication: { name: 'Test', language: 'en' },
        publicationDate: new Date(),
        title: 'Article',
      },
    });
  const summary = mixedBuilder.stats();

  expect(summary.urlCount).toEqual(3);
  expect(summary.imageCount).toEqual(2);
  expect(summary.newsCount).toEqual(1);
  expect(summary.estimatedSizeBytes).toBeGreaterThan(0);
});
// ──────────────────────────────────────────────
// YAML import
// ──────────────────────────────────────────────
// Builds a sitemap from an inline YAML document via SmartSitemap.fromYaml()
// and checks the resulting URL count and rendered changefreq values.
// NOTE(review): YAML is indentation-sensitive; confirm the nesting inside the
// template literal below matches the intended structure (defaults.priority,
// and urls grouped under daily/monthly/yearly) in the actual file.
tap.test('should import from YAML config', async () => {
const yaml = `
baseUrl: https://example.com
defaults:
priority: 0.5
urls:
daily:
- /
- /blog
monthly:
- /about
- /contact
yearly:
- /privacy
`;
// fromYaml() is awaited, so it returns a promise of a builder.
const builder = await smartsitemap.SmartSitemap.fromYaml(yaml);
// Five paths are listed in total across the three groups.
expect(builder.count).toEqual(5);
const xml = builder.toXml();
// Paths are expected to appear as absolute URLs under the configured baseUrl.
expect(xml).toInclude('https://example.com/');
// Each group key is expected to show up as a changefreq value.
expect(xml).toInclude('<changefreq>daily</changefreq>');
expect(xml).toInclude('<changefreq>monthly</changefreq>');
expect(xml).toInclude('<changefreq>yearly</changefreq>');
});
// ──────────────────────────────────────────────
// SitemapStream
// ──────────────────────────────────────────────
tap.test('should stream sitemap XML', async () => {
  const stream = new smartsitemap.SitemapStream();

  // Collect every emitted chunk so the full document can be inspected afterwards.
  const chunks: string[] = [];
  stream.on('data', (chunk: string) => {
    chunks.push(chunk);
  });

  // Settle on either outcome: without the 'error' listener a failing stream
  // would never emit 'end' and the test would hang instead of failing.
  const done = new Promise<void>((resolve, reject) => {
    stream.on('end', resolve);
    stream.on('error', reject);
  });

  stream.pushUrl({ loc: 'https://example.com/' });
  stream.pushUrl({ loc: 'https://example.com/about' });
  stream.finish();
  await done;

  const xml = chunks.join('');
  expect(xml).toInclude('<?xml version="1.0" encoding="UTF-8"?>');
  expect(xml).toInclude('<urlset');
  expect(xml).toInclude('<loc>https://example.com/</loc>');
  expect(xml).toInclude('<loc>https://example.com/about</loc>');
  expect(xml).toInclude('</urlset>');
  // The stream tracks how many URLs were pushed.
  expect(stream.count).toEqual(2);
});
// ──────────────────────────────────────────────
// Sitemap parsing
// ──────────────────────────────────────────────
tap.test('should parse a urlset sitemap XML', async () => {
  // Round trip: render a two-URL sitemap with the builder, then parse it back.
  const sourceBuilder = smartsitemap.SmartSitemap.create()
    .addUrl('https://example.com/', new Date('2025-06-15'))
    .add({ loc: 'https://example.com/about', changefreq: 'monthly' });
  const renderedXml = sourceBuilder.toXml();

  const parseResult = await smartsitemap.SmartSitemap.parse(renderedXml);
  expect(parseResult.type).toEqual('urlset');
  expect(parseResult.urls.length).toEqual(2);
  expect(parseResult.urls[0].loc).toEqual('https://example.com/');
});
tap.test('should detect sitemap type', async () => {
  // Each pair is [input snippet, expected detected type].
  const cases: Array<[string, string]> = [
    ['<urlset>', 'urlset'],
    ['<sitemapindex>', 'sitemapindex'],
    ['<html>', 'unknown'],
  ];
  for (const [snippet, expectedType] of cases) {
    expect(smartsitemap.SitemapParser.detectType(snippet)).toEqual(expectedType);
  }
});
// ──────────────────────────────────────────────
// XmlRenderer utility
// ──────────────────────────────────────────────
tap.test('XmlRenderer.escapeXml should escape special characters', async () => {
  // Each pair is [raw input, expected escaped output]: ampersand, angle brackets, double quotes.
  const cases: Array<[string, string]> = [
    ['a&b', 'a&amp;b'],
    ['<tag>', '&lt;tag&gt;'],
    ['"quoted"', '&quot;quoted&quot;'],
  ];
  for (const [raw, escaped] of cases) {
    expect(smartsitemap.XmlRenderer.escapeXml(raw)).toEqual(escaped);
  }
});
tap.test('XmlRenderer.formatDate should handle all date types', async () => {
  const { XmlRenderer } = smartsitemap;

  // Date input: expected to come back as the exact ISO timestamp.
  const formattedFromDate = XmlRenderer.formatDate(new Date('2025-06-15T00:00:00.000Z'));
  expect(formattedFromDate).toEqual('2025-06-15T00:00:00.000Z');

  // String input: only the calendar date is checked.
  const formattedFromString = XmlRenderer.formatDate('2025-06-15');
  expect(formattedFromString).toInclude('2025-06-15');

  // Numeric input (epoch milliseconds): only the calendar date is checked.
  const epochMillis = new Date('2025-06-15').getTime();
  const formattedFromNumber = XmlRenderer.formatDate(epochMillis);
  expect(formattedFromNumber).toInclude('2025-06-15');
});
// Start the registered tests and export the value returned by tap.start()
// as this module's default export.
export default tap.start();