feat(core): Enhance commit fetching with caching, concurrency improvements, and dependency upgrades

This commit is contained in:
Philipp Kunz 2025-04-25 20:44:32 +00:00
parent 6032867a13
commit e40e008429
7 changed files with 2620 additions and 1300 deletions

View File

@ -1,5 +1,13 @@
# Changelog
## 2025-04-25 - 1.7.0 - feat(core)
Enhance commit fetching with caching, concurrency improvements, and dependency upgrades
- Upgraded development dependencies (@git.zone/tsbuild, @git.zone/tsbundle, @git.zone/tstest, @push.rocks/tapbundle, @types/node) and bumped runtime dependency versions
- Introduced an optional options argument (enableCache, cacheWindowMs, enableNpmCheck) in the CodeFeed constructor to control in-memory commit caching and the npm publish check, optimizing commit retrieval
- Refactored commit fetching to use AsyncExecutionStack for controlled concurrency and improved performance
- Removed deprecated ts/codefeed.plugins.ts in favor of a consolidated plugins.ts module
## 2024-12-16 - 1.6.5 - fix(CodeFeed)
Fixed timestamp initialization and commit fetching timeframe

View File

@ -16,18 +16,19 @@
"buildDocs": "(tsdoc)"
},
"devDependencies": {
"@git.zone/tsbuild": "^2.1.25",
"@git.zone/tsbundle": "^2.0.5",
"@git.zone/tsbuild": "^2.3.2",
"@git.zone/tsbundle": "^2.2.5",
"@git.zone/tsrun": "^1.2.46",
"@git.zone/tstest": "^1.0.44",
"@push.rocks/tapbundle": "^5.0.15",
"@types/node": "^22.10.2"
"@git.zone/tstest": "^1.0.96",
"@push.rocks/tapbundle": "^5.6.3",
"@types/node": "^22.15.2"
},
"dependencies": {
"@push.rocks/lik": "^6.2.2",
"@push.rocks/qenv": "^6.1.0",
"@push.rocks/smartnpm": "^2.0.4",
"@push.rocks/smarttime": "^4.1.1",
"@push.rocks/smartxml": "^1.0.8"
"@push.rocks/smartxml": "^1.1.1"
},
"repository": {
"type": "git",
@ -61,5 +62,6 @@
"code analysis",
"activity feed",
"version control"
]
],
"packageManager": "pnpm@10.7.0+sha512.6b865ad4b62a1d9842b61d674a393903b871d9244954f652b8842c2b553c72176b278f64c463e52d40fff8aba385c235c8c9ecf5cc7de4fd78b8bb6d49633ab6"
}

3368
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@ -9,12 +9,22 @@ let testCodeFeed: codefeed.CodeFeed;
tap.test('first test', async () => {
const token = await testQenv.getEnvVarOnDemand('GITEA_TOKEN');
// console.log('token', token);
testCodeFeed = new codefeed.CodeFeed('https://code.foss.global', token);
// seed lastRunTimestamp to 1 year ago and enable in-memory caching for 1 year
const oneYearMs = 365 * 24 * 60 * 60 * 1000;
const oneYearAgo = new Date(Date.now() - oneYearMs).toISOString();
testCodeFeed = new codefeed.CodeFeed(
'https://code.foss.global',
token,
oneYearAgo,
{ enableCache: true, cacheWindowMs: oneYearMs, enableNpmCheck: false }
);
expect(testCodeFeed).toBeInstanceOf(codefeed.CodeFeed);
});
tap.test('fetchAllCommitsFromInstance', async () => {
const commits = await testCodeFeed.fetchAllCommitsFromInstance();
// log the actual results so we can inspect them
console.log('Fetched commits:', JSON.stringify(commits, null, 2));
expect(commits).toBeArray();
expect(commits.length).toBeGreaterThan(0);
// expect(commits[0]).toBeTypeofObject();

View File

@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@foss.global/codefeed',
version: '1.6.5',
version: '1.7.0',
description: 'The @foss.global/codefeed module is designed for generating feeds from Gitea repositories, enhancing development workflows by processing commit data and repository activities.'
}

View File

@ -1,21 +1,187 @@
import * as plugins from './codefeed.plugins.js';
import * as plugins from './plugins.js';
export class CodeFeed {
private baseUrl: string;
private token?: string;
private npmRegistry = new plugins.smartnpm.NpmRegistry();
private smartxmlInstance = new plugins.smartxml.SmartXml();
private lastRunTimestamp: string;
private changelogContent: string;
// Raw changelog content for the current repository
private changelogContent: string = '';
// npm registry helper for published-on-npm checks
private npmRegistry: plugins.smartnpm.NpmRegistry;
// In-memory stateful cache of commits
private enableCache: boolean = false;
private cacheWindowMs?: number;
private cache: plugins.interfaces.ICommitResult[] = [];
// enable or disable npm publishedOnNpm checks (true by default)
private enableNpmCheck: boolean = true;
constructor(baseUrl: string, token?: string, lastRunTimestamp?: string) {
constructor(
baseUrl: string,
token?: string,
lastRunTimestamp?: string,
options?: {
enableCache?: boolean;
cacheWindowMs?: number;
enableNpmCheck?: boolean;
}
) {
this.baseUrl = baseUrl;
this.token = token;
this.lastRunTimestamp =
lastRunTimestamp || new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
lastRunTimestamp ?? new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
// configure stateful caching
this.enableCache = options?.enableCache ?? false;
this.cacheWindowMs = options?.cacheWindowMs;
this.enableNpmCheck = options?.enableNpmCheck ?? true;
this.cache = [];
// npm registry instance for version lookups
this.npmRegistry = new plugins.smartnpm.NpmRegistry();
console.log('CodeFeed initialized with last run timestamp:', this.lastRunTimestamp);
}
/**
 * Fetch all new commits (since lastRunTimestamp) across all orgs and repos.
 *
 * Flow:
 *  1. list all organizations, then each organization's repositories;
 *  2. probe every repository with a `limit=1` commit request and fetch the
 *     commit list only when the newest commit is later than the effective
 *     "since" timestamp;
 *  3. turn every new commit into an ICommitResult, enriched with tag, npm and
 *     changelog information;
 *  4. with caching enabled, merge the new entries into the in-memory cache,
 *     trim it to `cacheWindowMs`, advance `lastRunTimestamp` to now and return
 *     the whole cache (newest first); without caching, return only the newly
 *     fetched entries.
 *
 * Probe/commit, tag and npm lookup failures for a single repository are logged
 * and treated as "no data". Errors thrown while listing organizations or an
 * organization's repositories are not caught here.
 *
 * @returns the cached commits (a copy, newest first) when caching is enabled,
 *          otherwise the commits discovered by this run.
 */
public async fetchAllCommitsFromInstance(): Promise<plugins.interfaces.ICommitResult[]> {
  // lastRunTimestamp is produced via toISOString() (UTC, "Z" suffix) while commit
  // dates are whatever the server sends - presumably RFC 3339 with arbitrary
  // offsets (TODO confirm). Comparing the raw strings is therefore unreliable,
  // so every comparison below goes through epoch milliseconds.
  const toMs = (isoTimestamp: string): number => new Date(isoTimestamp).getTime();
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Controlled concurrency with AsyncExecutionStack
  const stack = new plugins.lik.AsyncExecutionStack();
  stack.setNonExclusiveMaxConcurrency(5);

  // determine since timestamp for this run (stateful caching)
  let effectiveSince = this.lastRunTimestamp;
  if (this.enableCache && this.cache.length > 0) {
    // use newest timestamp in cache to fetch only tail
    effectiveSince = this.cache.reduce(
      (newest, entry) => (toMs(entry.timestamp) > toMs(newest) ? entry.timestamp : newest),
      effectiveSince
    );
  }
  const effectiveSinceMs = toMs(effectiveSince);

  // 1) get all organizations
  const orgs = await this.fetchAllOrganizations();

  // 2) fetch repos per org in parallel
  const repoLists = await Promise.all(
    orgs.map((org) =>
      stack.getNonExclusiveExecutionSlot(() => this.fetchRepositoriesForOrg(org))
    )
  );
  // flatten to [{ owner, name }]
  const allRepos = orgs.flatMap((org, i) =>
    repoLists[i].map((r) => ({ owner: org, name: r.name }))
  );

  // 3) probe latest commit per repo and fetch full list only if new commits exist
  const commitJobs = allRepos.map(({ owner, name }) =>
    stack.getNonExclusiveExecutionSlot(async () => {
      try {
        // 3a) Probe the most recent commit (limit=1)
        const probeResp = await this.fetchFunction(
          `/api/v1/repos/${owner}/${name}/commits?limit=1`,
          { headers: this.token ? { Authorization: `token ${this.token}` } : {} }
        );
        if (!probeResp.ok) {
          throw new Error(`Probe failed for ${owner}/${name}: ${probeResp.statusText}`);
        }
        const probeData: plugins.interfaces.ICommit[] = await probeResp.json();
        // If no commits or no new commits since last run, skip
        if (
          probeData.length === 0 ||
          toMs(probeData[0].commit.author.date) <= effectiveSinceMs
        ) {
          return { owner, name, commits: [] };
        }
        // 3b) Fetch commits since last run
        const commits = await this.fetchRecentCommitsForRepo(owner, name, effectiveSince);
        return { owner, name, commits };
      } catch (e: unknown) {
        console.error(`Failed to fetch commits for ${owner}/${name}:`, describeError(e));
        return { owner, name, commits: [] };
      }
    })
  );
  const commitResults = await Promise.all(commitJobs);

  // 4) build new commit entries with tagging, npm and changelog support
  const newResults: plugins.interfaces.ICommitResult[] = [];
  for (const { owner, name, commits } of commitResults) {
    // Always start from an empty changelog: not every failure path of
    // loadChangelogFromRepo is visible from here, and a failed load must never
    // leave the previous repository's changelog attached to this one.
    this.changelogContent = '';
    // skip repos with no new commits
    if (commits.length === 0) {
      continue;
    }
    // load changelog for this repo
    await this.loadChangelogFromRepo(owner, name);

    // fetch tags for this repo
    let taggedShas: Set<string>;
    try {
      taggedShas = await this.fetchTags(owner, name);
    } catch (e: unknown) {
      console.error(`Failed to fetch tags for ${owner}/${name}:`, describeError(e));
      taggedShas = new Set<string>();
    }

    // fetch npm package info only if any new commits correspond to a tag
    const hasTaggedCommit = commits.some((c) => taggedShas.has(c.sha));
    let pkgInfo: { allVersions: Array<{ version: string }> } | null = null;
    if (hasTaggedCommit && this.enableNpmCheck) {
      try {
        pkgInfo = await this.npmRegistry.getPackageInfo(`@${owner}/${name}`);
      } catch (e: unknown) {
        console.error(`Failed to fetch package info for ${owner}/${name}:`, describeError(e));
        pkgInfo = null;
      }
    }

    // build commit entries
    for (const c of commits) {
      // the commit message (newlines removed) doubles as the version to look up
      const versionCandidate = c.commit.message.replace(/\n/g, '').trim();
      const isTagged = taggedShas.has(c.sha);
      const publishedOnNpm = isTagged && pkgInfo
        ? pkgInfo.allVersions.some((v) => v.version === versionCandidate)
        : false;
      let changelogEntry: string | undefined;
      if (this.changelogContent) {
        changelogEntry = this.getChangelogForVersion(versionCandidate);
      }
      newResults.push({
        baseUrl: this.baseUrl,
        org: owner,
        repo: name,
        timestamp: c.commit.author.date,
        prettyAgoTime: plugins.smarttime.getMilliSecondsAsHumanReadableAgoTime(
          toMs(c.commit.author.date)
        ),
        hash: c.sha,
        commitMessage: c.commit.message,
        tagged: isTagged,
        publishedOnNpm,
        changelog: changelogEntry,
      });
    }
  }

  // if caching is enabled, merge into in-memory cache and return full cache
  if (this.enableCache) {
    const existingHashes = new Set(this.cache.map((c) => c.hash));
    const uniqueNew = newResults.filter((c) => !existingHashes.has(c.hash));
    this.cache.push(...uniqueNew);
    // trim commits older than window
    if (this.cacheWindowMs !== undefined) {
      const cutoff = Date.now() - this.cacheWindowMs;
      this.cache = this.cache.filter((c) => toMs(c.timestamp) >= cutoff);
    }
    // advance lastRunTimestamp to now
    this.lastRunTimestamp = new Date().toISOString();
    // sort descending by actual point in time; fall back to the string order
    // for ties or unparsable timestamps so the ordering stays deterministic
    this.cache.sort(
      (a, b) =>
        toMs(b.timestamp) - toMs(a.timestamp) || b.timestamp.localeCompare(a.timestamp)
    );
    // hand out a copy so callers cannot mutate the internal cache array
    return [...this.cache];
  }

  // otherwise, return only newly fetched commits
  return newResults;
}
/**
* Load the changelog directly from the Gitea repository.
*/
@ -27,7 +193,6 @@ export class CodeFeed {
}
const response = await this.fetchFunction(url, { headers });
if (!response.ok) {
console.error(
`Could not fetch CHANGELOG.md from ${owner}/${repo}: ${response.status} ${response.statusText}`
@ -43,8 +208,8 @@ export class CodeFeed {
return;
}
const decodedContent = Buffer.from(data.content, 'base64').toString('utf8');
this.changelogContent = decodedContent;
// decode base64 content
this.changelogContent = Buffer.from(data.content, 'base64').toString('utf8');
}
/**
@ -78,294 +243,77 @@ export class CodeFeed {
return changelogLines.join('\n').trim();
}
/**
 * Collect the commit SHAs that the tags of `owner/repo` point at.
 *
 * Tags are requested page by page (50 per request) until a page comes back
 * with fewer than 50 entries. A non-OK response is logged and ends the walk,
 * returning whatever SHAs were gathered up to that point.
 *
 * @param owner organization (or user) that owns the repository
 * @param repo repository name
 * @returns the set of tagged commit SHAs (possibly partial or empty)
 */
private async fetchTags(owner: string, repo: string): Promise<Set<string>> {
  const shaSet = new Set<string>();
  for (let pageNo = 1; ; pageNo++) {
    const tagsResponse = await this.fetchFunction(
      `/api/v1/repos/${owner}/${repo}/tags?limit=50&page=${pageNo}`,
      { headers: this.token ? { Authorization: `token ${this.token}` } : {} }
    );
    if (!tagsResponse.ok) {
      console.error(`Failed to fetch tags for ${owner}/${repo}: ${tagsResponse.status} ${tagsResponse.statusText}`);
      break;
    }
    const pageTags: plugins.interfaces.ITag[] = await tagsResponse.json();
    for (const { commit } of pageTags) {
      // tags without a resolvable commit SHA are ignored
      if (commit?.sha) {
        shaSet.add(commit.sha);
      }
    }
    // a short (or empty) page means there is nothing left to request
    if (pageTags.length < 50) {
      break;
    }
  }
  return shaSet;
}
private async fetchAllOrganizations(): Promise<string[]> {
const url = `/api/v1/orgs`;
const response = await this.fetchFunction(url, {
const resp = await this.fetchFunction('/api/v1/orgs', {
headers: this.token ? { Authorization: `token ${this.token}` } : {},
});
if (!response.ok) {
throw new Error(`Failed to fetch organizations: ${response.statusText}`);
if (!resp.ok) {
throw new Error(`Failed to fetch organizations: ${resp.statusText}`);
}
const data: { username: string }[] = await response.json();
return data.map((org) => org.username);
const data: { username: string }[] = await resp.json();
return data.map((o) => o.username);
}
private async fetchOrgRssFeed(optionsArg: {
orgName: string;
repoName?: string;
}): Promise<any[]> {
let rssUrl: string;
if (optionsArg.orgName && !optionsArg.repoName) {
rssUrl = `/${optionsArg.orgName}.atom`;
} else if (optionsArg.orgName && optionsArg.repoName) {
rssUrl = `/${optionsArg.orgName}/${optionsArg.repoName}.atom`;
} else {
throw new Error('Invalid arguments provided to fetchOrgRssFeed.');
}
const response = await this.fetchFunction(rssUrl, {});
if (!response.ok) {
throw new Error(
`Failed to fetch RSS feed for organization ${optionsArg.orgName}/${optionsArg.repoName}: ${response.statusText}`
);
}
const rssText = await response.text();
const rssData = this.smartxmlInstance.parseXmlToObject(rssText);
return rssData.feed.entry || [];
}
private async hasNewActivity(optionsArg: {
orgName: string;
repoName?: string;
}): Promise<boolean> {
const entries = await this.fetchOrgRssFeed(optionsArg);
return entries.some((entry: any) => {
const updated = new Date(entry.updated);
return updated > new Date(this.lastRunTimestamp);
private async fetchRepositoriesForOrg(org: string): Promise<plugins.interfaces.IRepository[]> {
const resp = await this.fetchFunction(`/api/v1/orgs/${org}/repos?limit=50`, {
headers: this.token ? { Authorization: `token ${this.token}` } : {},
});
}
private async fetchAllRepositories(): Promise<plugins.interfaces.IRepository[]> {
let page = 1;
const allRepos: plugins.interfaces.IRepository[] = [];
while (true) {
const url = `/api/v1/repos/search?limit=50&page=${page.toString()}`;
const resp = await this.fetchFunction(url, {
headers: this.token ? { Authorization: `token ${this.token}` } : {},
});
if (!resp.ok) {
throw new Error(`Failed to fetch repositories: ${resp.statusText}`);
}
const data: plugins.interfaces.IRepoSearchResponse = await resp.json();
allRepos.push(...data.data);
if (data.data.length < 50) {
break;
}
page++;
if (!resp.ok) {
throw new Error(`Failed to fetch repositories for ${org}: ${resp.statusText}`);
}
return allRepos;
}
private async fetchTags(owner: string, repo: string): Promise<Set<string>> {
let page = 1;
const tags: plugins.interfaces.ITag[] = [];
while (true) {
const url = `/api/v1/repos/${owner}/${repo}/tags?limit=50&page=${page.toString()}`;
const resp = await this.fetchFunction(url, {
headers: this.token ? { Authorization: `token ${this.token}` } : {},
});
if (!resp.ok) {
console.error(
`Failed to fetch tags for ${owner}/${repo}: ${resp.status} ${resp.statusText} at ${url}`
);
throw new Error(`Failed to fetch tags for ${owner}/${repo}: ${resp.statusText}`);
}
const data: plugins.interfaces.ITag[] = await resp.json();
tags.push(...data);
if (data.length < 50) {
break;
}
page++;
}
const taggedCommitShas = new Set<string>();
for (const t of tags) {
if (t.commit?.sha) {
taggedCommitShas.add(t.commit.sha);
}
}
return taggedCommitShas;
const data: plugins.interfaces.IRepository[] = await resp.json();
return data;
}
private async fetchRecentCommitsForRepo(
owner: string,
repo: string
repo: string,
sinceTimestamp?: string
): Promise<plugins.interfaces.ICommit[]> {
const commitTimeframe = new Date(Date.now() - (7 * 24 * 60 * 60 * 1000));
let page = 1;
const recentCommits: plugins.interfaces.ICommit[] = [];
while (true) {
const url = `/api/v1/repos/${owner}/${repo}/commits?limit=50&page=${page.toString()}`;
const resp = await this.fetchFunction(url, {
headers: this.token ? { Authorization: `token ${this.token}` } : {},
});
if (!resp.ok) {
console.error(
`Failed to fetch commits for ${owner}/${repo}: ${resp.status} ${resp.statusText} at ${url}`
);
throw new Error(`Failed to fetch commits for ${owner}/${repo}: ${resp.statusText}`);
}
const data: plugins.interfaces.ICommit[] = await resp.json();
if (data.length === 0) {
break;
}
for (const commit of data) {
const commitDate = new Date(commit.commit.author.date);
if (commitDate > commitTimeframe) {
recentCommits.push(commit);
} else {
return recentCommits;
}
}
page++;
const since = sinceTimestamp ?? this.lastRunTimestamp;
const resp = await this.fetchFunction(
`/api/v1/repos/${owner}/${repo}/commits?since=${encodeURIComponent(
since
)}&limit=50`,
{ headers: this.token ? { Authorization: `token ${this.token}` } : {} }
);
if (!resp.ok) {
throw new Error(`Failed to fetch commits for ${owner}/${repo}: ${resp.statusText}`);
}
return recentCommits;
const data: plugins.interfaces.ICommit[] = await resp.json();
return data;
}
public async fetchAllCommitsFromInstance(): Promise<plugins.interfaces.ICommitResult[]> {
const orgs = await this.fetchAllOrganizations();
console.log(`Found ${orgs.length} organizations`);
let allCommits: plugins.interfaces.ICommitResult[] = [];
for (const orgName of orgs) {
console.log(`Checking activity for organization: ${orgName}`);
try {
const hasActivity = await this.hasNewActivity({
orgName,
});
if (!hasActivity) {
console.log(`No new activity for organization: ${orgName}`);
continue;
}
} catch (error: any) {
console.error(`Error fetching activity for organization ${orgName}:`, error.message);
continue;
}
console.log(`New activity detected for organization: ${orgName}. Processing repositories...`);
const repos = await this.fetchAllRepositories();
for (const r of repos.filter((repo) => repo.owner.login === orgName)) {
try {
const hasActivity = await this.hasNewActivity({
orgName,
repoName: r.name,
});
if (!hasActivity) {
console.log(`No new activity for repository: ${orgName}/${r.name}`);
continue;
}
} catch (error: any) {
console.error(
`Error fetching activity for repository ${orgName}/${r.name}:`,
error.message
);
continue;
}
const org = r.owner.login;
const repo = r.name;
console.log(`Processing repository ${org}/${repo}`);
try {
const taggedCommitShas = await this.fetchTags(org, repo);
const commits = await this.fetchRecentCommitsForRepo(org, repo);
// Load the changelog from this repo.
await this.loadChangelogFromRepo(org, repo);
const commitResults = commits.map((c) => {
const commit: plugins.interfaces.ICommitResult = {
baseUrl: this.baseUrl,
org,
repo,
timestamp: c.commit.author.date,
prettyAgoTime: plugins.smarttime.getMilliSecondsAsHumanReadableAgoTime(
new Date(c.commit.author.date).getTime()
),
hash: c.sha,
commitMessage: c.commit.message,
tagged: taggedCommitShas.has(c.sha),
publishedOnNpm: false,
changelog: undefined,
};
return commit;
});
if (commitResults.length > 0) {
try {
const packageInfo = await this.npmRegistry.getPackageInfo(`@${org}/${repo}`);
for (const commitResult of commitResults.filter((c) => c.tagged)) {
const versionCandidate = commitResult.commitMessage.replace('\n', '').trim();
const correspondingVersion = packageInfo.allVersions.find((versionArg) => {
return versionArg.version === versionCandidate;
});
if (correspondingVersion) {
commitResult.publishedOnNpm = true;
}
}
} catch (error: any) {
console.error(`Failed to fetch package info for ${org}/${repo}:`, error.message);
}
try {
for (const commitResult of commitResults.filter((c) => c.tagged)) {
const versionCandidate = commitResult.commitMessage.replace('\n', '').trim();
const changelogEntry = this.getChangelogForVersion(versionCandidate);
if (changelogEntry) {
commitResult.changelog = changelogEntry;
}
}
} catch (error: any) {
console.error(`Failed to fetch changelog info for ${org}/${repo}:`, error.message);
}
}
allCommits.push(...commitResults);
} catch (error: any) {
console.error(`Skipping repository ${org}/${repo} due to error:`, error.message);
}
}
}
console.log(`Processed ${allCommits.length} commits in total.`);
allCommits = allCommits
.filter((commitArg) => commitArg.tagged)
.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
console.log(`Filtered to ${allCommits.length} commits with tagged statuses.`);
for (const c of allCommits) {
console.log(` ==========================================================================
${c.prettyAgoTime} ago:
${c.org}/${c.repo}
${c.commitMessage}
Published on npm: ${c.publishedOnNpm}
${c.changelog ? `Changelog:\n${c.changelog}\n` : ''}
`);
}
return allCommits;
/**
 * Issue a request against the configured instance.
 *
 * @param urlArg path (including query string) appended verbatim to `baseUrl`
 * @param optionsArg options forwarded unchanged to `fetch`; defaults to `{}`
 * @returns the Response produced by `fetch`
 */
public async fetchFunction(
  urlArg: string,
  optionsArg: RequestInit = {}
): Promise<Response> {
  const absoluteUrl = `${this.baseUrl}${urlArg}`;
  return fetch(absoluteUrl, optionsArg);
}
public async fetchFunction(urlArg: string, optionsArg: RequestInit): Promise<Response> {
const response = await fetch(`${this.baseUrl}${urlArg}`, optionsArg);
return response;
}
}
}

View File

@ -10,10 +10,12 @@ import * as qenv from '@push.rocks/qenv';
import * as smartnpm from '@push.rocks/smartnpm';
import * as smartxml from '@push.rocks/smartxml';
import * as smarttime from '@push.rocks/smarttime';
import * as lik from '@push.rocks/lik';
export {
qenv,
smartnpm,
smartxml,
smarttime,
lik,
}