Files
tsdoc/ts/aidocs_classes/commit.ts

300 lines
11 KiB
TypeScript

import * as plugins from '../plugins.js';
import { AiDoc } from '../classes.aidoc.js';
import { ProjectContext } from './projectcontext.js';
import { DiffProcessor } from '../classes.diffprocessor.js';
import { logger } from '../logging.js';
/**
 * Token budget configuration for OpenAI API limits.
 *
 * All figures are token counts. The three non-limit entries are subtracted
 * from the context limit to determine how much room is left for diff content.
 */
const TOKEN_BUDGET = {
  /** OpenAI's configured limit */
  OPENAI_CONTEXT_LIMIT: 272_000,
  /** Buffer to avoid hitting exact limit */
  SAFETY_MARGIN: 10_000,
  /** System msgs, tools, history, formatting */
  SMARTAGENT_OVERHEAD: 180_000,
  /** Task prompt template size */
  TASK_PROMPT_OVERHEAD: 2_000,
} as const;
/**
 * Work out how many tokens may be spent on diff content.
 *
 * The safety margin, agent overhead and task-prompt overhead are deducted
 * from the context limit; the result never drops below a floor of 30000.
 *
 * @returns the token allowance for the diff portion of the prompt
 */
function calculateMaxDiffTokens(): number {
  const minimumDiffTokens = 30000;
  const reservedTokens =
    TOKEN_BUDGET.SAFETY_MARGIN +
    TOKEN_BUDGET.SMARTAGENT_OVERHEAD +
    TOKEN_BUDGET.TASK_PROMPT_OVERHEAD;
  const remainingTokens = TOKEN_BUDGET.OPENAI_CONTEXT_LIMIT - reservedTokens;
  return remainingTokens < minimumDiffTokens ? minimumDiffTokens : remainingTokens;
}
/**
 * Shape of the commit recommendation produced for the next commit.
 */
export interface INextCommitObject {
  /** The recommended next version level of the project. */
  recommendedNextVersionLevel: 'fix' | 'feat' | 'BREAKING CHANGE';
  /**
   * The recommended scope name of the next version, like "core" or "cli",
   * or specific class names.
   */
  recommendedNextVersionScope: string;
  /**
   * The commit message. Don't put fix() feat() or BREAKING CHANGE in the
   * message. Please just the message itself.
   */
  recommendedNextVersionMessage: string;
  /** Detailed bullet points for the changelog. */
  recommendedNextVersionDetails: string[];
  /** The recommended next version of the project, x.x.x */
  recommendedNextVersion: string;
  /** The changelog for the next version. */
  changelog?: string;
}
/**
 * Produces the recommendation for a project's next commit.
 *
 * The uncommitted git diff is collected, condensed to fit a token budget, and
 * handed to an agent that answers with a JSON commit description. A changelog
 * text (existing `changelog.md`, or one generated from the commit history when
 * the file is missing) is attached with a new templated entry prepended.
 */
export class Commit {
  private readonly aiDocsRef: AiDoc;
  private readonly projectDir: string;

  /**
   * @param aiDocsRef AiDoc instance; its `model` is passed to every agent run.
   * @param projectDirArg directory of the git repository to analyze
   */
  constructor(aiDocsRef: AiDoc, projectDirArg: string) {
    this.aiDocsRef = aiDocsRef;
    this.projectDir = projectDirArg;
  }

  /**
   * Build the commit recommendation for the current uncommitted changes.
   *
   * @returns the parsed agent answer with `changelog` set to the new
   *   changelog text. The `{{nextVersion}}`, `{{nextVersionScope}}`,
   *   `{{nextVersionMessage}}` and `{{nextVersionDetails}}` placeholders are
   *   left in that text as-is (presumably substituted by the caller).
   * @throws Error when the agent answer contains no JSON object
   * @throws SyntaxError when the extracted JSON cannot be parsed
   */
  public async buildNextCommitObject(): Promise<INextCommitObject> {
    const smartgitInstance = new plugins.smartgit.Smartgit();
    await smartgitInstance.init();
    const gitRepo = await plugins.smartgit.GitRepo.fromOpeningRepoDir(
      smartgitInstance,
      this.projectDir
    );

    const excludePatterns = Commit.createDiffExcludePatterns();
    // Pass glob patterns directly to smartgit - it handles matching internally
    const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns);

    // Process diffs intelligently using DiffProcessor
    const processedDiffString =
      diffStringArray.length > 0
        ? this.processDiffsForPrompt(diffStringArray, excludePatterns.length)
        : 'No changes.';

    const resultObject = await this.requestCommitObject(processedDiffString);

    const previousChangelogPath = plugins.path.join(this.projectDir, 'changelog.md');
    // Explicitly optional: stays undefined when no changelog.md exists yet.
    let previousChangelog: plugins.smartfile.SmartFile | undefined;
    if (await plugins.fsInstance.file(previousChangelogPath).exists()) {
      previousChangelog = await plugins.smartfileFactory.fromFilePath(previousChangelogPath);
    }
    if (!previousChangelog) {
      // lets build the changelog based on the commit history
      const commitMessages = await gitRepo.getAllCommitMessages();
      previousChangelog = plugins.smartfileFactory.fromString(
        previousChangelogPath,
        await this.requestChangelogFromCommitMessages(commitMessages),
        'utf8'
      );
    }

    // Drop the document title (and one leading newline) so the new entry can
    // be placed directly beneath a fresh title.
    let oldChangelog = previousChangelog.contents.toString().replace('# Changelog\n\n', '');
    if (oldChangelog.startsWith('\n')) {
      oldChangelog = oldChangelog.replace('\n', '');
    }
    const newDateString = new plugins.smarttime.ExtendedDate().exportToHyphedSortableDate();
    const newChangelog = `# Changelog\n\n## ${newDateString} - {{nextVersion}} - {{nextVersionScope}}
{{nextVersionMessage}}
{{nextVersionDetails}}\n\n${oldChangelog}`;
    resultObject.changelog = newChangelog;
    return resultObject;
  }

  /**
   * Create the list of paths/globs that are left out of the uncommitted diff.
   * A fresh array is returned on every call.
   */
  private static createDiffExcludePatterns(): string[] {
    // Define comprehensive exclusion patterns
    // smartgit@3.3.0+ supports glob patterns natively
    return [
      // Lock files
      'pnpm-lock.yaml',
      'package-lock.json',
      'npm-shrinkwrap.json',
      'yarn.lock',
      'deno.lock',
      'bun.lockb',
      // Build artifacts (main culprit for large diffs!)
      'dist/**',
      'dist_*/**', // dist_ts, dist_web, etc.
      'build/**',
      '.next/**',
      'out/**',
      'public/dist/**',
      // Compiled/bundled files
      '**/*.js.map',
      '**/*.d.ts.map',
      '**/*.min.js',
      '**/*.bundle.js',
      '**/*.chunk.js',
      // IDE/Editor directories
      '.claude/**',
      '.cursor/**',
      '.vscode/**',
      '.idea/**',
      '**/*.swp',
      '**/*.swo',
      // Logs and caches
      '.nogit/**',
      '**/*.log',
      '.cache/**',
      '.rpt2_cache/**',
      'coverage/**',
      '.nyc_output/**',
    ];
  }

  /**
   * Condense the raw per-file diffs into the text embedded in the task prompt,
   * printing diagnostic statistics along the way.
   *
   * @param diffStringArray non-empty list of raw diffs (one entry per changed file)
   * @param exclusionPatternCount number of exclusion patterns, for logging only
   * @returns the diff text formatted by DiffProcessor
   */
  private processDiffsForPrompt(diffStringArray: string[], exclusionPatternCount: number): string {
    // Diagnostic logging for raw diff statistics
    const totalChars = diffStringArray.join('\n\n').length;
    // Rough estimate: about four characters per token.
    const estimatedTokens = Math.ceil(totalChars / 4);
    console.log(`Raw git diff statistics:`);
    console.log(` Files changed: ${diffStringArray.length}`);
    console.log(` Total characters: ${totalChars.toLocaleString()}`);
    console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`);
    console.log(` Exclusion patterns: ${exclusionPatternCount}`);

    // Calculate available tokens for diff based on total budget
    const maxDiffTokens = calculateMaxDiffTokens();
    console.log(`Token budget: ${maxDiffTokens.toLocaleString()} tokens for diff (limit: ${TOKEN_BUDGET.OPENAI_CONTEXT_LIMIT.toLocaleString()}, overhead: ${(TOKEN_BUDGET.SMARTAGENT_OVERHEAD + TOKEN_BUDGET.TASK_PROMPT_OVERHEAD).toLocaleString()})`);

    // Use DiffProcessor to intelligently handle large diffs
    const diffProcessor = new DiffProcessor({
      maxDiffTokens, // Dynamic based on total budget
      smallFileLines: 300, // Most source files are under 300 lines
      mediumFileLines: 800, // Only very large files get head/tail treatment
      sampleHeadLines: 75, // When sampling, show more context
      sampleTailLines: 75, // When sampling, show more context
    });
    const processedDiff = diffProcessor.processDiffs(diffStringArray);
    const processedDiffString = diffProcessor.formatForContext(processedDiff);

    console.log(`Processed diff statistics:`);
    console.log(` Full diffs: ${processedDiff.fullDiffs.length} files`);
    console.log(` Summarized: ${processedDiff.summarizedDiffs.length} files`);
    console.log(` Metadata only: ${processedDiff.metadataOnly.length} files`);
    console.log(` Final tokens: ${processedDiff.totalTokens.toLocaleString()}`);
    if (estimatedTokens > 50000) {
      console.log(`DiffProcessor reduced token usage: ${estimatedTokens.toLocaleString()} -> ${processedDiff.totalTokens.toLocaleString()}`);
    }

    // Validate total tokens won't exceed limit
    const totalEstimatedTokens = processedDiff.totalTokens
      + TOKEN_BUDGET.SMARTAGENT_OVERHEAD
      + TOKEN_BUDGET.TASK_PROMPT_OVERHEAD;
    if (totalEstimatedTokens > TOKEN_BUDGET.OPENAI_CONTEXT_LIMIT - TOKEN_BUDGET.SAFETY_MARGIN) {
      console.log(`Warning: Estimated tokens (${totalEstimatedTokens.toLocaleString()}) approaching limit`);
      console.log(` Consider splitting into smaller commits`);
    }
    return processedDiffString;
  }

  /**
   * Ask the agent (with filesystem tools rooted at the project directory) for
   * the commit description and parse its answer.
   *
   * @param processedDiffString diff text to embed in the task prompt
   * @returns the parsed commit description
   * @throws Error when the answer contains no JSON object
   * @throws SyntaxError when the extracted JSON cannot be parsed
   */
  private async requestCommitObject(processedDiffString: string): Promise<INextCommitObject> {
    // Use runAgent for commit message generation with filesystem tool
    const fsTools = plugins.smartagentTools.filesystemTool({ rootDir: this.projectDir });
    const commitSystemPrompt = `
You create commit messages for git commits following semantic versioning conventions.
You have access to filesystem tools to explore the project if needed.
IMPORTANT RULES:
- Only READ files (package.json, source files) for context
- Do NOT write, delete, or modify any files
- Version level (fix/feat/BREAKING CHANGE) must match the scope of changes
- Commit message must be clear, professional, and follow conventional commit conventions
- Do NOT include personal information, licensing details, or AI mentions (Claude/Codex)
- JSON structure must be valid with all required fields
- Scope must accurately reflect the changed modules/files
`;
    const commitTaskPrompt = `
Project directory: ${this.projectDir}
You have access to filesystem tools to explore the project if needed:
- Use list_directory to see project structure
- Use read_file to read package.json or source files for context
Analyze the git diff below to understand what changed and generate a commit message.
You should not include any licensing information or personal information.
Never mention CLAUDE code, or codex.
Your final response must be ONLY valid JSON - the raw JSON object, nothing else.
No explanations, no summaries, no markdown - just the JSON object that can be parsed with JSON.parse().
Here is the structure of the JSON you must return:
{
"recommendedNextVersionLevel": "fix" | "feat" | "BREAKING CHANGE",
"recommendedNextVersionScope": "string",
"recommendedNextVersionMessage": "string (ONLY the description body WITHOUT the type(scope): prefix - e.g. 'bump dependency to ^1.2.6' NOT 'fix(deps): bump dependency to ^1.2.6')",
"recommendedNextVersionDetails": ["string"],
"recommendedNextVersion": "x.x.x"
}
For recommendedNextVersionDetails, only add entries that have obvious value to the reader.
Here is the git diff showing what changed:
${processedDiffString}
Analyze these changes and output the JSON commit message object.
`;
    logger.log('info', 'Starting commit message generation with agent...');
    const commitResult = await plugins.smartagent.runAgent({
      model: this.aiDocsRef.model,
      prompt: commitTaskPrompt,
      system: commitSystemPrompt,
      tools: fsTools,
      maxSteps: 10,
      onToolCall: (toolName) => logger.log('info', `[Commit] Tool call: ${toolName}`),
    });
    return Commit.parseCommitObject(commitResult.text);
  }

  /**
   * Extract and parse the JSON object from the agent's answer, tolerating
   * markdown code fences and surrounding text.
   *
   * NOTE(review): the parsed value is not validated against
   * INextCommitObject; missing or mistyped fields pass through unchanged.
   *
   * @param rawText text returned by the agent
   * @throws Error when no `{ ... }` span is present
   * @throws SyntaxError when the span is not valid JSON; the message carries
   *   the parser's reason and the start of the offending text
   */
  private static parseCommitObject(rawText: string): INextCommitObject {
    // Extract JSON from result - handle cases where AI adds text around it
    const unfencedText = rawText
      .replace(/```json\n?/gi, '')
      .replace(/```\n?/gi, '');
    // Greedy match: from the first "{" to the last "}" in the answer.
    const jsonMatch = unfencedText.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error(`Could not find JSON object in result: ${unfencedText.substring(0, 100)}...`);
    }
    const jsonString = jsonMatch[0];
    try {
      const resultObject: INextCommitObject = JSON.parse(jsonString);
      return resultObject;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      // Still a SyntaxError (as JSON.parse throws), but with enough context to debug.
      throw new SyntaxError(
        `Could not parse JSON object in result (${reason}): ${jsonString.substring(0, 100)}...`
      );
    }
  }

  /**
   * Ask the agent to write a changelog from the project's commit messages.
   * Used when the project has no changelog.md yet.
   *
   * @param commitMessages commit messages of the repository; only serialized
   *   with JSON.stringify here
   * @returns the agent's answer with markdown code fences removed
   */
  private async requestChangelogFromCommitMessages(commitMessages: unknown): Promise<string> {
    console.log(JSON.stringify(commitMessages, null, 2));
    const changelogSystemPrompt = `
You generate changelog.md files for software projects.
RULES:
- Changelog must follow proper markdown format with ## headers for each version
- Entries must be chronologically ordered (newest first)
- Version ranges for trivial commits should be properly summarized
- No duplicate or empty entries
- Format: ## yyyy-mm-dd - x.x.x - scope
`;
    const changelogTaskPrompt = `
You are building a changelog.md file for the project.
Omit commits and versions that lack relevant changes, but make sure to mention them as a range with a summarizing message instead.
A changelog entry should look like this:
## yyyy-mm-dd - x.x.x - scope here
main description here
- detailed bullet points follow
You are given:
* the commit messages of the project
Only return the changelog file content, so it can be written directly to changelog.md.
Here are the commit messages:
${JSON.stringify(commitMessages, null, 2)}
`;
    const changelogResult = await plugins.smartagent.runAgent({
      model: this.aiDocsRef.model,
      prompt: changelogTaskPrompt,
      system: changelogSystemPrompt,
      maxSteps: 1,
      onToolCall: (toolName) => logger.log('info', `[Changelog] Tool call: ${toolName}`),
    });
    return changelogResult.text.replaceAll('```markdown', '').replaceAll('```', '');
  }
}