feat(context): Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline

This commit is contained in:
2025-11-04 02:19:57 +00:00
parent f84a65217d
commit 4bf0c02618
7 changed files with 698 additions and 13 deletions

View File

@@ -1,6 +1,7 @@
import * as plugins from '../plugins.js';
import { AiDoc } from '../classes.aidoc.js';
import { ProjectContext } from './projectcontext.js';
import { DiffProcessor } from '../context/diff-processor.js';
export interface INextCommitObject {
recommendedNextVersionLevel: 'fix' | 'feat' | 'BREAKING CHANGE'; // the recommended next version level of the project
@@ -74,22 +75,43 @@ export class Commit {
// Pass glob patterns directly to smartgit - it handles matching internally
const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns);
// Diagnostic logging for diff statistics
// Process diffs intelligently using DiffProcessor
let processedDiffString: string;
if (diffStringArray.length > 0) {
// Diagnostic logging for raw diff statistics
const totalChars = diffStringArray.join('\n\n').length;
const estimatedTokens = Math.ceil(totalChars / 4);
console.log(`📊 Git diff statistics:`);
console.log(`📊 Raw git diff statistics:`);
console.log(` Files changed: ${diffStringArray.length}`);
console.log(` Total characters: ${totalChars.toLocaleString()}`);
console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`);
console.log(` Exclusion patterns: ${excludePatterns.length}`);
// Use DiffProcessor to intelligently handle large diffs
const diffProcessor = new DiffProcessor({
maxDiffTokens: 100000, // Reserve 100k tokens for diffs
smallFileLines: 50, // Include files <= 50 lines fully
mediumFileLines: 200, // Summarize files <= 200 lines
sampleHeadLines: 20, // Show first 20 lines
sampleTailLines: 20, // Show last 20 lines
});
const processedDiff = diffProcessor.processDiffs(diffStringArray);
processedDiffString = diffProcessor.formatForContext(processedDiff);
console.log(`📝 Processed diff statistics:`);
console.log(` Full diffs: ${processedDiff.fullDiffs.length} files`);
console.log(` Summarized: ${processedDiff.summarizedDiffs.length} files`);
console.log(` Metadata only: ${processedDiff.metadataOnly.length} files`);
console.log(` Final tokens: ${processedDiff.totalTokens.toLocaleString()}`);
if (estimatedTokens > 50000) {
console.warn(`⚠️ WARNING: Unusually large diff (${estimatedTokens.toLocaleString()} tokens)`);
console.warn(` This may indicate build artifacts or large files in the diff.`);
console.warn(` Consider reviewing uncommitted changes or improving exclusion patterns.`);
console.log(`✅ DiffProcessor reduced token usage: ${estimatedTokens.toLocaleString()} → ${processedDiff.totalTokens.toLocaleString()}`);
}
} else {
processedDiffString = 'No changes.';
}
// Use the new TaskContextFactory for optimized context
@@ -98,11 +120,9 @@ export class Commit {
this.aiDocsRef.openaiInstance
);
await taskContextFactory.initialize();
// Generate context specifically for commit task
const contextResult = await taskContextFactory.createContextForCommit(
diffStringArray[0] ? diffStringArray.join('\n\n') : 'No changes.'
);
const contextResult = await taskContextFactory.createContextForCommit(processedDiffString);
// Get the optimized context string
let contextString = contextResult.context;