feat(context): Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline
This commit is contained in:
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@git.zone/tsdoc',
|
||||
version: '1.8.3',
|
||||
version: '1.9.0',
|
||||
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import * as plugins from '../plugins.js';
|
||||
import { AiDoc } from '../classes.aidoc.js';
|
||||
import { ProjectContext } from './projectcontext.js';
|
||||
import { DiffProcessor } from '../context/diff-processor.js';
|
||||
|
||||
export interface INextCommitObject {
|
||||
recommendedNextVersionLevel: 'fix' | 'feat' | 'BREAKING CHANGE'; // the recommended next version level of the project
|
||||
@@ -74,22 +75,43 @@ export class Commit {
|
||||
// Pass glob patterns directly to smartgit - it handles matching internally
|
||||
const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns);
|
||||
|
||||
// Diagnostic logging for diff statistics
|
||||
// Process diffs intelligently using DiffProcessor
|
||||
let processedDiffString: string;
|
||||
|
||||
if (diffStringArray.length > 0) {
|
||||
// Diagnostic logging for raw diff statistics
|
||||
const totalChars = diffStringArray.join('\n\n').length;
|
||||
const estimatedTokens = Math.ceil(totalChars / 4);
|
||||
|
||||
console.log(`📊 Git diff statistics:`);
|
||||
console.log(`📊 Raw git diff statistics:`);
|
||||
console.log(` Files changed: ${diffStringArray.length}`);
|
||||
console.log(` Total characters: ${totalChars.toLocaleString()}`);
|
||||
console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`);
|
||||
console.log(` Exclusion patterns: ${excludePatterns.length}`);
|
||||
|
||||
// Use DiffProcessor to intelligently handle large diffs
|
||||
const diffProcessor = new DiffProcessor({
|
||||
maxDiffTokens: 100000, // Reserve 100k tokens for diffs
|
||||
smallFileLines: 50, // Include files <= 50 lines fully
|
||||
mediumFileLines: 200, // Summarize files <= 200 lines
|
||||
sampleHeadLines: 20, // Show first 20 lines
|
||||
sampleTailLines: 20, // Show last 20 lines
|
||||
});
|
||||
|
||||
const processedDiff = diffProcessor.processDiffs(diffStringArray);
|
||||
processedDiffString = diffProcessor.formatForContext(processedDiff);
|
||||
|
||||
console.log(`📝 Processed diff statistics:`);
|
||||
console.log(` Full diffs: ${processedDiff.fullDiffs.length} files`);
|
||||
console.log(` Summarized: ${processedDiff.summarizedDiffs.length} files`);
|
||||
console.log(` Metadata only: ${processedDiff.metadataOnly.length} files`);
|
||||
console.log(` Final tokens: ${processedDiff.totalTokens.toLocaleString()}`);
|
||||
|
||||
if (estimatedTokens > 50000) {
|
||||
console.warn(`⚠️ WARNING: Unusually large diff (${estimatedTokens.toLocaleString()} tokens)`);
|
||||
console.warn(` This may indicate build artifacts or large files in the diff.`);
|
||||
console.warn(` Consider reviewing uncommitted changes or improving exclusion patterns.`);
|
||||
console.log(`✅ DiffProcessor reduced token usage: ${estimatedTokens.toLocaleString()} → ${processedDiff.totalTokens.toLocaleString()}`);
|
||||
}
|
||||
} else {
|
||||
processedDiffString = 'No changes.';
|
||||
}
|
||||
|
||||
// Use the new TaskContextFactory for optimized context
|
||||
@@ -98,11 +120,9 @@ export class Commit {
|
||||
this.aiDocsRef.openaiInstance
|
||||
);
|
||||
await taskContextFactory.initialize();
|
||||
|
||||
|
||||
// Generate context specifically for commit task
|
||||
const contextResult = await taskContextFactory.createContextForCommit(
|
||||
diffStringArray[0] ? diffStringArray.join('\n\n') : 'No changes.'
|
||||
);
|
||||
const contextResult = await taskContextFactory.createContextForCommit(processedDiffString);
|
||||
|
||||
// Get the optimized context string
|
||||
let contextString = contextResult.context;
|
||||
|
||||
341
ts/context/diff-processor.ts
Normal file
341
ts/context/diff-processor.ts
Normal file
@@ -0,0 +1,341 @@
|
||||
/**
|
||||
* Intelligent git diff processor that handles large diffs by sampling and prioritization
|
||||
* instead of blind truncation.
|
||||
*/
|
||||
|
||||
/**
 * Parsed representation of a single file's git diff.
 */
export interface IDiffFileInfo {
  /** Path of the changed file as read from the diff headers (empty when no path header was found). */
  filepath: string;
  /** Kind of change the diff describes for this file. */
  status: 'added' | 'modified' | 'deleted';
  /** Number of added (`+`) lines counted in the diff. */
  linesAdded: number;
  /** Number of removed (`-`) lines counted in the diff. */
  linesRemoved: number;
  /** Changed-line total: `linesAdded + linesRemoved` (context lines are not included). */
  totalLines: number;
  /** Rough token estimate for the raw diff text (about one token per four characters). */
  estimatedTokens: number;
  /** The unmodified diff text this info was parsed from. */
  diffContent: string;
}
|
||||
|
||||
/**
 * Result of processing a set of per-file diffs: every file ends up in exactly one of
 * `fullDiffs`, `summarizedDiffs` or `metadataOnly`.
 */
export interface IProcessedDiff {
  /** Human-readable overview (file counts per category and the token estimate). */
  summary: string;
  /** Small files: the complete diff text, prefixed with a status emoji. */
  fullDiffs: string[];
  /** Medium files: a head/tail sample of the diff with the middle omitted. */
  summarizedDiffs: string[];
  /** Large or over-budget files: one line each with path and added/removed counts. */
  metadataOnly: string[];
  /** Number of files that were parsed (empty diff strings are not counted). */
  totalFiles: number;
  /** Estimated tokens consumed by the full and summarized entries. */
  totalTokens: number;
}
|
||||
|
||||
/**
 * Tuning knobs for `DiffProcessor`. Every field is optional; omitted fields fall back
 * to the defaults noted below.
 */
export interface IDiffProcessorOptions {
  /** Maximum tokens for the entire diff section (default: 100000). */
  maxDiffTokens?: number;
  /** Files with at most this many changed lines are included fully (default: 50). */
  smallFileLines?: number;
  /** Files with at most this many changed lines are summarized (default: 200). */
  mediumFileLines?: number;
  /** Lines to show at the start of a summarized file (default: 20). */
  sampleHeadLines?: number;
  /** Lines to show at the end of a summarized file (default: 20). */
  sampleTailLines?: number;
}
|
||||
|
||||
/**
 * Converts raw per-file git diffs into a prioritized, token-budgeted digest.
 *
 * Each input string is expected to hold the diff of ONE file. Files are ranked by
 * importance, then placed into one of three buckets depending on how many lines
 * changed and how much of the token budget is left:
 *  - small files are included in full,
 *  - medium files are reduced to a head/tail sample,
 *  - large files (or anything that no longer fits the budget) are listed as metadata only.
 */
export class DiffProcessor {
  /** Fully resolved options; every field has a concrete value after construction. */
  private options: Required<IDiffProcessorOptions>;

  /**
   * @param options Optional overrides; any omitted field falls back to its default.
   */
  constructor(options: IDiffProcessorOptions = {}) {
    this.options = {
      maxDiffTokens: options.maxDiffTokens ?? 100000,
      smallFileLines: options.smallFileLines ?? 50,
      mediumFileLines: options.mediumFileLines ?? 200,
      sampleHeadLines: options.sampleHeadLines ?? 20,
      sampleTailLines: options.sampleTailLines ?? 20,
    };
  }

  /**
   * Process an array of git diffs into a structured, token-efficient format.
   *
   * @param diffStringArray One diff string per changed file; empty/blank entries are ignored.
   * @returns The categorized diffs together with a summary and the tokens consumed.
   */
  public processDiffs(diffStringArray: string[]): IProcessedDiff {
    // Parse all diffs into file info objects, dropping entries that could not be parsed.
    const fileInfos = diffStringArray
      .map((diffString) => this.parseDiffFile(diffString))
      .filter((info): info is IDiffFileInfo => info !== null);

    // Most important files first so they get first pick of the token budget.
    const prioritized = this.prioritizeFiles(fileInfos);

    const result: IProcessedDiff = {
      summary: '',
      fullDiffs: [],
      summarizedDiffs: [],
      metadataOnly: [],
      totalFiles: prioritized.length,
      totalTokens: 0,
    };

    let tokensUsed = 0;
    const tokenBudget = this.options.maxDiffTokens;

    for (const fileInfo of prioritized) {
      const remainingBudget = tokenBudget - tokensUsed;

      if (remainingBudget <= 0) {
        // Budget exhausted - everything that is left is listed as metadata only.
        result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        continue;
      }

      if (fileInfo.totalLines <= this.options.smallFileLines) {
        // Small file - include fully if the budget allows.
        if (fileInfo.estimatedTokens <= remainingBudget) {
          const statusPrefix = this.getFileStatusPrefix(fileInfo);
          result.fullDiffs.push(`${statusPrefix}${fileInfo.diffContent}`);
          tokensUsed += fileInfo.estimatedTokens;
        } else {
          result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        }
      } else if (fileInfo.totalLines <= this.options.mediumFileLines) {
        // Medium file - try to include a head/tail sample.
        const summary = this.extractDiffSample(
          fileInfo,
          this.options.sampleHeadLines,
          this.options.sampleTailLines,
        );
        const summaryTokens = Math.ceil(summary.length / 4); // Rough estimate

        if (summaryTokens <= remainingBudget) {
          result.summarizedDiffs.push(summary);
          tokensUsed += summaryTokens;
        } else {
          result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        }
      } else {
        // Large file - metadata only.
        result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
      }
    }

    result.totalTokens = tokensUsed;
    result.summary = this.generateSummary(result);

    return result;
  }

  /**
   * Format the processed diff for inclusion in context.
   *
   * @param processed Result previously produced by `processDiffs`.
   * @returns A single string with delimited sections; empty categories are left out.
   */
  public formatForContext(processed: IProcessedDiff): string {
    const sections: string[] = [];

    // Summary section
    sections.push('====== GIT DIFF SUMMARY ======');
    sections.push(processed.summary);
    sections.push('');

    // Full diffs section
    if (processed.fullDiffs.length > 0) {
      sections.push(`====== FULL DIFFS (${processed.fullDiffs.length} files) ======`);
      sections.push(processed.fullDiffs.join('\n\n'));
      sections.push('');
    }

    // Summarized diffs section
    if (processed.summarizedDiffs.length > 0) {
      sections.push(`====== SUMMARIZED DIFFS (${processed.summarizedDiffs.length} files) ======`);
      sections.push(processed.summarizedDiffs.join('\n\n'));
      sections.push('');
    }

    // Metadata only section
    if (processed.metadataOnly.length > 0) {
      sections.push(`====== METADATA ONLY (${processed.metadataOnly.length} files) ======`);
      sections.push(processed.metadataOnly.join('\n'));
      sections.push('');
    }

    sections.push('====== END OF GIT DIFF ======');

    return sections.join('\n');
  }

  /**
   * Parse a single git diff string into file information.
   *
   * Path headers (`--- ` / `+++ `) are only interpreted before the first `@@` hunk
   * marker, so changed lines whose own content begins with `--` or `++` are counted
   * as changes instead of being mistaken for headers. A `/dev/null` side (or a
   * `new file mode` / `deleted file mode` header) determines added/deleted status.
   *
   * @param diffString Diff text of one file.
   * @returns The parsed info, or `null` when the input is empty or whitespace only.
   */
  private parseDiffFile(diffString: string): IDiffFileInfo | null {
    if (!diffString || diffString.trim().length === 0) {
      return null;
    }

    // undefined = header not seen, null = header pointed at /dev/null.
    let oldPath: string | null | undefined;
    let newPath: string | null | undefined;
    let gitHeaderPath = '';
    let markedAdded = false;
    let markedDeleted = false;
    let insideHunk = false;
    let linesAdded = 0;
    let linesRemoved = 0;

    for (const line of diffString.split('\n')) {
      if (line.startsWith('diff --git ')) {
        // A new file header always ends any previous hunk.
        insideHunk = false;
        const headerMatch = /^diff --git a\/.+ b\/(.+)$/.exec(line.trimEnd());
        if (headerMatch) {
          // Fallback path for diffs without ---/+++ lines (e.g. binary or mode-only changes).
          gitHeaderPath = headerMatch[1];
        }
        continue;
      }

      if (line.startsWith('@@')) {
        insideHunk = true;
        continue;
      }

      if (!insideHunk) {
        if (line.startsWith('new file mode')) {
          markedAdded = true;
          continue;
        }
        if (line.startsWith('deleted file mode')) {
          markedDeleted = true;
          continue;
        }
        if (line.startsWith('--- ')) {
          oldPath = this.normalizeDiffPath(line.slice(4), 'a/');
          continue;
        }
        if (line.startsWith('+++ ')) {
          newPath = this.normalizeDiffPath(line.slice(4), 'b/');
          continue;
        }
        if (line.startsWith('---') || line.startsWith('+++')) {
          // Malformed header outside a hunk - never a changed line.
          continue;
        }
      }

      if (line.startsWith('+')) {
        linesAdded++;
      } else if (line.startsWith('-')) {
        linesRemoved++;
      }
    }

    let status: 'added' | 'modified' | 'deleted' = 'modified';
    if (markedAdded || oldPath === null) {
      status = 'added';
    } else if (markedDeleted || newPath === null) {
      status = 'deleted';
    }

    // Prefer the new path; deleted files only have an old path.
    const filepath = newPath ?? oldPath ?? gitHeaderPath;
    const totalLines = linesAdded + linesRemoved;
    const estimatedTokens = Math.ceil(diffString.length / 4);

    return {
      filepath,
      status,
      linesAdded,
      linesRemoved,
      totalLines,
      estimatedTokens,
      diffContent: diffString,
    };
  }

  /**
   * Clean up the path part of a `--- ` / `+++ ` header line.
   *
   * @param rawPath Text following the four-character header marker.
   * @param prefix  Side prefix to strip when present (`a/` for old, `b/` for new).
   * @returns The cleaned path, or `null` when the header points at `/dev/null`.
   */
  private normalizeDiffPath(rawPath: string, prefix: string): string | null {
    // Some diff producers append a tab-separated timestamp after the path.
    const cleaned = rawPath.split('\t')[0].trim();
    if (cleaned === '/dev/null') {
      return null;
    }
    return cleaned.startsWith(prefix) ? cleaned.slice(prefix.length) : cleaned;
  }

  /**
   * Prioritize files by importance (source files before build artifacts).
   *
   * @param files Parsed file infos; the array itself is left untouched.
   * @returns A new array ordered from highest to lowest importance score.
   */
  private prioritizeFiles(files: IDiffFileInfo[]): IDiffFileInfo[] {
    // Copy first: Array.prototype.sort reorders the array it is called on.
    return [...files].sort(
      (a, b) => this.getFileImportanceScore(b.filepath) - this.getFileImportanceScore(a.filepath),
    );
  }

  /**
   * Calculate importance score for a file path (higher means more important).
   *
   * @param filepath Path relative to the repository root.
   */
  private getFileImportanceScore(filepath: string): number {
    // Build artifacts - low priority. Checked first so that generated JSON or
    // Markdown inside an output directory is not mistaken for config or docs.
    if (/^(dist|build|out|\.next|public\/dist)\//.test(filepath)) {
      return 10;
    }

    // Source files - highest priority
    if (/^(src|lib|app|components|pages|api)\//.test(filepath)) {
      return 100;
    }

    // Test files - high priority
    if (/\.(test|spec)\.(ts|js|tsx|jsx)$/.test(filepath) || filepath.startsWith('test/')) {
      return 80;
    }

    // Configuration files - medium-high priority
    if (/\.(json|yaml|yml|toml|config\.(ts|js))$/.test(filepath)) {
      return 60;
    }

    // Documentation - medium priority
    if (/\.(md|txt|rst)$/.test(filepath)) {
      return 40;
    }

    // Everything else - default priority
    return 50;
  }

  /**
   * Extract head and tail lines from a diff, omitting the middle.
   *
   * The diff is split into a header (everything up to and including the first `@@`
   * line) and a body. When the body is no longer than `headLines + tailLines` there
   * is nothing to omit and the raw diff is returned unchanged.
   *
   * @param fileInfo  Parsed file whose `diffContent` is sampled.
   * @param headLines Number of body lines to keep from the start.
   * @param tailLines Number of body lines to keep from the end.
   */
  private extractDiffSample(fileInfo: IDiffFileInfo, headLines: number, tailLines: number): string {
    const lines = fileInfo.diffContent.split('\n');
    const headCount = Math.max(0, headLines);
    const tailCount = Math.max(0, tailLines);

    // Header = everything up to and including the first hunk marker (if any).
    const firstHunkIndex = lines.findIndex((line) => line.startsWith('@@'));
    const bodyStartIndex = firstHunkIndex === -1 ? 0 : firstHunkIndex + 1;
    const headerLines = lines.slice(0, bodyStartIndex);
    const bodyLines = lines.slice(bodyStartIndex);

    if (bodyLines.length <= headCount + tailCount) {
      // Head and tail would touch or overlap - include the diff fully.
      return fileInfo.diffContent;
    }

    const head = bodyLines.slice(0, headCount);
    // slice(-0) would return the whole body, so a zero tail is handled explicitly.
    const tail = tailCount > 0 ? bodyLines.slice(-tailCount) : [];
    const omittedLines = bodyLines.length - headCount - tailCount;

    const parts: string[] = [
      `${this.getStatusEmoji(fileInfo.status)} FILE: ${fileInfo.filepath}`,
      `CHANGES: +${fileInfo.linesAdded} lines, -${fileInfo.linesRemoved} lines (${fileInfo.totalLines} total)`,
      '',
      ...headerLines,
      ...head,
      '',
      `[... ${omittedLines} lines omitted - use Read tool to see full file ...]`,
      '',
      ...tail,
    ];

    return parts.join('\n');
  }

  /**
   * Map a file status to the emoji used in all formatted output.
   */
  private getStatusEmoji(status: IDiffFileInfo['status']): string {
    if (status === 'added') {
      return '➕';
    }
    return status === 'deleted' ? '➖' : '📝';
  }

  /**
   * Get file status prefix with emoji (emoji followed by a single space).
   */
  private getFileStatusPrefix(fileInfo: IDiffFileInfo): string {
    return `${this.getStatusEmoji(fileInfo.status)} `;
  }

  /**
   * Extract filepath from diff content by looking for the first `+++ b/` header.
   * Not called by the other methods of this class; kept for compatibility.
   *
   * @returns The path after `+++ b/`, or `'unknown'` when no such line exists.
   */
  private extractFilepathFromDiff(diffContent: string): string {
    const lines = diffContent.split('\n');
    for (const line of lines) {
      if (line.startsWith('+++ b/')) {
        return line.substring(6);
      }
    }
    return 'unknown';
  }

  /**
   * Format file info as a single metadata line: emoji, path and added/removed counts.
   */
  private formatMetadataOnly(fileInfo: IDiffFileInfo): string {
    return `${this.getStatusEmoji(fileInfo.status)} ${fileInfo.filepath} (+${fileInfo.linesAdded}, -${fileInfo.linesRemoved})`;
  }

  /**
   * Generate human-readable summary of processed diff.
   *
   * @param result Processed diff whose category arrays and token total are already filled in.
   */
  private generateSummary(result: IProcessedDiff): string {
    const parts: string[] = [];
    parts.push(`Files changed: ${result.totalFiles} total`);
    parts.push(`- ${result.fullDiffs.length} included in full`);
    parts.push(`- ${result.summarizedDiffs.length} summarized (head/tail shown)`);
    parts.push(`- ${result.metadataOnly.length} metadata only`);
    parts.push(`Estimated tokens: ~${result.totalTokens.toLocaleString()}`);

    if (result.metadataOnly.length > 0) {
      // Point the reader at a way to recover what was left out.
      parts.push('');
      parts.push('NOTE: Some files excluded to stay within token budget.');
      parts.push('Use Read tool with specific file paths to see full content.');
    }

    return parts.join('\n');
  }
}
|
||||
@@ -5,6 +5,7 @@ import { ContextTrimmer } from './context-trimmer.js';
|
||||
import { LazyFileLoader } from './lazy-file-loader.js';
|
||||
import { ContextCache } from './context-cache.js';
|
||||
import { ContextAnalyzer } from './context-analyzer.js';
|
||||
import { DiffProcessor } from './diff-processor.js';
|
||||
import type {
|
||||
ContextMode,
|
||||
IContextConfig,
|
||||
@@ -24,7 +25,10 @@ import type {
|
||||
IFileAnalysis,
|
||||
IAnalysisResult,
|
||||
IIterativeConfig,
|
||||
IIterativeContextResult
|
||||
IIterativeContextResult,
|
||||
IDiffFileInfo,
|
||||
IProcessedDiff,
|
||||
IDiffProcessorOptions
|
||||
} from './types.js';
|
||||
|
||||
export {
|
||||
@@ -36,6 +40,7 @@ export {
|
||||
LazyFileLoader,
|
||||
ContextCache,
|
||||
ContextAnalyzer,
|
||||
DiffProcessor,
|
||||
};
|
||||
|
||||
// Types
|
||||
@@ -58,5 +63,8 @@ export type {
|
||||
IFileAnalysis,
|
||||
IAnalysisResult,
|
||||
IIterativeConfig,
|
||||
IIterativeContextResult
|
||||
IIterativeContextResult,
|
||||
IDiffFileInfo,
|
||||
IProcessedDiff,
|
||||
IDiffProcessorOptions
|
||||
};
|
||||
@@ -318,4 +318,7 @@ export interface IIterativeContextResult extends IContextResult {
|
||||
apiCallCount: number;
|
||||
/** Total duration in ms */
|
||||
totalDuration: number;
|
||||
}
|
||||
}
|
||||
|
||||
// Export DiffProcessor types
|
||||
export type { IDiffFileInfo, IProcessedDiff, IDiffProcessorOptions } from './diff-processor.js';
|
||||
Reference in New Issue
Block a user