feat(context): Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline

This commit is contained in:
2025-11-04 02:19:57 +00:00
parent f84a65217d
commit 4bf0c02618
7 changed files with 698 additions and 13 deletions

View File

@@ -0,0 +1,304 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import { DiffProcessor } from '../ts/context/diff-processor.js';
// Sample diff strings for testing
const createSmallDiff = (filepath: string, addedLines = 5, removedLines = 3): string => {
const lines: string[] = [];
lines.push(`--- a/${filepath}`);
lines.push(`+++ b/${filepath}`);
lines.push(`@@ -1,10 +1,12 @@`);
for (let i = 0; i < removedLines; i++) {
lines.push(`-removed line ${i + 1}`);
}
for (let i = 0; i < addedLines; i++) {
lines.push(`+added line ${i + 1}`);
}
lines.push(' unchanged line');
return lines.join('\n');
};
const createMediumDiff = (filepath: string): string => {
const lines: string[] = [];
lines.push(`--- a/${filepath}`);
lines.push(`+++ b/${filepath}`);
lines.push(`@@ -1,100 +1,150 @@`);
// 150 lines of changes
for (let i = 0; i < 75; i++) {
lines.push(`+added line ${i + 1}`);
}
for (let i = 0; i < 75; i++) {
lines.push(`-removed line ${i + 1}`);
}
return lines.join('\n');
};
const createLargeDiff = (filepath: string): string => {
const lines: string[] = [];
lines.push(`--- a/${filepath}`);
lines.push(`+++ b/${filepath}`);
lines.push(`@@ -1,1000 +1,1500 @@`);
// 2500 lines of changes
for (let i = 0; i < 1250; i++) {
lines.push(`+added line ${i + 1}`);
}
for (let i = 0; i < 1250; i++) {
lines.push(`-removed line ${i + 1}`);
}
return lines.join('\n');
};
const createDeletedFileDiff = (filepath: string): string => {
return `--- a/${filepath}
+++ /dev/null
@@ -1,5 +0,0 @@
-deleted line 1
-deleted line 2
-deleted line 3
-deleted line 4
-deleted line 5`;
};
const createAddedFileDiff = (filepath: string): string => {
return `--- /dev/null
+++ b/${filepath}
@@ -0,0 +1,5 @@
+added line 1
+added line 2
+added line 3
+added line 4
+added line 5`;
};
tap.test('DiffProcessor should parse small diff correctly', async () => {
const processor = new DiffProcessor();
const smallDiff = createSmallDiff('src/test.ts', 5, 3);
const result = processor.processDiffs([smallDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(1);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(0);
expect(result.totalTokens).toBeGreaterThan(0);
});
tap.test('DiffProcessor should summarize medium diff', async () => {
const processor = new DiffProcessor();
const mediumDiff = createMediumDiff('src/medium-file.ts');
const result = processor.processDiffs([mediumDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(1);
expect(result.metadataOnly.length).toEqual(0);
// Verify the summarized diff contains the sample
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('SUMMARIZED DIFFS');
expect(formatted).toInclude('lines omitted');
});
tap.test('DiffProcessor should handle large diff as metadata only', async () => {
const processor = new DiffProcessor();
const largeDiff = createLargeDiff('dist/bundle.js');
const result = processor.processDiffs([largeDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('METADATA ONLY');
expect(formatted).toInclude('dist/bundle.js');
});
tap.test('DiffProcessor should prioritize source files over build artifacts', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('dist/bundle.js'),
createSmallDiff('src/important.ts'),
createSmallDiff('build/output.js'),
createSmallDiff('src/core.ts'),
];
const result = processor.processDiffs(diffs);
expect(result.totalFiles).toEqual(4);
// Source files should be included fully first
const formatted = processor.formatForContext(result);
const srcImportantIndex = formatted.indexOf('src/important.ts');
const srcCoreIndex = formatted.indexOf('src/core.ts');
const distBundleIndex = formatted.indexOf('dist/bundle.js');
const buildOutputIndex = formatted.indexOf('build/output.js');
// Source files should appear before build artifacts
expect(srcImportantIndex).toBeLessThan(distBundleIndex);
expect(srcCoreIndex).toBeLessThan(buildOutputIndex);
});
tap.test('DiffProcessor should respect token budget', async () => {
const processor = new DiffProcessor({
maxDiffTokens: 500, // Very small budget to force metadata-only
});
// Create multiple large diffs that will exceed budget
const diffs = [
createLargeDiff('src/file1.ts'),
createLargeDiff('src/file2.ts'),
createLargeDiff('src/file3.ts'),
createLargeDiff('src/file4.ts'),
];
const result = processor.processDiffs(diffs);
expect(result.totalTokens).toBeLessThanOrEqual(500);
// With such a small budget and large files, most should be metadata only
expect(result.metadataOnly.length).toBeGreaterThanOrEqual(2);
});
tap.test('DiffProcessor should handle deleted files', async () => {
const processor = new DiffProcessor();
const deletedDiff = createDeletedFileDiff('src/old-file.ts');
const result = processor.processDiffs([deletedDiff]);
expect(result.totalFiles).toEqual(1);
// Small deleted file should be included fully
expect(result.fullDiffs.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('src/old-file.ts');
// Verify the file appears in the output
expect(formatted).toInclude('FULL DIFFS');
});
tap.test('DiffProcessor should handle added files', async () => {
const processor = new DiffProcessor();
const addedDiff = createAddedFileDiff('src/new-file.ts');
const result = processor.processDiffs([addedDiff]);
expect(result.totalFiles).toEqual(1);
// Small added file should be included fully
expect(result.fullDiffs.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('src/new-file.ts');
// Verify the file appears in the output
expect(formatted).toInclude('FULL DIFFS');
});
tap.test('DiffProcessor should handle mixed file sizes', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/small.ts'),
createMediumDiff('src/medium.ts'),
createLargeDiff('dist/large.js'),
];
const result = processor.processDiffs(diffs);
expect(result.totalFiles).toEqual(3);
expect(result.fullDiffs.length).toEqual(1); // small file
expect(result.summarizedDiffs.length).toEqual(1); // medium file
expect(result.metadataOnly.length).toEqual(1); // large file
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('FULL DIFFS (1 files)');
expect(formatted).toInclude('SUMMARIZED DIFFS (1 files)');
expect(formatted).toInclude('METADATA ONLY (1 files)');
});
tap.test('DiffProcessor should handle empty diff array', async () => {
const processor = new DiffProcessor();
const result = processor.processDiffs([]);
expect(result.totalFiles).toEqual(0);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(0);
expect(result.totalTokens).toEqual(0);
});
tap.test('DiffProcessor should generate comprehensive summary', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/file1.ts'),
createSmallDiff('src/file2.ts'),
createMediumDiff('src/file3.ts'),
createLargeDiff('dist/bundle.js'),
];
const result = processor.processDiffs(diffs);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('GIT DIFF SUMMARY');
expect(formatted).toInclude('Files changed: 4 total');
expect(formatted).toInclude('included in full');
expect(formatted).toInclude('summarized');
expect(formatted).toInclude('metadata only');
expect(formatted).toInclude('Estimated tokens:');
expect(formatted).toInclude('END OF GIT DIFF');
});
tap.test('DiffProcessor should handle custom options', async () => {
const processor = new DiffProcessor({
maxDiffTokens: 50000,
smallFileLines: 30,
mediumFileLines: 150,
sampleHeadLines: 10,
sampleTailLines: 10,
});
const mediumDiff = createMediumDiff('src/file.ts'); // 150 lines
const result = processor.processDiffs([mediumDiff]);
// With custom settings, this should be summarized (exactly at the mediumFileLines threshold)
expect(result.summarizedDiffs.length).toEqual(1);
});
tap.test('DiffProcessor should prioritize test files appropriately', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/core.ts'),
createSmallDiff('test/core.test.ts'),
createSmallDiff('config.json'),
];
const result = processor.processDiffs(diffs);
const formatted = processor.formatForContext(result);
// Source files should come before test files
const srcIndex = formatted.indexOf('src/core.ts');
const testIndex = formatted.indexOf('test/core.test.ts');
expect(srcIndex).toBeLessThan(testIndex);
});
tap.test('DiffProcessor should handle files with no changes gracefully', async () => {
const processor = new DiffProcessor();
const emptyDiff = `--- a/src/file.ts
+++ b/src/file.ts
@@ -1,1 +1,1 @@`;
const result = processor.processDiffs([emptyDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(1); // Still included as a small file
});
export default tap.start();