Compare commits

..

6 Commits

Author SHA1 Message Date
c24ce31b1f 1.9.2
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-04 03:43:27 +00:00
fec2017cc6 fix(deps): Update dependencies and devDependencies to newer versions (bump multiple packages) 2025-11-04 03:43:27 +00:00
88fac91c79 1.9.1
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-04 02:28:55 +00:00
ce4da89da9 fix(iterative-context-builder): Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging 2025-11-04 02:28:55 +00:00
6524adea18 1.9.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-04 02:19:57 +00:00
4bf0c02618 feat(context): Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline 2025-11-04 02:19:57 +00:00
10 changed files with 2452 additions and 2435 deletions

View File

@@ -1,5 +1,30 @@
# Changelog # Changelog
## 2025-11-04 - 1.9.2 - fix(deps)
Update dependencies and devDependencies to newer versions (bump multiple packages)
- Bumped devDependencies: @git.zone/tsbuild 2.6.8 -> 2.7.1, @git.zone/tsrun 1.2.46 -> 1.6.2, @git.zone/tstest 2.3.6 -> 2.7.0
- Bumped runtime dependencies: @push.rocks/smartai 0.5.11 -> 0.8.0, @push.rocks/smartcli 4.0.11 -> 4.0.19, @push.rocks/smartgit 3.2.1 -> 3.3.1, @push.rocks/smartlog 3.1.9 -> 3.1.10, gpt-tokenizer 3.0.1 -> 3.2.0, typedoc 0.28.12 -> 0.28.14, typescript 5.9.2 -> 5.9.3
- No source code changes in this commit; dependency-only updates. Run the test suite and CI to verify compatibility.
## 2025-11-04 - 1.9.1 - fix(iterative-context-builder)
Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging
- Removed raw character-based truncation of additionalContext — diffs are expected to be pre-processed by DiffProcessor instead of blind substring truncation.
- Now validates pre-processed diff token count only and treats DiffProcessor as the primary sampler (DiffProcessor typically uses a ~100k token budget).
- Increased MAX_DIFF_TOKENS safety net to 200,000 to cover edge cases and avoid false positives; updated logs to reflect pre-processed diffs.
- Improved error messaging to indicate a likely DiffProcessor misconfiguration when pre-processed diffs exceed the safety limit.
- Updated informational logs to state that a pre-processed git diff was added to context.
## 2025-11-04 - 1.9.0 - feat(context)
Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline
- Add DiffProcessor (ts/context/diff-processor.ts) to intelligently process git diffs: include small files fully, summarize medium files (head/tail sampling), and mark very large files as metadata-only to stay within token budgets.
- Integrate DiffProcessor into commit workflow (ts/aidocs_classes/commit.ts): preprocess raw diffs, emit processed diff statistics, and pass a token-efficient diff section into the TaskContextFactory for commit context generation.
- Export DiffProcessor and its types through the context index and types (ts/context/index.ts, ts/context/types.ts) so other context components can reuse it.
- Add comprehensive tests for the DiffProcessor behavior and integration (test/test.diffprocessor.node.ts) covering small/medium/large diffs, added/deleted files, prioritization, token budgets, and formatting for context.
- Minor adjustments across context/task factories and builders to accept and propagate processed diff strings rather than raw diffs, reducing risk of token overflows during iterative context building.
## 2025-11-04 - 1.8.3 - fix(context) ## 2025-11-04 - 1.8.3 - fix(context)
Prevent enormous git diffs and OOM during context building by adding exclusion patterns, truncation, and diagnostic logging Prevent enormous git diffs and OOM during context building by adding exclusion patterns, truncation, and diagnostic logging

View File

@@ -1,6 +1,6 @@
{ {
"name": "@git.zone/tsdoc", "name": "@git.zone/tsdoc",
"version": "1.8.3", "version": "1.9.2",
"private": false, "private": false,
"description": "A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.", "description": "A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.",
"type": "module", "type": "module",
@@ -19,9 +19,9 @@
"buildDocs": "tsdoc" "buildDocs": "tsdoc"
}, },
"devDependencies": { "devDependencies": {
"@git.zone/tsbuild": "^2.6.8", "@git.zone/tsbuild": "^2.7.1",
"@git.zone/tsrun": "^1.2.46", "@git.zone/tsrun": "^1.6.2",
"@git.zone/tstest": "^2.3.6", "@git.zone/tstest": "^2.7.0",
"@types/node": "^22.15.17" "@types/node": "^22.15.17"
}, },
"dependencies": { "dependencies": {
@@ -29,20 +29,20 @@
"@push.rocks/early": "^4.0.3", "@push.rocks/early": "^4.0.3",
"@push.rocks/npmextra": "^5.3.3", "@push.rocks/npmextra": "^5.3.3",
"@push.rocks/qenv": "^6.1.3", "@push.rocks/qenv": "^6.1.3",
"@push.rocks/smartai": "^0.5.11", "@push.rocks/smartai": "^0.8.0",
"@push.rocks/smartcli": "^4.0.11", "@push.rocks/smartcli": "^4.0.19",
"@push.rocks/smartdelay": "^3.0.5", "@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.2.7", "@push.rocks/smartfile": "^11.2.7",
"@push.rocks/smartgit": "^3.2.1", "@push.rocks/smartgit": "^3.3.1",
"@push.rocks/smartinteract": "^2.0.15", "@push.rocks/smartinteract": "^2.0.15",
"@push.rocks/smartlog": "^3.1.9", "@push.rocks/smartlog": "^3.1.10",
"@push.rocks/smartlog-destination-local": "^9.0.2", "@push.rocks/smartlog-destination-local": "^9.0.2",
"@push.rocks/smartpath": "^6.0.0", "@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartshell": "^3.3.0", "@push.rocks/smartshell": "^3.3.0",
"@push.rocks/smarttime": "^4.0.6", "@push.rocks/smarttime": "^4.0.6",
"gpt-tokenizer": "^3.0.1", "gpt-tokenizer": "^3.2.0",
"typedoc": "^0.28.12", "typedoc": "^0.28.14",
"typescript": "^5.9.2" "typescript": "^5.9.3"
}, },
"files": [ "files": [
"ts/**/*", "ts/**/*",

4107
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,304 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import { DiffProcessor } from '../ts/context/diff-processor.js';
// Sample diff strings for testing
/**
 * Builds a single-file unified diff fixture with a fixed hunk header,
 * `removedLines` removals followed by `addedLines` additions and one trailing
 * context line.
 */
const createSmallDiff = (filepath: string, addedLines = 5, removedLines = 3): string => {
  // Produces `${prefix}1`, `${prefix}2`, ... while the zero-based index is below `count`.
  const numbered = (prefix: string, count: number): string[] => {
    const out: string[] = [];
    let n = 0;
    while (n < count) {
      n += 1;
      out.push(`${prefix}${n}`);
    }
    return out;
  };

  return [
    `--- a/${filepath}`,
    `+++ b/${filepath}`,
    `@@ -1,10 +1,12 @@`,
    ...numbered('-removed line ', removedLines),
    ...numbered('+added line ', addedLines),
    ' unchanged line',
  ].join('\n');
};
/**
 * Builds a single-file diff fixture with 150 changed lines:
 * 75 additions followed by 75 removals.
 */
const createMediumDiff = (filepath: string): string => {
  const changesPerSide = 75;
  const additions = Array.from({ length: changesPerSide }, (_unused, idx) => `+added line ${idx + 1}`);
  const removals = Array.from({ length: changesPerSide }, (_unused, idx) => `-removed line ${idx + 1}`);

  return [
    `--- a/${filepath}`,
    `+++ b/${filepath}`,
    `@@ -1,100 +1,150 @@`,
    ...additions,
    ...removals,
  ].join('\n');
};
/**
 * Builds a single-file diff fixture with 2500 changed lines:
 * 1250 additions followed by 1250 removals.
 */
const createLargeDiff = (filepath: string): string => {
  const changesPerSide = 1250;
  const additions = Array.from({ length: changesPerSide }, (_unused, idx) => `+added line ${idx + 1}`);
  const removals = Array.from({ length: changesPerSide }, (_unused, idx) => `-removed line ${idx + 1}`);

  return [
    `--- a/${filepath}`,
    `+++ b/${filepath}`,
    `@@ -1,1000 +1,1500 @@`,
    ...additions,
    ...removals,
  ].join('\n');
};
/**
 * Builds a diff fixture for a deleted five-line file: the new side of the
 * header is `/dev/null` and every body line is a removal.
 */
const createDeletedFileDiff = (filepath: string): string => {
  const header = [`--- a/${filepath}`, '+++ /dev/null', '@@ -1,5 +0,0 @@'];
  const body = [1, 2, 3, 4, 5].map((n) => `-deleted line ${n}`);
  return [...header, ...body].join('\n');
};
/**
 * Builds a diff fixture for a newly added five-line file: the old side of the
 * header is `/dev/null` and every body line is an addition.
 */
const createAddedFileDiff = (filepath: string): string => {
  const header = ['--- /dev/null', `+++ b/${filepath}`, '@@ -0,0 +1,5 @@'];
  const body = [1, 2, 3, 4, 5].map((n) => `+added line ${n}`);
  return [...header, ...body].join('\n');
};
// A diff with 8 changed lines (5 added, 3 removed) is expected to land in the
// "full" bucket only, with a non-zero token estimate.
tap.test('DiffProcessor should parse small diff correctly', async () => {
const processor = new DiffProcessor();
const smallDiff = createSmallDiff('src/test.ts', 5, 3);
const result = processor.processDiffs([smallDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(1);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(0);
expect(result.totalTokens).toBeGreaterThan(0);
});
// A diff with 150 changed lines is expected to be summarized (head/tail sample)
// rather than included in full or reduced to metadata.
tap.test('DiffProcessor should summarize medium diff', async () => {
const processor = new DiffProcessor();
const mediumDiff = createMediumDiff('src/medium-file.ts');
const result = processor.processDiffs([mediumDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(1);
expect(result.metadataOnly.length).toEqual(0);
// Verify the summarized diff contains the sample
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('SUMMARIZED DIFFS');
expect(formatted).toInclude('lines omitted');
});
// A diff with 2500 changed lines is expected to be reported as metadata only,
// while its path still appears in the formatted output.
tap.test('DiffProcessor should handle large diff as metadata only', async () => {
const processor = new DiffProcessor();
const largeDiff = createLargeDiff('dist/bundle.js');
const result = processor.processDiffs([largeDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('METADATA ONLY');
expect(formatted).toInclude('dist/bundle.js');
});
// Input order deliberately interleaves build output (dist/, build/) with
// source files (src/) so that the assertions can only pass if the processor
// reorders them.
tap.test('DiffProcessor should prioritize source files over build artifacts', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('dist/bundle.js'),
createSmallDiff('src/important.ts'),
createSmallDiff('build/output.js'),
createSmallDiff('src/core.ts'),
];
const result = processor.processDiffs(diffs);
expect(result.totalFiles).toEqual(4);
// Source files should be included fully first
const formatted = processor.formatForContext(result);
// indexOf returns the first occurrence of each path in the formatted text
const srcImportantIndex = formatted.indexOf('src/important.ts');
const srcCoreIndex = formatted.indexOf('src/core.ts');
const distBundleIndex = formatted.indexOf('dist/bundle.js');
const buildOutputIndex = formatted.indexOf('build/output.js');
// Source files should appear before build artifacts
expect(srcImportantIndex).toBeLessThan(distBundleIndex);
expect(srcCoreIndex).toBeLessThan(buildOutputIndex);
});
// Exercises the token-budget branch of processDiffs. The fixtures must be
// "small" files: oversized files are reported as metadata-only because of
// their line count alone, which would make the budget assertions pass without
// the budget ever being consulted.
tap.test('DiffProcessor should respect token budget', async () => {
  const tokenBudget = 100;
  const processor = new DiffProcessor({
    maxDiffTokens: tokenBudget, // Very small budget: not every small file can fit
  });
  // Each default small diff for these paths is 188 characters, i.e. an
  // estimate of 47 tokens, so four of them (188 tokens) exceed the budget.
  const diffs = [
    createSmallDiff('src/file1.ts'),
    createSmallDiff('src/file2.ts'),
    createSmallDiff('src/file3.ts'),
    createSmallDiff('src/file4.ts'),
  ];
  const result = processor.processDiffs(diffs);
  expect(result.totalFiles).toEqual(4);
  // Something was actually included, and the included content stays in budget
  expect(result.totalTokens).toBeGreaterThan(0);
  expect(result.totalTokens).toBeLessThanOrEqual(tokenBudget);
  expect(result.fullDiffs.length).toBeGreaterThanOrEqual(1);
  // Files that no longer fit are demoted to metadata only instead of dropped
  expect(result.metadataOnly.length).toBeGreaterThanOrEqual(1);
  expect(result.fullDiffs.length + result.metadataOnly.length).toEqual(4);
});
// A five-line deletion (new side is /dev/null) is small enough to be kept in
// full; the assertions only check that the path and section header are present.
tap.test('DiffProcessor should handle deleted files', async () => {
const processor = new DiffProcessor();
const deletedDiff = createDeletedFileDiff('src/old-file.ts');
const result = processor.processDiffs([deletedDiff]);
expect(result.totalFiles).toEqual(1);
// Small deleted file should be included fully
expect(result.fullDiffs.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('src/old-file.ts');
// Verify the file appears in the output
expect(formatted).toInclude('FULL DIFFS');
});
// Mirror of the deleted-file case for a five-line addition (old side is /dev/null).
tap.test('DiffProcessor should handle added files', async () => {
const processor = new DiffProcessor();
const addedDiff = createAddedFileDiff('src/new-file.ts');
const result = processor.processDiffs([addedDiff]);
expect(result.totalFiles).toEqual(1);
// Small added file should be included fully
expect(result.fullDiffs.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('src/new-file.ts');
// Verify the file appears in the output
expect(formatted).toInclude('FULL DIFFS');
});
// One fixture per size category; each is expected to land in its own bucket
// and each bucket's section header is expected to report a count of 1.
tap.test('DiffProcessor should handle mixed file sizes', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/small.ts'),
createMediumDiff('src/medium.ts'),
createLargeDiff('dist/large.js'),
];
const result = processor.processDiffs(diffs);
expect(result.totalFiles).toEqual(3);
expect(result.fullDiffs.length).toEqual(1); // small file
expect(result.summarizedDiffs.length).toEqual(1); // medium file
expect(result.metadataOnly.length).toEqual(1); // large file
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('FULL DIFFS (1 files)');
expect(formatted).toInclude('SUMMARIZED DIFFS (1 files)');
expect(formatted).toInclude('METADATA ONLY (1 files)');
});
// An empty input array must yield an all-zero result rather than throwing.
tap.test('DiffProcessor should handle empty diff array', async () => {
const processor = new DiffProcessor();
const result = processor.processDiffs([]);
expect(result.totalFiles).toEqual(0);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(0);
expect(result.totalTokens).toEqual(0);
});
// Checks the fixed wording of the summary header and footer in the formatted
// output for a mix of two small, one medium and one large file.
tap.test('DiffProcessor should generate comprehensive summary', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/file1.ts'),
createSmallDiff('src/file2.ts'),
createMediumDiff('src/file3.ts'),
createLargeDiff('dist/bundle.js'),
];
const result = processor.processDiffs(diffs);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('GIT DIFF SUMMARY');
expect(formatted).toInclude('Files changed: 4 total');
expect(formatted).toInclude('included in full');
expect(formatted).toInclude('summarized');
expect(formatted).toInclude('metadata only');
expect(formatted).toInclude('Estimated tokens:');
expect(formatted).toInclude('END OF GIT DIFF');
});
// Overrides every option. The fixture's 150 changed lines sit above the custom
// smallFileLines (30) and exactly on the custom mediumFileLines (150), so this
// pins the "<=" boundary of the medium category.
tap.test('DiffProcessor should handle custom options', async () => {
const processor = new DiffProcessor({
maxDiffTokens: 50000,
smallFileLines: 30,
mediumFileLines: 150,
sampleHeadLines: 10,
sampleTailLines: 10,
});
const mediumDiff = createMediumDiff('src/file.ts'); // 150 lines
const result = processor.processDiffs([mediumDiff]);
// With custom settings, this should be summarized (exactly at the mediumFileLines threshold)
expect(result.summarizedDiffs.length).toEqual(1);
});
// Only the relative order of the source file and the test file is asserted;
// config.json is part of the input but its position is not checked.
tap.test('DiffProcessor should prioritize test files appropriately', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/core.ts'),
createSmallDiff('test/core.test.ts'),
createSmallDiff('config.json'),
];
const result = processor.processDiffs(diffs);
const formatted = processor.formatForContext(result);
// Source files should come before test files
const srcIndex = formatted.indexOf('src/core.ts');
const testIndex = formatted.indexOf('test/core.test.ts');
expect(srcIndex).toBeLessThan(testIndex);
});
// A diff consisting of headers and a hunk marker but no changed lines must
// still be counted as one file.
tap.test('DiffProcessor should handle files with no changes gracefully', async () => {
const processor = new DiffProcessor();
const emptyDiff = `--- a/src/file.ts
+++ b/src/file.ts
@@ -1,1 +1,1 @@`;
const result = processor.processDiffs([emptyDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(1); // Still included as a small file
});
// Starts the registered tests and exports whatever tap.start() returns.
export default tap.start();

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@git.zone/tsdoc', name: '@git.zone/tsdoc',
version: '1.8.3', version: '1.9.2',
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.' description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
} }

View File

@@ -1,6 +1,7 @@
import * as plugins from '../plugins.js'; import * as plugins from '../plugins.js';
import { AiDoc } from '../classes.aidoc.js'; import { AiDoc } from '../classes.aidoc.js';
import { ProjectContext } from './projectcontext.js'; import { ProjectContext } from './projectcontext.js';
import { DiffProcessor } from '../context/diff-processor.js';
export interface INextCommitObject { export interface INextCommitObject {
recommendedNextVersionLevel: 'fix' | 'feat' | 'BREAKING CHANGE'; // the recommended next version level of the project recommendedNextVersionLevel: 'fix' | 'feat' | 'BREAKING CHANGE'; // the recommended next version level of the project
@@ -74,22 +75,43 @@ export class Commit {
// Pass glob patterns directly to smartgit - it handles matching internally // Pass glob patterns directly to smartgit - it handles matching internally
const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns); const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns);
// Diagnostic logging for diff statistics // Process diffs intelligently using DiffProcessor
let processedDiffString: string;
if (diffStringArray.length > 0) { if (diffStringArray.length > 0) {
// Diagnostic logging for raw diff statistics
const totalChars = diffStringArray.join('\n\n').length; const totalChars = diffStringArray.join('\n\n').length;
const estimatedTokens = Math.ceil(totalChars / 4); const estimatedTokens = Math.ceil(totalChars / 4);
console.log(`📊 Git diff statistics:`); console.log(`📊 Raw git diff statistics:`);
console.log(` Files changed: ${diffStringArray.length}`); console.log(` Files changed: ${diffStringArray.length}`);
console.log(` Total characters: ${totalChars.toLocaleString()}`); console.log(` Total characters: ${totalChars.toLocaleString()}`);
console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`); console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`);
console.log(` Exclusion patterns: ${excludePatterns.length}`); console.log(` Exclusion patterns: ${excludePatterns.length}`);
// Use DiffProcessor to intelligently handle large diffs
const diffProcessor = new DiffProcessor({
maxDiffTokens: 100000, // Reserve 100k tokens for diffs
smallFileLines: 50, // Include files <= 50 lines fully
mediumFileLines: 200, // Summarize files <= 200 lines
sampleHeadLines: 20, // Show first 20 lines
sampleTailLines: 20, // Show last 20 lines
});
const processedDiff = diffProcessor.processDiffs(diffStringArray);
processedDiffString = diffProcessor.formatForContext(processedDiff);
console.log(`📝 Processed diff statistics:`);
console.log(` Full diffs: ${processedDiff.fullDiffs.length} files`);
console.log(` Summarized: ${processedDiff.summarizedDiffs.length} files`);
console.log(` Metadata only: ${processedDiff.metadataOnly.length} files`);
console.log(` Final tokens: ${processedDiff.totalTokens.toLocaleString()}`);
if (estimatedTokens > 50000) { if (estimatedTokens > 50000) {
console.warn(`⚠️ WARNING: Unusually large diff (${estimatedTokens.toLocaleString()} tokens)`); console.log(`✅ DiffProcessor reduced token usage: ${estimatedTokens.toLocaleString()} ${processedDiff.totalTokens.toLocaleString()}`);
console.warn(` This may indicate build artifacts or large files in the diff.`);
console.warn(` Consider reviewing uncommitted changes or improving exclusion patterns.`);
} }
} else {
processedDiffString = 'No changes.';
} }
// Use the new TaskContextFactory for optimized context // Use the new TaskContextFactory for optimized context
@@ -100,9 +122,7 @@ export class Commit {
await taskContextFactory.initialize(); await taskContextFactory.initialize();
// Generate context specifically for commit task // Generate context specifically for commit task
const contextResult = await taskContextFactory.createContextForCommit( const contextResult = await taskContextFactory.createContextForCommit(processedDiffString);
diffStringArray[0] ? diffStringArray.join('\n\n') : 'No changes.'
);
// Get the optimized context string // Get the optimized context string
let contextString = contextResult.context; let contextString = contextResult.context;

View File

@@ -0,0 +1,341 @@
/**
* Intelligent git diff processor that handles large diffs by sampling and prioritization
* instead of blind truncation.
*/
/**
 * Parsed description of one file's diff, produced by DiffProcessor while
 * processing a single diff string.
 */
export interface IDiffFileInfo {
// Path of the changed file as read from the diff headers
filepath: string;
// Kind of change applied to the file
status: 'added' | 'modified' | 'deleted';
// Number of added ('+') lines counted in the diff
linesAdded: number;
// Number of removed ('-') lines counted in the diff
linesRemoved: number;
// linesAdded + linesRemoved; the value the size thresholds are compared against
totalLines: number;
// Rough token estimate: diff length in characters divided by 4, rounded up
estimatedTokens: number;
// The unmodified diff string this info was parsed from
diffContent: string;
}
/**
 * Result of DiffProcessor.processDiffs: every input file ends up in exactly
 * one of fullDiffs, summarizedDiffs or metadataOnly.
 */
export interface IProcessedDiff {
summary: string; // Human-readable overview
fullDiffs: string[]; // Small files included fully
summarizedDiffs: string[]; // Medium files with head/tail
metadataOnly: string[]; // Large files, just stats
// Number of files that were parsed from the input
totalFiles: number;
// Estimated tokens consumed by the entries in fullDiffs and summarizedDiffs
totalTokens: number;
}
/**
 * Tuning knobs for DiffProcessor. Every field is optional; the DiffProcessor
 * constructor substitutes the documented default for any field left undefined.
 * File sizes are measured in changed lines (added + removed).
 */
export interface IDiffProcessorOptions {
maxDiffTokens?: number; // Maximum tokens for entire diff section (default: 100000)
smallFileLines?: number; // Files <= this are included fully (default: 50)
mediumFileLines?: number; // Files <= this are summarized (default: 200)
sampleHeadLines?: number; // Lines to show at start of medium files (default: 20)
sampleTailLines?: number; // Lines to show at end of medium files (default: 20)
}
/**
 * Turns an array of per-file git diffs into a token-bounded context section.
 *
 * Files are ranked by importance, then bucketed by the number of changed lines:
 * small files are included in full, medium files are reduced to a head/tail
 * sample, and large files (or files that no longer fit the token budget) are
 * listed as a one-line metadata entry.
 */
export class DiffProcessor {
  /** Path git writes in a `---`/`+++` header for the side of the change that does not exist. */
  private static readonly DEV_NULL = '/dev/null';

  /** Marker printed in front of a file entry for each change status. */
  private static readonly STATUS_EMOJI: Record<'added' | 'modified' | 'deleted', string> = {
    added: '➕',
    deleted: '❌',
    modified: '📝',
  };

  /**
   * Ordered importance rules; the first matching pattern wins.
   * Build output is listed first so that e.g. `dist/manifest.json` or
   * `dist/README.md` is ranked as a build artifact rather than as config/docs.
   */
  private static readonly IMPORTANCE_RULES: ReadonlyArray<{ pattern: RegExp; score: number }> = [
    // Build artifacts - low priority
    { pattern: /^(dist|build|out|\.next|public\/dist)\//, score: 10 },
    // Source files - highest priority
    { pattern: /^(src|lib|app|components|pages|api)\//, score: 100 },
    // Test files - high priority
    { pattern: /\.(test|spec)\.(ts|js|tsx|jsx)$/, score: 80 },
    { pattern: /^test\//, score: 80 },
    // Configuration files - medium-high priority
    { pattern: /\.(json|yaml|yml|toml|config\.(ts|js))$/, score: 60 },
    // Documentation - medium priority
    { pattern: /\.(md|txt|rst)$/, score: 40 },
  ];

  /** Score for paths that match none of the importance rules. */
  private static readonly DEFAULT_IMPORTANCE = 50;

  private options: Required<IDiffProcessorOptions>;

  /**
   * @param options Optional overrides; any field left undefined falls back to
   *   its default (100000 tokens, 50 / 200 changed lines, 20 head / 20 tail lines).
   */
  constructor(options: IDiffProcessorOptions = {}) {
    this.options = {
      maxDiffTokens: options.maxDiffTokens ?? 100000,
      smallFileLines: options.smallFileLines ?? 50,
      mediumFileLines: options.mediumFileLines ?? 200,
      sampleHeadLines: options.sampleHeadLines ?? 20,
      sampleTailLines: options.sampleTailLines ?? 20,
    };
  }

  /**
   * Process an array of git diffs into a structured, token-efficient format.
   *
   * @param diffStringArray One diff string per changed file. Empty or
   *   whitespace-only entries are ignored.
   * @returns The categorized diffs. `totalTokens` is the estimate for the
   *   content placed in `fullDiffs` and `summarizedDiffs`.
   */
  public processDiffs(diffStringArray: string[]): IProcessedDiff {
    // Parse all diffs into file info objects, dropping unparseable (empty) entries
    const fileInfos = diffStringArray
      .map((diffString) => this.parseDiffFile(diffString))
      .filter((info): info is IDiffFileInfo => info !== null);

    // Prioritize files (source files first, build artifacts last) so that the
    // token budget is spent on the most relevant files
    const prioritized = this.prioritizeFiles(fileInfos);

    const result: IProcessedDiff = {
      summary: '',
      fullDiffs: [],
      summarizedDiffs: [],
      metadataOnly: [],
      totalFiles: prioritized.length,
      totalTokens: 0,
    };

    let tokensUsed = 0;
    const tokenBudget = this.options.maxDiffTokens;

    // Categorize and include files based on size and token budget
    for (const fileInfo of prioritized) {
      const remainingBudget = tokenBudget - tokensUsed;

      if (remainingBudget <= 0) {
        // Budget exhausted - rest are metadata only
        result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        continue;
      }

      if (fileInfo.totalLines <= this.options.smallFileLines) {
        // Small file - include fully if budget allows
        if (fileInfo.estimatedTokens <= remainingBudget) {
          const statusPrefix = this.getFileStatusPrefix(fileInfo);
          result.fullDiffs.push(`${statusPrefix}${fileInfo.diffContent}`);
          tokensUsed += fileInfo.estimatedTokens;
        } else {
          result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        }
      } else if (fileInfo.totalLines <= this.options.mediumFileLines) {
        // Medium file - try to include summary with head/tail
        const summary = this.extractDiffSample(
          fileInfo,
          this.options.sampleHeadLines,
          this.options.sampleTailLines
        );
        const summaryTokens = Math.ceil(summary.length / 4); // Rough estimate

        if (summaryTokens <= remainingBudget) {
          result.summarizedDiffs.push(summary);
          tokensUsed += summaryTokens;
        } else {
          result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        }
      } else {
        // Large file - metadata only
        result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
      }
    }

    result.totalTokens = tokensUsed;
    result.summary = this.generateSummary(result);
    return result;
  }

  /**
   * Format the processed diff for inclusion in context.
   *
   * @param processed Result of {@link processDiffs}.
   * @returns A text block with a summary section followed by one section per
   *   non-empty category, terminated by an end marker.
   */
  public formatForContext(processed: IProcessedDiff): string {
    const sections: string[] = [];

    // Summary section
    sections.push('====== GIT DIFF SUMMARY ======');
    sections.push(processed.summary);
    sections.push('');

    // Full diffs section
    if (processed.fullDiffs.length > 0) {
      sections.push(`====== FULL DIFFS (${processed.fullDiffs.length} files) ======`);
      sections.push(processed.fullDiffs.join('\n\n'));
      sections.push('');
    }

    // Summarized diffs section
    if (processed.summarizedDiffs.length > 0) {
      sections.push(`====== SUMMARIZED DIFFS (${processed.summarizedDiffs.length} files) ======`);
      sections.push(processed.summarizedDiffs.join('\n\n'));
      sections.push('');
    }

    // Metadata only section
    if (processed.metadataOnly.length > 0) {
      sections.push(`====== METADATA ONLY (${processed.metadataOnly.length} files) ======`);
      sections.push(processed.metadataOnly.join('\n'));
      sections.push('');
    }

    sections.push('====== END OF GIT DIFF ======');
    return sections.join('\n');
  }

  /**
   * Parse a single git diff string into file information.
   *
   * Header lines (`---`, `+++`, `new file mode`, `deleted file mode`,
   * `diff --git`) are only interpreted outside of hunks; once a `@@` hunk header
   * has been seen, every line starting with `+` or `-` is counted as a change,
   * including changed lines whose own content starts with `--` or `++`.
   *
   * @returns The parsed info, or `null` for an empty / whitespace-only string.
   */
  private parseDiffFile(diffString: string): IDiffFileInfo | null {
    if (!diffString || diffString.trim().length === 0) {
      return null;
    }

    const devNull = DiffProcessor.DEV_NULL;
    let oldPath = '';
    let newPath = '';
    let gitHeaderPath = ''; // fallback for diffs without ---/+++ lines (e.g. binary files)
    let modeStatus: 'added' | 'deleted' | null = null;
    let linesAdded = 0;
    let linesRemoved = 0;
    let inHunk = false;

    for (const line of diffString.split('\n')) {
      if (line.startsWith('diff --git ')) {
        // Hunk lines always start with ' ', '+', '-' or '\', so this can only be a header
        inHunk = false;
        const headerMatch = /^diff --git a\/(.+) b\/(.+)$/.exec(line.trimEnd());
        gitHeaderPath = headerMatch?.[2] ?? gitHeaderPath;
        continue;
      }
      if (line.startsWith('@@')) {
        inHunk = true;
        continue;
      }
      if (!inHunk) {
        if (line.startsWith('--- ')) {
          oldPath = this.normalizeHeaderPath(line.substring(4), 'a/');
          continue;
        }
        if (line.startsWith('+++ ')) {
          newPath = this.normalizeHeaderPath(line.substring(4), 'b/');
          continue;
        }
        if (line.startsWith('new file mode')) {
          modeStatus = 'added';
          continue;
        }
        if (line.startsWith('deleted file mode')) {
          modeStatus = 'deleted';
          continue;
        }
      }
      if (line.startsWith('+')) {
        linesAdded++;
      } else if (line.startsWith('-')) {
        linesRemoved++;
      }
    }

    // /dev/null on exactly one side identifies an added or deleted file
    let status: 'added' | 'modified' | 'deleted' = modeStatus ?? 'modified';
    if (oldPath === devNull && newPath !== devNull) {
      status = 'added';
    } else if (newPath === devNull && oldPath !== devNull) {
      status = 'deleted';
    }

    // Prefer the new path, fall back to the old one (deleted files), then to the git header
    const realNewPath = newPath === devNull ? '' : newPath;
    const realOldPath = oldPath === devNull ? '' : oldPath;
    const filepath = realNewPath || realOldPath || gitHeaderPath;

    const totalLines = linesAdded + linesRemoved;
    const estimatedTokens = Math.ceil(diffString.length / 4);

    return {
      filepath,
      status,
      linesAdded,
      linesRemoved,
      totalLines,
      estimatedTokens,
      diffContent: diffString,
    };
  }

  /**
   * Clean the path portion of a `---` / `+++` header line: drops a trailing
   * tab-separated timestamp and line-ending whitespace, and strips the given
   * side prefix (`a/` or `b/`) when present. `/dev/null` is returned unchanged.
   */
  private normalizeHeaderPath(rawPath: string, prefix: 'a/' | 'b/'): string {
    const path = (rawPath.split('\t')[0] ?? '').trimEnd();
    if (path === DiffProcessor.DEV_NULL) {
      return path;
    }
    return path.startsWith(prefix) ? path.substring(prefix.length) : path;
  }

  /**
   * Prioritize files by importance (source files before build artifacts).
   * Returns a new array; the input is not reordered. Files with equal scores
   * keep their relative input order.
   */
  private prioritizeFiles(files: IDiffFileInfo[]): IDiffFileInfo[] {
    return [...files].sort((a, b) => {
      const scoreA = this.getFileImportanceScore(a.filepath);
      const scoreB = this.getFileImportanceScore(b.filepath);
      return scoreB - scoreA; // Higher score first
    });
  }

  /**
   * Calculate importance score for a file path (higher means more important).
   */
  private getFileImportanceScore(filepath: string): number {
    for (const rule of DiffProcessor.IMPORTANCE_RULES) {
      if (rule.pattern.test(filepath)) {
        return rule.score;
      }
    }
    // Everything else - default priority
    return DiffProcessor.DEFAULT_IMPORTANCE;
  }

  /**
   * Extract head and tail lines from a diff, omitting the middle.
   *
   * Everything up to and including the first `@@` line is treated as header
   * and always kept. If the remaining body is not longer than
   * `headLines + tailLines` there is nothing to omit and the diff is returned
   * unchanged.
   */
  private extractDiffSample(fileInfo: IDiffFileInfo, headLines: number, tailLines: number): string {
    const lines = fileInfo.diffContent.split('\n');

    // Split header (through the first hunk marker) from the body
    const firstHunkIndex = lines.findIndex((line) => line.startsWith('@@'));
    const bodyStartIndex = firstHunkIndex === -1 ? 0 : firstHunkIndex + 1;
    const headerLines = lines.slice(0, bodyStartIndex);
    const bodyLines = lines.slice(bodyStartIndex);

    const headCount = Math.max(0, headLines);
    const tailCount = Math.max(0, tailLines);

    if (bodyLines.length <= headCount + tailCount) {
      // Head and tail would touch or overlap - include the file fully
      return fileInfo.diffContent;
    }

    const head = bodyLines.slice(0, headCount);
    // slice(-0) is slice(0) and would return the whole body, so handle 0 explicitly
    const tail = tailCount > 0 ? bodyLines.slice(-tailCount) : [];
    const omittedLines = bodyLines.length - head.length - tail.length;

    const statusEmoji = this.getStatusEmoji(fileInfo);

    const parts: string[] = [];
    parts.push(`${statusEmoji} FILE: ${fileInfo.filepath}`);
    parts.push(`CHANGES: +${fileInfo.linesAdded} lines, -${fileInfo.linesRemoved} lines (${fileInfo.totalLines} total)`);
    parts.push('');
    parts.push(...headerLines);
    parts.push(...head);
    parts.push('');
    parts.push(`[... ${omittedLines} lines omitted - use Read tool to see full file ...]`);
    parts.push('');
    parts.push(...tail);

    return parts.join('\n');
  }

  /**
   * Get the status marker (without trailing space) for a file.
   */
  private getStatusEmoji(fileInfo: IDiffFileInfo): string {
    return DiffProcessor.STATUS_EMOJI[fileInfo.status];
  }

  /**
   * Get file status prefix with emoji
   */
  private getFileStatusPrefix(fileInfo: IDiffFileInfo): string {
    return `${this.getStatusEmoji(fileInfo)} `;
  }

  /**
   * Format file info as metadata only
   */
  private formatMetadataOnly(fileInfo: IDiffFileInfo): string {
    const statusEmoji = this.getStatusEmoji(fileInfo);
    return `${statusEmoji} ${fileInfo.filepath} (+${fileInfo.linesAdded}, -${fileInfo.linesRemoved})`;
  }

  /**
   * Generate human-readable summary of processed diff
   */
  private generateSummary(result: IProcessedDiff): string {
    const parts: string[] = [];
    parts.push(`Files changed: ${result.totalFiles} total`);
    parts.push(`- ${result.fullDiffs.length} included in full`);
    parts.push(`- ${result.summarizedDiffs.length} summarized (head/tail shown)`);
    parts.push(`- ${result.metadataOnly.length} metadata only`);
    parts.push(`Estimated tokens: ~${result.totalTokens.toLocaleString()}`);

    if (result.metadataOnly.length > 0) {
      parts.push('');
      parts.push('NOTE: Some files excluded to stay within token budget.');
      parts.push('Use Read tool with specific file paths to see full content.');
    }

    return parts.join('\n');
  }
}

View File

@@ -5,6 +5,7 @@ import { ContextTrimmer } from './context-trimmer.js';
import { LazyFileLoader } from './lazy-file-loader.js'; import { LazyFileLoader } from './lazy-file-loader.js';
import { ContextCache } from './context-cache.js'; import { ContextCache } from './context-cache.js';
import { ContextAnalyzer } from './context-analyzer.js'; import { ContextAnalyzer } from './context-analyzer.js';
import { DiffProcessor } from './diff-processor.js';
import type { import type {
ContextMode, ContextMode,
IContextConfig, IContextConfig,
@@ -24,7 +25,10 @@ import type {
IFileAnalysis, IFileAnalysis,
IAnalysisResult, IAnalysisResult,
IIterativeConfig, IIterativeConfig,
IIterativeContextResult IIterativeContextResult,
IDiffFileInfo,
IProcessedDiff,
IDiffProcessorOptions
} from './types.js'; } from './types.js';
export { export {
@@ -36,6 +40,7 @@ export {
LazyFileLoader, LazyFileLoader,
ContextCache, ContextCache,
ContextAnalyzer, ContextAnalyzer,
DiffProcessor,
}; };
// Types // Types
@@ -58,5 +63,8 @@ export type {
IFileAnalysis, IFileAnalysis,
IAnalysisResult, IAnalysisResult,
IIterativeConfig, IIterativeConfig,
IIterativeContextResult IIterativeContextResult,
IDiffFileInfo,
IProcessedDiff,
IDiffProcessorOptions
}; };

View File

@@ -115,21 +115,9 @@ export class IterativeContextBuilder {
// If additional context (e.g., git diff) is provided, prepend it // If additional context (e.g., git diff) is provided, prepend it
if (additionalContext) { if (additionalContext) {
// CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM // NOTE: additionalContext is expected to be pre-processed by DiffProcessor
const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio) // which intelligently samples large diffs to stay within token budget (100k default)
const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety const MAX_DIFF_TOKENS = 200000; // Safety net for edge cases (DiffProcessor uses 100k budget)
// First check: raw character count
if (additionalContext.length > MAX_DIFF_CHARS) {
const originalSize = additionalContext.length;
logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`);
logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`);
logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`);
logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`);
additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) +
'\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]';
}
const diffSection = ` const diffSection = `
====== GIT DIFF ====== ====== GIT DIFF ======
@@ -139,21 +127,22 @@ ${additionalContext}
====== END OF GIT DIFF ====== ====== END OF GIT DIFF ======
`; `;
// Second check: actual token count after truncation // Validate token count (should already be under budget from DiffProcessor)
const diffTokens = this.countTokens(diffSection); const diffTokens = this.countTokens(diffSection);
if (diffTokens > MAX_DIFF_TOKENS) { if (diffTokens > MAX_DIFF_TOKENS) {
logger.log('error', `Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`); logger.log('error', `Pre-processed git diff exceeds safety limit (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
logger.log('error', ` This should not happen - DiffProcessor should have limited to ~100k tokens.`);
logger.log('error', ` Please check DiffProcessor configuration and output.`);
throw new Error( throw new Error(
`Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` + `Pre-processed git diff size (${diffTokens.toLocaleString()} tokens) exceeds safety limit (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
`This indicates massive uncommitted changes, likely build artifacts. ` + `This indicates a bug in DiffProcessor or misconfiguration.`
`Please commit or stash dist/, build/, or other generated files.`
); );
} }
loadedContent = diffSection; loadedContent = diffSection;
totalTokensUsed += diffTokens; totalTokensUsed += diffTokens;
logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`); logger.log('info', `📝 Added pre-processed git diff to context (${diffTokens.toLocaleString()} tokens)`);
} }
// Phase 3: Iterative file selection and loading // Phase 3: Iterative file selection and loading

View File

@@ -319,3 +319,6 @@ export interface IIterativeContextResult extends IContextResult {
/** Total duration in ms */ /** Total duration in ms */
totalDuration: number; totalDuration: number;
} }
// Export DiffProcessor types
export type { IDiffFileInfo, IProcessedDiff, IDiffProcessorOptions } from './diff-processor.js';