Compare commits

..

9 Commits

Author SHA1 Message Date
6524adea18 1.9.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-04 02:19:57 +00:00
4bf0c02618 feat(context): Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline 2025-11-04 02:19:57 +00:00
f84a65217d 1.8.3
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-04 01:37:15 +00:00
3f22fc91ae fix(context): Prevent enormous git diffs and OOM during context building by adding exclusion patterns, truncation, and diagnostic logging 2025-11-04 01:37:15 +00:00
11e65b92ec 1.8.2
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-03 17:53:03 +00:00
0a3080518f 1.8.1
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-03 17:50:09 +00:00
d0a4ddbb4b fix(git diff): improve git diff 2025-11-03 17:49:35 +00:00
481339d3cb 1.8.0
Some checks failed
Default (tags) / security (push) Failing after 0s
Default (tags) / test (push) Failing after 0s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-11-03 13:37:16 +00:00
ebc3d760af feat(context): Wire OpenAI provider through task context factory and add git-diff support to iterative context builder 2025-11-03 13:37:16 +00:00
16 changed files with 881 additions and 48 deletions

View File

@@ -1,5 +1,32 @@
# Changelog
## 2025-11-04 - 1.9.0 - feat(context)
Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline
- Add DiffProcessor (ts/context/diff-processor.ts) to intelligently process git diffs: include small files fully, summarize medium files (head/tail sampling), and mark very large files as metadata-only to stay within token budgets.
- Integrate DiffProcessor into commit workflow (ts/aidocs_classes/commit.ts): preprocess raw diffs, emit processed diff statistics, and pass a token-efficient diff section into the TaskContextFactory for commit context generation.
- Export DiffProcessor and its types through the context index and types (ts/context/index.ts, ts/context/types.ts) so other context components can reuse it.
- Add comprehensive tests for the DiffProcessor behavior and integration (test/test.diffprocessor.node.ts) covering small/medium/large diffs, added/deleted files, prioritization, token budgets, and formatting for context.
- Minor adjustments across context/task factories and builders to accept and propagate processed diff strings rather than raw diffs, reducing risk of token overflows during iterative context building.
## 2025-11-04 - 1.8.3 - fix(context)
Prevent enormous git diffs and OOM during context building by adding exclusion patterns, truncation, and diagnostic logging
- Add comprehensive git diff exclusion globs (locks, build artifacts, maps, bundles, IDE folders, logs, caches) when collecting uncommitted diffs to avoid noisy/huge diffs
- Pass glob patterns directly to smartgit.getUncommittedDiff for efficient server-side matching
- Emit diagnostic statistics for diffs (files changed, total characters, estimated tokens, number of exclusion patterns) and warn on unusually large diffs
- Introduce pre-tokenization safety checks in iterative context builder: truncate raw diff text if it exceeds MAX_DIFF_CHARS and throw a clear error if token count still exceeds MAX_DIFF_TOKENS
- Format and log token counts using locale-aware formatting for clarity
- Improve robustness of commit context generation to reduce risk of OOM / model-limit overruns
## 2025-11-03 - 1.8.0 - feat(context)
Wire OpenAI provider through task context factory and add git-diff support to iterative context builder
- Pass AiDoc.openaiInstance through TaskContextFactory into IterativeContextBuilder to reuse the same OpenAI provider and avoid reinitialization.
- IterativeContextBuilder now accepts an optional OpenAiProvider and an additionalContext string; when provided, git diffs (or other extra context) are prepended to the AI context and token counts are updated.
- createContextForCommit now forwards the git diff into the iterative builder so commit-specific context includes the diff.
- Updated aidocs_classes (commit, description, readme) to supply the existing openaiInstance when creating the TaskContextFactory.
## 2025-11-03 - 1.7.0 - feat(IterativeContextBuilder)
Add iterative AI-driven context builder and integrate into task factory; add tests and iterative configuration

View File

@@ -1,6 +1,6 @@
{
"name": "@git.zone/tsdoc",
"version": "1.7.0",
"version": "1.9.0",
"private": false,
"description": "A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.",
"type": "module",

8
pnpm-lock.yaml generated
View File

@@ -3799,8 +3799,8 @@ packages:
resolution: {integrity: sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==}
engines: {node: '>=4'}
minimatch@10.0.3:
resolution: {integrity: sha512-IPZ167aShDZZUMdRk66cyQAW3qr0WzbHkPdMYa8bzZhlHhO3jALbKdxcaak7W9FfT2rZNpQuUu4Od7ILEpXSaw==}
minimatch@10.1.1:
resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==}
engines: {node: 20 || >=22}
minimatch@3.1.2:
@@ -9797,7 +9797,7 @@ snapshots:
dependencies:
foreground-child: 3.3.1
jackspeak: 4.1.1
minimatch: 10.0.3
minimatch: 10.1.1
minipass: 7.1.2
package-json-from-dist: 1.0.1
path-scurry: 2.0.0
@@ -10680,7 +10680,7 @@ snapshots:
min-indent@1.0.1: {}
minimatch@10.0.3:
minimatch@10.1.1:
dependencies:
'@isaacs/brace-expansion': 5.0.0

View File

@@ -33,7 +33,10 @@ tap.test('should build commit object', async () => {
expect(commitObject).toHaveProperty('recommendedNextVersionLevel');
expect(commitObject).toHaveProperty('recommendedNextVersionScope');
expect(commitObject).toHaveProperty('recommendedNextVersionMessage');
});
})
tap.test('should stop AIdocs', async () => {
await aidocs.stop();
});
tap.start();

View File

@@ -0,0 +1,304 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import { DiffProcessor } from '../ts/context/diff-processor.js';
// Sample diff strings for testing
const createSmallDiff = (filepath: string, addedLines = 5, removedLines = 3): string => {
const lines: string[] = [];
lines.push(`--- a/${filepath}`);
lines.push(`+++ b/${filepath}`);
lines.push(`@@ -1,10 +1,12 @@`);
for (let i = 0; i < removedLines; i++) {
lines.push(`-removed line ${i + 1}`);
}
for (let i = 0; i < addedLines; i++) {
lines.push(`+added line ${i + 1}`);
}
lines.push(' unchanged line');
return lines.join('\n');
};
const createMediumDiff = (filepath: string): string => {
const lines: string[] = [];
lines.push(`--- a/${filepath}`);
lines.push(`+++ b/${filepath}`);
lines.push(`@@ -1,100 +1,150 @@`);
// 150 lines of changes
for (let i = 0; i < 75; i++) {
lines.push(`+added line ${i + 1}`);
}
for (let i = 0; i < 75; i++) {
lines.push(`-removed line ${i + 1}`);
}
return lines.join('\n');
};
const createLargeDiff = (filepath: string): string => {
const lines: string[] = [];
lines.push(`--- a/${filepath}`);
lines.push(`+++ b/${filepath}`);
lines.push(`@@ -1,1000 +1,1500 @@`);
// 2500 lines of changes
for (let i = 0; i < 1250; i++) {
lines.push(`+added line ${i + 1}`);
}
for (let i = 0; i < 1250; i++) {
lines.push(`-removed line ${i + 1}`);
}
return lines.join('\n');
};
const createDeletedFileDiff = (filepath: string): string => {
return `--- a/${filepath}
+++ /dev/null
@@ -1,5 +0,0 @@
-deleted line 1
-deleted line 2
-deleted line 3
-deleted line 4
-deleted line 5`;
};
const createAddedFileDiff = (filepath: string): string => {
return `--- /dev/null
+++ b/${filepath}
@@ -0,0 +1,5 @@
+added line 1
+added line 2
+added line 3
+added line 4
+added line 5`;
};
tap.test('DiffProcessor should parse small diff correctly', async () => {
const processor = new DiffProcessor();
const smallDiff = createSmallDiff('src/test.ts', 5, 3);
const result = processor.processDiffs([smallDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(1);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(0);
expect(result.totalTokens).toBeGreaterThan(0);
});
tap.test('DiffProcessor should summarize medium diff', async () => {
const processor = new DiffProcessor();
const mediumDiff = createMediumDiff('src/medium-file.ts');
const result = processor.processDiffs([mediumDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(1);
expect(result.metadataOnly.length).toEqual(0);
// Verify the summarized diff contains the sample
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('SUMMARIZED DIFFS');
expect(formatted).toInclude('lines omitted');
});
tap.test('DiffProcessor should handle large diff as metadata only', async () => {
const processor = new DiffProcessor();
const largeDiff = createLargeDiff('dist/bundle.js');
const result = processor.processDiffs([largeDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('METADATA ONLY');
expect(formatted).toInclude('dist/bundle.js');
});
tap.test('DiffProcessor should prioritize source files over build artifacts', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('dist/bundle.js'),
createSmallDiff('src/important.ts'),
createSmallDiff('build/output.js'),
createSmallDiff('src/core.ts'),
];
const result = processor.processDiffs(diffs);
expect(result.totalFiles).toEqual(4);
// Source files should be included fully first
const formatted = processor.formatForContext(result);
const srcImportantIndex = formatted.indexOf('src/important.ts');
const srcCoreIndex = formatted.indexOf('src/core.ts');
const distBundleIndex = formatted.indexOf('dist/bundle.js');
const buildOutputIndex = formatted.indexOf('build/output.js');
// Source files should appear before build artifacts
expect(srcImportantIndex).toBeLessThan(distBundleIndex);
expect(srcCoreIndex).toBeLessThan(buildOutputIndex);
});
tap.test('DiffProcessor should respect token budget', async () => {
const processor = new DiffProcessor({
maxDiffTokens: 500, // Very small budget to force metadata-only
});
// Create multiple large diffs that will exceed budget
const diffs = [
createLargeDiff('src/file1.ts'),
createLargeDiff('src/file2.ts'),
createLargeDiff('src/file3.ts'),
createLargeDiff('src/file4.ts'),
];
const result = processor.processDiffs(diffs);
expect(result.totalTokens).toBeLessThanOrEqual(500);
// With such a small budget and large files, most should be metadata only
expect(result.metadataOnly.length).toBeGreaterThanOrEqual(2);
});
tap.test('DiffProcessor should handle deleted files', async () => {
const processor = new DiffProcessor();
const deletedDiff = createDeletedFileDiff('src/old-file.ts');
const result = processor.processDiffs([deletedDiff]);
expect(result.totalFiles).toEqual(1);
// Small deleted file should be included fully
expect(result.fullDiffs.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('src/old-file.ts');
// Verify the file appears in the output
expect(formatted).toInclude('FULL DIFFS');
});
tap.test('DiffProcessor should handle added files', async () => {
const processor = new DiffProcessor();
const addedDiff = createAddedFileDiff('src/new-file.ts');
const result = processor.processDiffs([addedDiff]);
expect(result.totalFiles).toEqual(1);
// Small added file should be included fully
expect(result.fullDiffs.length).toEqual(1);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('src/new-file.ts');
// Verify the file appears in the output
expect(formatted).toInclude('FULL DIFFS');
});
tap.test('DiffProcessor should handle mixed file sizes', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/small.ts'),
createMediumDiff('src/medium.ts'),
createLargeDiff('dist/large.js'),
];
const result = processor.processDiffs(diffs);
expect(result.totalFiles).toEqual(3);
expect(result.fullDiffs.length).toEqual(1); // small file
expect(result.summarizedDiffs.length).toEqual(1); // medium file
expect(result.metadataOnly.length).toEqual(1); // large file
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('FULL DIFFS (1 files)');
expect(formatted).toInclude('SUMMARIZED DIFFS (1 files)');
expect(formatted).toInclude('METADATA ONLY (1 files)');
});
tap.test('DiffProcessor should handle empty diff array', async () => {
const processor = new DiffProcessor();
const result = processor.processDiffs([]);
expect(result.totalFiles).toEqual(0);
expect(result.fullDiffs.length).toEqual(0);
expect(result.summarizedDiffs.length).toEqual(0);
expect(result.metadataOnly.length).toEqual(0);
expect(result.totalTokens).toEqual(0);
});
tap.test('DiffProcessor should generate comprehensive summary', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/file1.ts'),
createSmallDiff('src/file2.ts'),
createMediumDiff('src/file3.ts'),
createLargeDiff('dist/bundle.js'),
];
const result = processor.processDiffs(diffs);
const formatted = processor.formatForContext(result);
expect(formatted).toInclude('GIT DIFF SUMMARY');
expect(formatted).toInclude('Files changed: 4 total');
expect(formatted).toInclude('included in full');
expect(formatted).toInclude('summarized');
expect(formatted).toInclude('metadata only');
expect(formatted).toInclude('Estimated tokens:');
expect(formatted).toInclude('END OF GIT DIFF');
});
tap.test('DiffProcessor should handle custom options', async () => {
const processor = new DiffProcessor({
maxDiffTokens: 50000,
smallFileLines: 30,
mediumFileLines: 150,
sampleHeadLines: 10,
sampleTailLines: 10,
});
const mediumDiff = createMediumDiff('src/file.ts'); // 150 lines
const result = processor.processDiffs([mediumDiff]);
// With custom settings, this should be summarized (exactly at the mediumFileLines threshold)
expect(result.summarizedDiffs.length).toEqual(1);
});
tap.test('DiffProcessor should prioritize test files appropriately', async () => {
const processor = new DiffProcessor();
const diffs = [
createSmallDiff('src/core.ts'),
createSmallDiff('test/core.test.ts'),
createSmallDiff('config.json'),
];
const result = processor.processDiffs(diffs);
const formatted = processor.formatForContext(result);
// Source files should come before test files
const srcIndex = formatted.indexOf('src/core.ts');
const testIndex = formatted.indexOf('test/core.test.ts');
expect(srcIndex).toBeLessThan(testIndex);
});
tap.test('DiffProcessor should handle files with no changes gracefully', async () => {
const processor = new DiffProcessor();
const emptyDiff = `--- a/src/file.ts
+++ b/src/file.ts
@@ -1,1 +1,1 @@`;
const result = processor.processDiffs([emptyDiff]);
expect(result.totalFiles).toEqual(1);
expect(result.fullDiffs.length).toEqual(1); // Still included as a small file
});
export default tap.start();

View File

@@ -1,8 +0,0 @@
import { expect, tap } from '@push.rocks/tapbundle';
import * as tsdoc from '../ts/index.js';
tap.test('first test', async () => {
console.log('test');
});
tap.start();

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@git.zone/tsdoc',
version: '1.7.0',
version: '1.9.0',
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
}

View File

@@ -1,6 +1,7 @@
import * as plugins from '../plugins.js';
import { AiDoc } from '../classes.aidoc.js';
import { ProjectContext } from './projectcontext.js';
import { DiffProcessor } from '../context/diff-processor.js';
export interface INextCommitObject {
recommendedNextVersionLevel: 'fix' | 'feat' | 'BREAKING CHANGE'; // the recommended next version level of the project
@@ -27,18 +28,101 @@ export class Commit {
smartgitInstance,
this.projectDir
);
const diffStringArray = await gitRepo.getUncommittedDiff([
// Define comprehensive exclusion patterns
// smartgit@3.3.0+ supports glob patterns natively
const excludePatterns = [
// Lock files
'pnpm-lock.yaml',
'package-lock.json',
]);
'npm-shrinkwrap.json',
'yarn.lock',
'deno.lock',
'bun.lockb',
// Build artifacts (main culprit for large diffs!)
'dist/**',
'dist_*/**', // dist_ts, dist_web, etc.
'build/**',
'.next/**',
'out/**',
'public/dist/**',
// Compiled/bundled files
'**/*.js.map',
'**/*.d.ts.map',
'**/*.min.js',
'**/*.bundle.js',
'**/*.chunk.js',
// IDE/Editor directories
'.claude/**',
'.cursor/**',
'.vscode/**',
'.idea/**',
'**/*.swp',
'**/*.swo',
// Logs and caches
'.nogit/**',
'**/*.log',
'.cache/**',
'.rpt2_cache/**',
'coverage/**',
'.nyc_output/**',
];
// Pass glob patterns directly to smartgit - it handles matching internally
const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns);
// Process diffs intelligently using DiffProcessor
let processedDiffString: string;
if (diffStringArray.length > 0) {
// Diagnostic logging for raw diff statistics
const totalChars = diffStringArray.join('\n\n').length;
const estimatedTokens = Math.ceil(totalChars / 4);
console.log(`📊 Raw git diff statistics:`);
console.log(` Files changed: ${diffStringArray.length}`);
console.log(` Total characters: ${totalChars.toLocaleString()}`);
console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`);
console.log(` Exclusion patterns: ${excludePatterns.length}`);
// Use DiffProcessor to intelligently handle large diffs
const diffProcessor = new DiffProcessor({
maxDiffTokens: 100000, // Reserve 100k tokens for diffs
smallFileLines: 50, // Include files <= 50 lines fully
mediumFileLines: 200, // Summarize files <= 200 lines
sampleHeadLines: 20, // Show first 20 lines
sampleTailLines: 20, // Show last 20 lines
});
const processedDiff = diffProcessor.processDiffs(diffStringArray);
processedDiffString = diffProcessor.formatForContext(processedDiff);
console.log(`📝 Processed diff statistics:`);
console.log(` Full diffs: ${processedDiff.fullDiffs.length} files`);
console.log(` Summarized: ${processedDiff.summarizedDiffs.length} files`);
console.log(` Metadata only: ${processedDiff.metadataOnly.length} files`);
console.log(` Final tokens: ${processedDiff.totalTokens.toLocaleString()}`);
if (estimatedTokens > 50000) {
console.log(`✅ DiffProcessor reduced token usage: ${estimatedTokens.toLocaleString()}${processedDiff.totalTokens.toLocaleString()}`);
}
} else {
processedDiffString = 'No changes.';
}
// Use the new TaskContextFactory for optimized context
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(this.projectDir);
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(
this.projectDir,
this.aiDocsRef.openaiInstance
);
await taskContextFactory.initialize();
// Generate context specifically for commit task
const contextResult = await taskContextFactory.createContextForCommit(
diffStringArray[0] ? diffStringArray.join('\n\n') : 'No changes.'
);
const contextResult = await taskContextFactory.createContextForCommit(processedDiffString);
// Get the optimized context string
let contextString = contextResult.context;

View File

@@ -19,7 +19,10 @@ export class Description {
public async build() {
// Use the new TaskContextFactory for optimized context
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(this.projectDir);
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(
this.projectDir,
this.aiDocsRef.openaiInstance
);
await taskContextFactory.initialize();
// Generate context specifically for description task

View File

@@ -18,7 +18,10 @@ export class Readme {
let finalReadmeString = ``;
// Use the new TaskContextFactory for optimized context
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(this.projectDir);
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(
this.projectDir,
this.aiDocsRef.openaiInstance
);
await taskContextFactory.initialize();
// Generate context specifically for readme task

View File

@@ -64,7 +64,7 @@ export class AiDoc {
await this.npmextraKV.writeKey('OPENAI_TOKEN', this.openaiToken);
}
}
if (!this.openaiToken) {
if (!this.openaiToken && this.npmextraKV) {
this.openaiToken = await this.npmextraKV.readKey('OPENAI_TOKEN');
}
@@ -76,7 +76,11 @@ export class AiDoc {
}
public async stop() {
await this.openaiInstance.stop();
if (this.openaiInstance) {
await this.openaiInstance.stop();
}
// No explicit cleanup needed for npmextraKV or aidocInteract
// They don't keep event loop alive
}
public async buildReadme(projectDirArg: string) {

View File

@@ -0,0 +1,341 @@
/**
* Intelligent git diff processor that handles large diffs by sampling and prioritization
* instead of blind truncation.
*/
export interface IDiffFileInfo {
filepath: string;
status: 'added' | 'modified' | 'deleted';
linesAdded: number;
linesRemoved: number;
totalLines: number;
estimatedTokens: number;
diffContent: string;
}
export interface IProcessedDiff {
summary: string; // Human-readable overview
fullDiffs: string[]; // Small files included fully
summarizedDiffs: string[]; // Medium files with head/tail
metadataOnly: string[]; // Large files, just stats
totalFiles: number;
totalTokens: number;
}
export interface IDiffProcessorOptions {
maxDiffTokens?: number; // Maximum tokens for entire diff section (default: 100000)
smallFileLines?: number; // Files <= this are included fully (default: 50)
mediumFileLines?: number; // Files <= this are summarized (default: 200)
sampleHeadLines?: number; // Lines to show at start of medium files (default: 20)
sampleTailLines?: number; // Lines to show at end of medium files (default: 20)
}
export class DiffProcessor {
private options: Required<IDiffProcessorOptions>;
constructor(options: IDiffProcessorOptions = {}) {
this.options = {
maxDiffTokens: options.maxDiffTokens ?? 100000,
smallFileLines: options.smallFileLines ?? 50,
mediumFileLines: options.mediumFileLines ?? 200,
sampleHeadLines: options.sampleHeadLines ?? 20,
sampleTailLines: options.sampleTailLines ?? 20,
};
}
/**
* Process an array of git diffs into a structured, token-efficient format
*/
public processDiffs(diffStringArray: string[]): IProcessedDiff {
// Parse all diffs into file info objects
const fileInfos: IDiffFileInfo[] = diffStringArray
.map(diffString => this.parseDiffFile(diffString))
.filter(info => info !== null) as IDiffFileInfo[];
// Prioritize files (source files first, build artifacts last)
const prioritized = this.prioritizeFiles(fileInfos);
const result: IProcessedDiff = {
summary: '',
fullDiffs: [],
summarizedDiffs: [],
metadataOnly: [],
totalFiles: prioritized.length,
totalTokens: 0,
};
let tokensUsed = 0;
const tokenBudget = this.options.maxDiffTokens;
// Categorize and include files based on size and token budget
for (const fileInfo of prioritized) {
const remainingBudget = tokenBudget - tokensUsed;
if (remainingBudget <= 0) {
// Budget exhausted - rest are metadata only
result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
continue;
}
if (fileInfo.totalLines <= this.options.smallFileLines) {
// Small file - include fully if budget allows
if (fileInfo.estimatedTokens <= remainingBudget) {
const statusPrefix = this.getFileStatusPrefix(fileInfo);
result.fullDiffs.push(`${statusPrefix}${fileInfo.diffContent}`);
tokensUsed += fileInfo.estimatedTokens;
} else {
result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
}
} else if (fileInfo.totalLines <= this.options.mediumFileLines) {
// Medium file - try to include summary with head/tail
const summary = this.extractDiffSample(
fileInfo,
this.options.sampleHeadLines,
this.options.sampleTailLines
);
const summaryTokens = Math.ceil(summary.length / 4); // Rough estimate
if (summaryTokens <= remainingBudget) {
result.summarizedDiffs.push(summary);
tokensUsed += summaryTokens;
} else {
result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
}
} else {
// Large file - metadata only
result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
}
}
result.totalTokens = tokensUsed;
result.summary = this.generateSummary(result);
return result;
}
/**
* Format the processed diff for inclusion in context
*/
public formatForContext(processed: IProcessedDiff): string {
const sections: string[] = [];
// Summary section
sections.push('====== GIT DIFF SUMMARY ======');
sections.push(processed.summary);
sections.push('');
// Full diffs section
if (processed.fullDiffs.length > 0) {
sections.push(`====== FULL DIFFS (${processed.fullDiffs.length} files) ======`);
sections.push(processed.fullDiffs.join('\n\n'));
sections.push('');
}
// Summarized diffs section
if (processed.summarizedDiffs.length > 0) {
sections.push(`====== SUMMARIZED DIFFS (${processed.summarizedDiffs.length} files) ======`);
sections.push(processed.summarizedDiffs.join('\n\n'));
sections.push('');
}
// Metadata only section
if (processed.metadataOnly.length > 0) {
sections.push(`====== METADATA ONLY (${processed.metadataOnly.length} files) ======`);
sections.push(processed.metadataOnly.join('\n'));
sections.push('');
}
sections.push('====== END OF GIT DIFF ======');
return sections.join('\n');
}
/**
* Parse a single git diff string into file information
*/
private parseDiffFile(diffString: string): IDiffFileInfo | null {
if (!diffString || diffString.trim().length === 0) {
return null;
}
const lines = diffString.split('\n');
let filepath = '';
let status: 'added' | 'modified' | 'deleted' = 'modified';
let linesAdded = 0;
let linesRemoved = 0;
// Parse diff header to extract filepath and status
for (const line of lines) {
if (line.startsWith('--- a/')) {
filepath = line.substring(6);
} else if (line.startsWith('+++ b/')) {
const newPath = line.substring(6);
if (newPath === '/dev/null') {
status = 'deleted';
} else if (filepath === '/dev/null') {
status = 'added';
filepath = newPath;
} else {
filepath = newPath;
}
} else if (line.startsWith('+') && !line.startsWith('+++')) {
linesAdded++;
} else if (line.startsWith('-') && !line.startsWith('---')) {
linesRemoved++;
}
}
const totalLines = linesAdded + linesRemoved;
const estimatedTokens = Math.ceil(diffString.length / 4);
return {
filepath,
status,
linesAdded,
linesRemoved,
totalLines,
estimatedTokens,
diffContent: diffString,
};
}
/**
* Prioritize files by importance (source files before build artifacts)
*/
private prioritizeFiles(files: IDiffFileInfo[]): IDiffFileInfo[] {
return files.sort((a, b) => {
const scoreA = this.getFileImportanceScore(a.filepath);
const scoreB = this.getFileImportanceScore(b.filepath);
return scoreB - scoreA; // Higher score first
});
}
/**
* Calculate importance score for a file path
*/
private getFileImportanceScore(filepath: string): number {
// Source files - highest priority
if (filepath.match(/^(src|lib|app|components|pages|api)\//)) {
return 100;
}
// Test files - high priority
if (filepath.match(/\.(test|spec)\.(ts|js|tsx|jsx)$/) || filepath.startsWith('test/')) {
return 80;
}
// Configuration files - medium-high priority
if (filepath.match(/\.(json|yaml|yml|toml|config\.(ts|js))$/)) {
return 60;
}
// Documentation - medium priority
if (filepath.match(/\.(md|txt|rst)$/)) {
return 40;
}
// Build artifacts - low priority
if (filepath.match(/^(dist|build|out|\.next|public\/dist)\//)) {
return 10;
}
// Everything else - default priority
return 50;
}
/**
* Extract head and tail lines from a diff, omitting the middle
*/
private extractDiffSample(fileInfo: IDiffFileInfo, headLines: number, tailLines: number): string {
const lines = fileInfo.diffContent.split('\n');
const totalLines = lines.length;
if (totalLines <= headLines + tailLines) {
// File is small enough to include fully
return fileInfo.diffContent;
}
// Extract file metadata from diff header
const headerLines: string[] = [];
let bodyStartIndex = 0;
for (let i = 0; i < lines.length; i++) {
if (lines[i].startsWith('@@')) {
headerLines.push(...lines.slice(0, i + 1));
bodyStartIndex = i + 1;
break;
}
}
const bodyLines = lines.slice(bodyStartIndex);
const head = bodyLines.slice(0, headLines);
const tail = bodyLines.slice(-tailLines);
const omittedLines = bodyLines.length - headLines - tailLines;
const statusEmoji = fileInfo.status === 'added' ? '' :
fileInfo.status === 'deleted' ? '' : '📝';
const parts: string[] = [];
parts.push(`${statusEmoji} FILE: ${fileInfo.filepath}`);
parts.push(`CHANGES: +${fileInfo.linesAdded} lines, -${fileInfo.linesRemoved} lines (${fileInfo.totalLines} total)`);
parts.push('');
parts.push(...headerLines);
parts.push(...head);
parts.push('');
parts.push(`[... ${omittedLines} lines omitted - use Read tool to see full file ...]`);
parts.push('');
parts.push(...tail);
return parts.join('\n');
}
/**
* Get file status prefix with emoji
*/
private getFileStatusPrefix(fileInfo: IDiffFileInfo): string {
const statusEmoji = fileInfo.status === 'added' ? '' :
fileInfo.status === 'deleted' ? '' : '📝';
return `${statusEmoji} `;
}
/**
* Extract filepath from diff content
*/
private extractFilepathFromDiff(diffContent: string): string {
const lines = diffContent.split('\n');
for (const line of lines) {
if (line.startsWith('+++ b/')) {
return line.substring(6);
}
}
return 'unknown';
}
/**
* Format file info as metadata only
*/
private formatMetadataOnly(fileInfo: IDiffFileInfo): string {
const statusEmoji = fileInfo.status === 'added' ? '' :
fileInfo.status === 'deleted' ? '' : '📝';
return `${statusEmoji} ${fileInfo.filepath} (+${fileInfo.linesAdded}, -${fileInfo.linesRemoved})`;
}
/**
* Generate human-readable summary of processed diff
*/
private generateSummary(result: IProcessedDiff): string {
const parts: string[] = [];
parts.push(`Files changed: ${result.totalFiles} total`);
parts.push(`- ${result.fullDiffs.length} included in full`);
parts.push(`- ${result.summarizedDiffs.length} summarized (head/tail shown)`);
parts.push(`- ${result.metadataOnly.length} metadata only`);
parts.push(`Estimated tokens: ~${result.totalTokens.toLocaleString()}`);
if (result.metadataOnly.length > 0) {
parts.push('');
parts.push('NOTE: Some files excluded to stay within token budget.');
parts.push('Use Read tool with specific file paths to see full content.');
}
return parts.join('\n');
}
}

View File

@@ -5,6 +5,7 @@ import { ContextTrimmer } from './context-trimmer.js';
import { LazyFileLoader } from './lazy-file-loader.js';
import { ContextCache } from './context-cache.js';
import { ContextAnalyzer } from './context-analyzer.js';
import { DiffProcessor } from './diff-processor.js';
import type {
ContextMode,
IContextConfig,
@@ -24,7 +25,10 @@ import type {
IFileAnalysis,
IAnalysisResult,
IIterativeConfig,
IIterativeContextResult
IIterativeContextResult,
IDiffFileInfo,
IProcessedDiff,
IDiffProcessorOptions
} from './types.js';
export {
@@ -36,6 +40,7 @@ export {
LazyFileLoader,
ContextCache,
ContextAnalyzer,
DiffProcessor,
};
// Types
@@ -58,5 +63,8 @@ export type {
IFileAnalysis,
IAnalysisResult,
IIterativeConfig,
IIterativeContextResult
IIterativeContextResult,
IDiffFileInfo,
IProcessedDiff,
IDiffProcessorOptions
};

View File

@@ -28,17 +28,24 @@ export class IterativeContextBuilder {
private config: Required<IIterativeConfig>;
private tokenBudget: number = 190000;
private openaiInstance: plugins.smartai.OpenAiProvider;
private externalOpenaiInstance?: plugins.smartai.OpenAiProvider;
/**
* Creates a new IterativeContextBuilder
* @param projectRoot - Root directory of the project
* @param config - Iterative configuration
* @param openaiInstance - Optional pre-configured OpenAI provider instance
*/
constructor(projectRoot: string, config?: Partial<IIterativeConfig>) {
constructor(
projectRoot: string,
config?: Partial<IIterativeConfig>,
openaiInstance?: plugins.smartai.OpenAiProvider
) {
this.projectRoot = projectRoot;
this.lazyLoader = new LazyFileLoader(projectRoot);
this.cache = new ContextCache(projectRoot);
this.analyzer = new ContextAnalyzer(projectRoot);
this.externalOpenaiInstance = openaiInstance;
// Default configuration
this.config = {
@@ -60,24 +67,30 @@ export class IterativeContextBuilder {
await configManager.initialize(this.projectRoot);
this.tokenBudget = configManager.getMaxTokens();
// Initialize OpenAI instance
const qenvInstance = new plugins.qenv.Qenv();
const openaiToken = await qenvInstance.getEnvVarOnDemand('OPENAI_TOKEN');
if (!openaiToken) {
throw new Error('OPENAI_TOKEN environment variable is required for iterative context building');
// Use external OpenAI instance if provided, otherwise create a new one
if (this.externalOpenaiInstance) {
this.openaiInstance = this.externalOpenaiInstance;
} else {
// Initialize OpenAI instance from environment
const qenvInstance = new plugins.qenv.Qenv();
const openaiToken = await qenvInstance.getEnvVarOnDemand('OPENAI_TOKEN');
if (!openaiToken) {
throw new Error('OPENAI_TOKEN environment variable is required for iterative context building');
}
this.openaiInstance = new plugins.smartai.OpenAiProvider({
openaiToken,
});
await this.openaiInstance.start();
}
this.openaiInstance = new plugins.smartai.OpenAiProvider({
openaiToken,
});
await this.openaiInstance.start();
}
/**
* Build context iteratively using AI decision making
* @param taskType - Type of task being performed
* @param additionalContext - Optional additional context (e.g., git diff for commit tasks)
* @returns Complete iterative context result
*/
public async buildContextIteratively(taskType: TaskType): Promise<IIterativeContextResult> {
public async buildContextIteratively(taskType: TaskType, additionalContext?: string): Promise<IIterativeContextResult> {
const startTime = Date.now();
logger.log('info', '🤖 Starting iterative context building...');
logger.log('info', ` Task: ${taskType}, Budget: ${this.tokenBudget} tokens, Max iterations: ${this.config.maxIterations}`);
@@ -100,6 +113,49 @@ export class IterativeContextBuilder {
let loadedContent = '';
const includedFiles: IFileInfo[] = [];
// If additional context (e.g., git diff) is provided, prepend it
if (additionalContext) {
// CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM
const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio)
const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety
// First check: raw character count
if (additionalContext.length > MAX_DIFF_CHARS) {
const originalSize = additionalContext.length;
logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`);
logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`);
logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`);
logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`);
additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) +
'\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]';
}
const diffSection = `
====== GIT DIFF ======
${additionalContext}
====== END OF GIT DIFF ======
`;
// Second check: actual token count after truncation
const diffTokens = this.countTokens(diffSection);
if (diffTokens > MAX_DIFF_TOKENS) {
logger.log('error', `❌ Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
throw new Error(
`Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
`This indicates massive uncommitted changes, likely build artifacts. ` +
`Please commit or stash dist/, build/, or other generated files.`
);
}
loadedContent = diffSection;
totalTokensUsed += diffTokens;
logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`);
}
// Phase 3: Iterative file selection and loading
for (let iteration = 1; iteration <= this.config.maxIterations; iteration++) {
const iterationStart = Date.now();

View File

@@ -9,14 +9,17 @@ import type { IIterativeContextResult, TaskType } from './types.js';
export class TaskContextFactory {
private projectDir: string;
private configManager: ConfigManager;
private openaiInstance?: any; // OpenAI provider instance
/**
* Create a new TaskContextFactory
* @param projectDirArg The project directory
* @param openaiInstance Optional pre-configured OpenAI provider instance
*/
constructor(projectDirArg: string) {
constructor(projectDirArg: string, openaiInstance?: any) {
this.projectDir = projectDirArg;
this.configManager = ConfigManager.getInstance();
this.openaiInstance = openaiInstance;
}
/**
@@ -32,7 +35,8 @@ export class TaskContextFactory {
public async createContextForReadme(): Promise<IIterativeContextResult> {
const iterativeBuilder = new IterativeContextBuilder(
this.projectDir,
this.configManager.getIterativeConfig()
this.configManager.getIterativeConfig(),
this.openaiInstance
);
await iterativeBuilder.initialize();
return await iterativeBuilder.buildContextIteratively('readme');
@@ -44,7 +48,8 @@ export class TaskContextFactory {
public async createContextForDescription(): Promise<IIterativeContextResult> {
const iterativeBuilder = new IterativeContextBuilder(
this.projectDir,
this.configManager.getIterativeConfig()
this.configManager.getIterativeConfig(),
this.openaiInstance
);
await iterativeBuilder.initialize();
return await iterativeBuilder.buildContextIteratively('description');
@@ -52,16 +57,16 @@ export class TaskContextFactory {
/**
* Create context for commit message generation
* @param gitDiff Optional git diff to include (currently not used in iterative mode)
* @param gitDiff Optional git diff to include in the context
*/
public async createContextForCommit(gitDiff?: string): Promise<IIterativeContextResult> {
const iterativeBuilder = new IterativeContextBuilder(
this.projectDir,
this.configManager.getIterativeConfig()
this.configManager.getIterativeConfig(),
this.openaiInstance
);
await iterativeBuilder.initialize();
// Note: git diff could be incorporated into the iterative prompts if needed
return await iterativeBuilder.buildContextIteratively('commit');
return await iterativeBuilder.buildContextIteratively('commit', gitDiff);
}
/**

View File

@@ -319,3 +319,6 @@ export interface IIterativeContextResult extends IContextResult {
/** Total duration in ms */
totalDuration: number;
}
// Export DiffProcessor types
export type { IDiffFileInfo, IProcessedDiff, IDiffProcessorOptions } from './diff-processor.js';