From ce4da89da9c4d9d7bf9afab6103bfcdcb3681e02 Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Tue, 4 Nov 2025 02:28:55 +0000 Subject: [PATCH] fix(iterative-context-builder): Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging --- changelog.md | 9 +++++++ ts/00_commitinfo_data.ts | 2 +- ts/context/iterative-context-builder.ts | 31 ++++++++----------------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/changelog.md b/changelog.md index d307d29..8ce6876 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,14 @@ # Changelog +## 2025-11-04 - 1.9.1 - fix(iterative-context-builder) +Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging + +- Removed raw character-based truncation of additionalContext — diffs are expected to be pre-processed by DiffProcessor instead of blind substring truncation. +- Now validates pre-processed diff token count only and treats DiffProcessor as the primary sampler (DiffProcessor typically uses a ~100k token budget). +- Increased MAX_DIFF_TOKENS safety net to 200,000 to cover edge cases and avoid false positives; updated logs to reflect pre-processed diffs. +- Improved error messaging to indicate a likely DiffProcessor misconfiguration when pre-processed diffs exceed the safety limit. +- Updated informational logs to state that a pre-processed git diff was added to context. + ## 2025-11-04 - 1.9.0 - feat(context) Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index b9f8084..39197d9 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@git.zone/tsdoc', - version: '1.9.0', + version: '1.9.1', description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.' } diff --git a/ts/context/iterative-context-builder.ts b/ts/context/iterative-context-builder.ts index 27ddb8f..58db162 100644 --- a/ts/context/iterative-context-builder.ts +++ b/ts/context/iterative-context-builder.ts @@ -115,21 +115,9 @@ export class IterativeContextBuilder { // If additional context (e.g., git diff) is provided, prepend it if (additionalContext) { - // CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM - const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio) - const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety - - // First check: raw character count - if (additionalContext.length > MAX_DIFF_CHARS) { - const originalSize = additionalContext.length; - logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`); - logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`); - logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`); - logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`); - - additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) + - '\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]'; - } + // NOTE: additionalContext is expected to be pre-processed by DiffProcessor + // which intelligently samples large diffs to stay within token budget (100k default) + const MAX_DIFF_TOKENS = 200000; // Safety net for edge cases (DiffProcessor uses 100k budget) const diffSection = ` ====== GIT DIFF ====== @@ -139,21 +127,22 @@ ${additionalContext} ====== END OF GIT DIFF ====== `; - // Second check: actual token count after truncation + // Validate token count (should already be under budget from DiffProcessor) const diffTokens = this.countTokens(diffSection); if (diffTokens > MAX_DIFF_TOKENS) { - logger.log('error', `❌ Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`); + logger.log('error', `❌ Pre-processed git diff exceeds safety limit (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`); + logger.log('error', ` This should not happen - DiffProcessor should have limited to ~100k tokens.`); + logger.log('error', ` Please check DiffProcessor configuration and output.`); throw new Error( - `Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` + - `This indicates massive uncommitted changes, likely build artifacts. ` + - `Please commit or stash dist/, build/, or other generated files.` + `Pre-processed git diff size (${diffTokens.toLocaleString()} tokens) exceeds safety limit (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` + + `This indicates a bug in DiffProcessor or misconfiguration.` ); } loadedContent = diffSection; totalTokensUsed += diffTokens; - logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`); + logger.log('info', `📝 Added pre-processed git diff to context (${diffTokens.toLocaleString()} tokens)`); } // Phase 3: Iterative file selection and loading