fix(iterative-context-builder): Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging

This commit is contained in:
2025-11-04 02:28:55 +00:00
parent 6524adea18
commit ce4da89da9
3 changed files with 20 additions and 22 deletions

View File

@@ -1,5 +1,14 @@
# Changelog # Changelog
## 2025-11-04 - 1.9.1 - fix(iterative-context-builder)
Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging
- Removed raw character-based truncation of additionalContext — diffs are expected to be pre-processed by DiffProcessor instead of blind substring truncation.
- Now validates pre-processed diff token count only and treats DiffProcessor as the primary sampler (DiffProcessor typically uses a ~100k token budget).
- Increased MAX_DIFF_TOKENS safety net to 200,000 to cover edge cases and avoid false positives; updated logs to reflect pre-processed diffs.
- Improved error messaging to indicate a likely DiffProcessor misconfiguration when pre-processed diffs exceed the safety limit.
- Updated informational logs to state that a pre-processed git diff was added to context.
## 2025-11-04 - 1.9.0 - feat(context) ## 2025-11-04 - 1.9.0 - feat(context)
Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline Add intelligent DiffProcessor to summarize and prioritize git diffs and integrate it into the commit context pipeline

View File

@@ -3,6 +3,6 @@
*/ */
export const commitinfo = { export const commitinfo = {
name: '@git.zone/tsdoc', name: '@git.zone/tsdoc',
version: '1.9.0', version: '1.9.1',
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.' description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
} }

View File

@@ -115,21 +115,9 @@ export class IterativeContextBuilder {
// If additional context (e.g., git diff) is provided, prepend it // If additional context (e.g., git diff) is provided, prepend it
if (additionalContext) { if (additionalContext) {
// CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM // NOTE: additionalContext is expected to be pre-processed by DiffProcessor
const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio) // which intelligently samples large diffs to stay within token budget (100k default)
const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety const MAX_DIFF_TOKENS = 200000; // Safety net for edge cases (DiffProcessor uses 100k budget)
// First check: raw character count
if (additionalContext.length > MAX_DIFF_CHARS) {
const originalSize = additionalContext.length;
logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`);
logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`);
logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`);
logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`);
additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) +
'\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]';
}
const diffSection = ` const diffSection = `
====== GIT DIFF ====== ====== GIT DIFF ======
@@ -139,21 +127,22 @@ ${additionalContext}
====== END OF GIT DIFF ====== ====== END OF GIT DIFF ======
`; `;
// Second check: actual token count after truncation // Validate token count (should already be under budget from DiffProcessor)
const diffTokens = this.countTokens(diffSection); const diffTokens = this.countTokens(diffSection);
if (diffTokens > MAX_DIFF_TOKENS) { if (diffTokens > MAX_DIFF_TOKENS) {
logger.log('error', `Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`); logger.log('error', `Pre-processed git diff exceeds safety limit (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
logger.log('error', ` This should not happen - DiffProcessor should have limited to ~100k tokens.`);
logger.log('error', ` Please check DiffProcessor configuration and output.`);
throw new Error( throw new Error(
`Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` + `Pre-processed git diff size (${diffTokens.toLocaleString()} tokens) exceeds safety limit (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
`This indicates massive uncommitted changes, likely build artifacts. ` + `This indicates a bug in DiffProcessor or misconfiguration.`
`Please commit or stash dist/, build/, or other generated files.`
); );
} }
loadedContent = diffSection; loadedContent = diffSection;
totalTokensUsed += diffTokens; totalTokensUsed += diffTokens;
logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`); logger.log('info', `📝 Added pre-processed git diff to context (${diffTokens.toLocaleString()} tokens)`);
} }
// Phase 3: Iterative file selection and loading // Phase 3: Iterative file selection and loading