fix(iterative-context-builder): Rely on DiffProcessor for git diff pre-processing; remove raw char truncation, raise diff token safety, and improve logging

This commit is contained in:
2025-11-04 02:28:55 +00:00
parent 6524adea18
commit ce4da89da9
3 changed files with 20 additions and 22 deletions

View File

@@ -115,21 +115,9 @@ export class IterativeContextBuilder {
// If additional context (e.g., git diff) is provided, prepend it
if (additionalContext) {
// CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM
const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio)
const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety
// First check: raw character count
if (additionalContext.length > MAX_DIFF_CHARS) {
const originalSize = additionalContext.length;
logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`);
logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`);
logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`);
logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`);
additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) +
'\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]';
}
// NOTE: additionalContext is expected to be pre-processed by DiffProcessor
// which intelligently samples large diffs to stay within token budget (100k default)
const MAX_DIFF_TOKENS = 200000; // Safety net for edge cases (DiffProcessor uses 100k budget)
const diffSection = `
====== GIT DIFF ======
@@ -139,21 +127,22 @@ ${additionalContext}
====== END OF GIT DIFF ======
`;
// Second check: actual token count after truncation
// Validate token count (should already be under budget from DiffProcessor)
const diffTokens = this.countTokens(diffSection);
if (diffTokens > MAX_DIFF_TOKENS) {
logger.log('error', `Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
logger.log('error', `Pre-processed git diff exceeds safety limit (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
logger.log('error', ` This should not happen - DiffProcessor should have limited to ~100k tokens.`);
logger.log('error', ` Please check DiffProcessor configuration and output.`);
throw new Error(
`Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
`This indicates massive uncommitted changes, likely build artifacts. ` +
`Please commit or stash dist/, build/, or other generated files.`
`Pre-processed git diff size (${diffTokens.toLocaleString()} tokens) exceeds safety limit (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
`This indicates a bug in DiffProcessor or misconfiguration.`
);
}
loadedContent = diffSection;
totalTokensUsed += diffTokens;
logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`);
logger.log('info', `📝 Added pre-processed git diff to context (${diffTokens.toLocaleString()} tokens)`);
}
// Phase 3: Iterative file selection and loading