fix(context): Prevent enormous git diffs and OOM during context building by adding exclusion patterns, truncation, and diagnostic logging
This commit is contained in:
10
changelog.md
10
changelog.md
@@ -1,5 +1,15 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 2025-11-04 - 1.8.3 - fix(context)
|
||||||
|
Prevent enormous git diffs and OOM during context building by adding exclusion patterns, truncation, and diagnostic logging
|
||||||
|
|
||||||
|
- Add comprehensive git diff exclusion globs (locks, build artifacts, maps, bundles, IDE folders, logs, caches) when collecting uncommitted diffs to avoid noisy/huge diffs
|
||||||
|
- Pass glob patterns directly to smartgit.getUncommittedDiff, which handles the pattern matching internally
|
||||||
|
- Emit diagnostic statistics for diffs (files changed, total characters, estimated tokens, number of exclusion patterns) and warn on unusually large diffs
|
||||||
|
- Introduce pre-tokenization safety checks in iterative context builder: truncate raw diff text if it exceeds MAX_DIFF_CHARS and throw a clear error if token count still exceeds MAX_DIFF_TOKENS
|
||||||
|
- Format and log token counts using locale-aware formatting for clarity
|
||||||
|
- Improve robustness of commit context generation to reduce risk of OOM / model-limit overruns
|
||||||
|
|
||||||
## 2025-11-03 - 1.8.0 - feat(context)
|
## 2025-11-03 - 1.8.0 - feat(context)
|
||||||
Wire OpenAI provider through task context factory and add git-diff support to iterative context builder
|
Wire OpenAI provider through task context factory and add git-diff support to iterative context builder
|
||||||
|
|
||||||
|
|||||||
8
pnpm-lock.yaml
generated
8
pnpm-lock.yaml
generated
@@ -3799,8 +3799,8 @@ packages:
|
|||||||
resolution: {integrity: sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==}
|
resolution: {integrity: sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==}
|
||||||
engines: {node: '>=4'}
|
engines: {node: '>=4'}
|
||||||
|
|
||||||
minimatch@10.0.3:
|
minimatch@10.1.1:
|
||||||
resolution: {integrity: sha512-IPZ167aShDZZUMdRk66cyQAW3qr0WzbHkPdMYa8bzZhlHhO3jALbKdxcaak7W9FfT2rZNpQuUu4Od7ILEpXSaw==}
|
resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==}
|
||||||
engines: {node: 20 || >=22}
|
engines: {node: 20 || >=22}
|
||||||
|
|
||||||
minimatch@3.1.2:
|
minimatch@3.1.2:
|
||||||
@@ -9797,7 +9797,7 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
foreground-child: 3.3.1
|
foreground-child: 3.3.1
|
||||||
jackspeak: 4.1.1
|
jackspeak: 4.1.1
|
||||||
minimatch: 10.0.3
|
minimatch: 10.1.1
|
||||||
minipass: 7.1.2
|
minipass: 7.1.2
|
||||||
package-json-from-dist: 1.0.1
|
package-json-from-dist: 1.0.1
|
||||||
path-scurry: 2.0.0
|
path-scurry: 2.0.0
|
||||||
@@ -10680,7 +10680,7 @@ snapshots:
|
|||||||
|
|
||||||
min-indent@1.0.1: {}
|
min-indent@1.0.1: {}
|
||||||
|
|
||||||
minimatch@10.0.3:
|
minimatch@10.1.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@isaacs/brace-expansion': 5.0.0
|
'@isaacs/brace-expansion': 5.0.0
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,6 @@
|
|||||||
*/
|
*/
|
||||||
export const commitinfo = {
|
export const commitinfo = {
|
||||||
name: '@git.zone/tsdoc',
|
name: '@git.zone/tsdoc',
|
||||||
version: '1.8.0',
|
version: '1.8.3',
|
||||||
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
|
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,18 +27,71 @@ export class Commit {
|
|||||||
smartgitInstance,
|
smartgitInstance,
|
||||||
this.projectDir
|
this.projectDir
|
||||||
);
|
);
|
||||||
const diffStringArray = await gitRepo.getUncommittedDiff([
|
|
||||||
|
// Define comprehensive exclusion patterns
|
||||||
|
// smartgit@3.3.0+ supports glob patterns natively
|
||||||
|
const excludePatterns = [
|
||||||
|
// Lock files
|
||||||
'pnpm-lock.yaml',
|
'pnpm-lock.yaml',
|
||||||
'package-lock.json',
|
'package-lock.json',
|
||||||
'npm-shrinkwrap.json',
|
'npm-shrinkwrap.json',
|
||||||
'yarn.lock',
|
'yarn.lock',
|
||||||
'deno.lock',
|
'deno.lock',
|
||||||
'bun.lockb',
|
'bun.lockb',
|
||||||
'.claude/*',
|
|
||||||
'.cursor/*',
|
// Build artifacts (main culprit for large diffs!)
|
||||||
'.vscode/*',
|
'dist/**',
|
||||||
'.idea/*',
|
'dist_*/**', // dist_ts, dist_web, etc.
|
||||||
]);
|
'build/**',
|
||||||
|
'.next/**',
|
||||||
|
'out/**',
|
||||||
|
'public/dist/**',
|
||||||
|
|
||||||
|
// Compiled/bundled files
|
||||||
|
'**/*.js.map',
|
||||||
|
'**/*.d.ts.map',
|
||||||
|
'**/*.min.js',
|
||||||
|
'**/*.bundle.js',
|
||||||
|
'**/*.chunk.js',
|
||||||
|
|
||||||
|
// IDE/Editor directories
|
||||||
|
'.claude/**',
|
||||||
|
'.cursor/**',
|
||||||
|
'.vscode/**',
|
||||||
|
'.idea/**',
|
||||||
|
'**/*.swp',
|
||||||
|
'**/*.swo',
|
||||||
|
|
||||||
|
// Logs and caches
|
||||||
|
'.nogit/**',
|
||||||
|
'**/*.log',
|
||||||
|
'.cache/**',
|
||||||
|
'.rpt2_cache/**',
|
||||||
|
'coverage/**',
|
||||||
|
'.nyc_output/**',
|
||||||
|
];
|
||||||
|
|
||||||
|
// Pass glob patterns directly to smartgit - it handles matching internally
|
||||||
|
const diffStringArray = await gitRepo.getUncommittedDiff(excludePatterns);
|
||||||
|
|
||||||
|
// Diagnostic logging for diff statistics
|
||||||
|
if (diffStringArray.length > 0) {
|
||||||
|
const totalChars = diffStringArray.join('\n\n').length;
|
||||||
|
const estimatedTokens = Math.ceil(totalChars / 4);
|
||||||
|
|
||||||
|
console.log(`📊 Git diff statistics:`);
|
||||||
|
console.log(` Files changed: ${diffStringArray.length}`);
|
||||||
|
console.log(` Total characters: ${totalChars.toLocaleString()}`);
|
||||||
|
console.log(` Estimated tokens: ${estimatedTokens.toLocaleString()}`);
|
||||||
|
console.log(` Exclusion patterns: ${excludePatterns.length}`);
|
||||||
|
|
||||||
|
if (estimatedTokens > 50000) {
|
||||||
|
console.warn(`⚠️ WARNING: Unusually large diff (${estimatedTokens.toLocaleString()} tokens)`);
|
||||||
|
console.warn(` This may indicate build artifacts or large files in the diff.`);
|
||||||
|
console.warn(` Consider reviewing uncommitted changes or improving exclusion patterns.`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Use the new TaskContextFactory for optimized context
|
// Use the new TaskContextFactory for optimized context
|
||||||
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(
|
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(
|
||||||
this.projectDir,
|
this.projectDir,
|
||||||
|
|||||||
@@ -115,6 +115,22 @@ export class IterativeContextBuilder {
|
|||||||
|
|
||||||
// If additional context (e.g., git diff) is provided, prepend it
|
// If additional context (e.g., git diff) is provided, prepend it
|
||||||
if (additionalContext) {
|
if (additionalContext) {
|
||||||
|
// CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM
|
||||||
|
const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio)
|
||||||
|
const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety
|
||||||
|
|
||||||
|
// First check: raw character count
|
||||||
|
if (additionalContext.length > MAX_DIFF_CHARS) {
|
||||||
|
const originalSize = additionalContext.length;
|
||||||
|
logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`);
|
||||||
|
logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`);
|
||||||
|
logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`);
|
||||||
|
logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`);
|
||||||
|
|
||||||
|
additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) +
|
||||||
|
'\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]';
|
||||||
|
}
|
||||||
|
|
||||||
const diffSection = `
|
const diffSection = `
|
||||||
====== GIT DIFF ======
|
====== GIT DIFF ======
|
||||||
|
|
||||||
@@ -122,10 +138,22 @@ ${additionalContext}
|
|||||||
|
|
||||||
====== END OF GIT DIFF ======
|
====== END OF GIT DIFF ======
|
||||||
`;
|
`;
|
||||||
loadedContent = diffSection;
|
|
||||||
|
// Second check: actual token count after truncation
|
||||||
const diffTokens = this.countTokens(diffSection);
|
const diffTokens = this.countTokens(diffSection);
|
||||||
|
|
||||||
|
if (diffTokens > MAX_DIFF_TOKENS) {
|
||||||
|
logger.log('error', `❌ Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
|
||||||
|
throw new Error(
|
||||||
|
`Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
|
||||||
|
`This indicates massive uncommitted changes, likely build artifacts. ` +
|
||||||
|
`Please commit or stash dist/, build/, or other generated files.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
loadedContent = diffSection;
|
||||||
totalTokensUsed += diffTokens;
|
totalTokensUsed += diffTokens;
|
||||||
logger.log('info', `📝 Added git diff to context (${diffTokens} tokens)`);
|
logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3: Iterative file selection and loading
|
// Phase 3: Iterative file selection and loading
|
||||||
|
|||||||
Reference in New Issue
Block a user