import * as plugins from '../plugins.js';
|
|
import * as fs from 'fs';
|
|
import { logger } from '../logging.js';
|
|
import type {
|
|
TaskType,
|
|
IFileMetadata,
|
|
IFileInfo,
|
|
IIterativeContextResult,
|
|
IIterationState,
|
|
IFileSelectionDecision,
|
|
IContextSufficiencyDecision,
|
|
IIterativeConfig,
|
|
} from './types.js';
|
|
import { LazyFileLoader } from './lazy-file-loader.js';
|
|
import { ContextCache } from './context-cache.js';
|
|
import { ContextAnalyzer } from './context-analyzer.js';
|
|
import { ConfigManager } from './config-manager.js';
|
|
|
|
/**
|
|
* Iterative context builder that uses AI to intelligently select files
|
|
* across multiple iterations until sufficient context is gathered
|
|
*/
|
|
export class IterativeContextBuilder {
|
|
private projectRoot: string;
|
|
private lazyLoader: LazyFileLoader;
|
|
private cache: ContextCache;
|
|
private analyzer: ContextAnalyzer;
|
|
private config: Required<IIterativeConfig>;
|
|
private tokenBudget: number = 190000;
|
|
private openaiInstance: plugins.smartai.OpenAiProvider;
|
|
private externalOpenaiInstance?: plugins.smartai.OpenAiProvider;
|
|
|
|
/**
|
|
* Creates a new IterativeContextBuilder
|
|
* @param projectRoot - Root directory of the project
|
|
* @param config - Iterative configuration
|
|
* @param openaiInstance - Optional pre-configured OpenAI provider instance
|
|
*/
|
|
constructor(
|
|
projectRoot: string,
|
|
config?: Partial<IIterativeConfig>,
|
|
openaiInstance?: plugins.smartai.OpenAiProvider
|
|
) {
|
|
this.projectRoot = projectRoot;
|
|
this.lazyLoader = new LazyFileLoader(projectRoot);
|
|
this.cache = new ContextCache(projectRoot);
|
|
this.analyzer = new ContextAnalyzer(projectRoot);
|
|
this.externalOpenaiInstance = openaiInstance;
|
|
|
|
// Default configuration
|
|
this.config = {
|
|
maxIterations: config?.maxIterations ?? 5,
|
|
firstPassFileLimit: config?.firstPassFileLimit ?? 10,
|
|
subsequentPassFileLimit: config?.subsequentPassFileLimit ?? 5,
|
|
temperature: config?.temperature ?? 0.3,
|
|
model: config?.model ?? 'gpt-4-turbo-preview',
|
|
};
|
|
|
|
}
|
|
|
|
/**
|
|
* Initialize the builder
|
|
*/
|
|
public async initialize(): Promise<void> {
|
|
await this.cache.init();
|
|
const configManager = ConfigManager.getInstance();
|
|
await configManager.initialize(this.projectRoot);
|
|
this.tokenBudget = configManager.getMaxTokens();
|
|
|
|
// Use external OpenAI instance if provided, otherwise create a new one
|
|
if (this.externalOpenaiInstance) {
|
|
this.openaiInstance = this.externalOpenaiInstance;
|
|
} else {
|
|
// Initialize OpenAI instance from environment
|
|
const qenvInstance = new plugins.qenv.Qenv();
|
|
const openaiToken = await qenvInstance.getEnvVarOnDemand('OPENAI_TOKEN');
|
|
if (!openaiToken) {
|
|
throw new Error('OPENAI_TOKEN environment variable is required for iterative context building');
|
|
}
|
|
this.openaiInstance = new plugins.smartai.OpenAiProvider({
|
|
openaiToken,
|
|
});
|
|
await this.openaiInstance.start();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Build context iteratively using AI decision making
|
|
* @param taskType - Type of task being performed
|
|
* @param additionalContext - Optional additional context (e.g., git diff for commit tasks)
|
|
* @returns Complete iterative context result
|
|
*/
|
|
public async buildContextIteratively(taskType: TaskType, additionalContext?: string): Promise<IIterativeContextResult> {
|
|
const startTime = Date.now();
|
|
logger.log('info', '🤖 Starting iterative context building...');
|
|
logger.log('info', ` Task: ${taskType}, Budget: ${this.tokenBudget} tokens, Max iterations: ${this.config.maxIterations}`);
|
|
|
|
// Phase 1: Scan project files for metadata
|
|
logger.log('info', '📋 Scanning project files...');
|
|
const metadata = await this.scanProjectFiles(taskType);
|
|
const totalEstimatedTokens = metadata.reduce((sum, m) => sum + m.estimatedTokens, 0);
|
|
logger.log('info', ` Found ${metadata.length} files (~${totalEstimatedTokens} estimated tokens)`);
|
|
|
|
// Phase 2: Analyze files for initial prioritization
|
|
logger.log('info', '🔍 Analyzing file dependencies and importance...');
|
|
const analysis = await this.analyzer.analyze(metadata, taskType, []);
|
|
logger.log('info', ` Analysis complete in ${analysis.analysisDuration}ms`);
|
|
|
|
// Track state across iterations
|
|
const iterations: IIterationState[] = [];
|
|
let totalTokensUsed = 0;
|
|
let apiCallCount = 0;
|
|
let loadedContent = '';
|
|
const includedFiles: IFileInfo[] = [];
|
|
|
|
// If additional context (e.g., git diff) is provided, prepend it
|
|
if (additionalContext) {
|
|
// CRITICAL SAFETY: Check raw string size BEFORE tokenization to prevent OOM
|
|
const MAX_DIFF_CHARS = 500000; // ~125k tokens max (conservative 4 chars/token ratio)
|
|
const MAX_DIFF_TOKENS = 150000; // Hard token limit for safety
|
|
|
|
// First check: raw character count
|
|
if (additionalContext.length > MAX_DIFF_CHARS) {
|
|
const originalSize = additionalContext.length;
|
|
logger.log('warn', `⚠️ Git diff too large (${originalSize.toLocaleString()} chars > ${MAX_DIFF_CHARS.toLocaleString()} limit)`);
|
|
logger.log('warn', ` This likely includes build artifacts (dist/, *.js.map, bundles, etc.)`);
|
|
logger.log('warn', ` Truncating to first ${MAX_DIFF_CHARS.toLocaleString()} characters.`);
|
|
logger.log('warn', ` Consider: git stash build files, improve .gitignore, or review uncommitted changes.`);
|
|
|
|
additionalContext = additionalContext.substring(0, MAX_DIFF_CHARS) +
|
|
'\n\n[... DIFF TRUNCATED - exceeded size limit of ' + MAX_DIFF_CHARS.toLocaleString() + ' chars ...]';
|
|
}
|
|
|
|
const diffSection = `
|
|
====== GIT DIFF ======
|
|
|
|
${additionalContext}
|
|
|
|
====== END OF GIT DIFF ======
|
|
`;
|
|
|
|
// Second check: actual token count after truncation
|
|
const diffTokens = this.countTokens(diffSection);
|
|
|
|
if (diffTokens > MAX_DIFF_TOKENS) {
|
|
logger.log('error', `❌ Git diff still too large after truncation (${diffTokens.toLocaleString()} tokens > ${MAX_DIFF_TOKENS.toLocaleString()} limit)`);
|
|
throw new Error(
|
|
`Git diff size (${diffTokens.toLocaleString()} tokens) exceeds maximum (${MAX_DIFF_TOKENS.toLocaleString()} tokens). ` +
|
|
`This indicates massive uncommitted changes, likely build artifacts. ` +
|
|
`Please commit or stash dist/, build/, or other generated files.`
|
|
);
|
|
}
|
|
|
|
loadedContent = diffSection;
|
|
totalTokensUsed += diffTokens;
|
|
logger.log('info', `📝 Added git diff to context (${diffTokens.toLocaleString()} tokens)`);
|
|
}
|
|
|
|
// Phase 3: Iterative file selection and loading
|
|
for (let iteration = 1; iteration <= this.config.maxIterations; iteration++) {
|
|
const iterationStart = Date.now();
|
|
logger.log('info', `\n🤔 Iteration ${iteration}/${this.config.maxIterations}: Asking AI which files to examine...`);
|
|
|
|
const remainingBudget = this.tokenBudget - totalTokensUsed;
|
|
logger.log('info', ` Token budget remaining: ${remainingBudget}/${this.tokenBudget} (${Math.round((remainingBudget / this.tokenBudget) * 100)}%)`);
|
|
|
|
// Get AI decision on which files to load
|
|
const decision = await this.getFileSelectionDecision(
|
|
metadata,
|
|
analysis.files.slice(0, 30), // Top 30 files by importance
|
|
taskType,
|
|
iteration,
|
|
totalTokensUsed,
|
|
remainingBudget,
|
|
loadedContent
|
|
);
|
|
apiCallCount++;
|
|
|
|
logger.log('info', ` AI reasoning: ${decision.reasoning}`);
|
|
logger.log('info', ` AI requested ${decision.filesToLoad.length} files`);
|
|
|
|
// Load requested files
|
|
const iterationFiles: IFileInfo[] = [];
|
|
let iterationTokens = 0;
|
|
|
|
if (decision.filesToLoad.length > 0) {
|
|
logger.log('info', '📥 Loading requested files...');
|
|
|
|
for (const filePath of decision.filesToLoad) {
|
|
try {
|
|
const fileInfo = await this.loadFile(filePath);
|
|
if (totalTokensUsed + fileInfo.tokenCount! <= this.tokenBudget) {
|
|
const formattedFile = this.formatFileForContext(fileInfo);
|
|
loadedContent += formattedFile;
|
|
includedFiles.push(fileInfo);
|
|
iterationFiles.push(fileInfo);
|
|
iterationTokens += fileInfo.tokenCount!;
|
|
totalTokensUsed += fileInfo.tokenCount!;
|
|
|
|
logger.log('info', ` ✓ ${fileInfo.relativePath} (${fileInfo.tokenCount} tokens)`);
|
|
} else {
|
|
logger.log('warn', ` ✗ ${fileInfo.relativePath} - would exceed budget, skipping`);
|
|
}
|
|
} catch (error) {
|
|
logger.log('warn', ` ✗ Failed to load ${filePath}: ${error.message}`);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Record iteration state
|
|
const iterationDuration = Date.now() - iterationStart;
|
|
iterations.push({
|
|
iteration,
|
|
filesLoaded: iterationFiles,
|
|
tokensUsed: iterationTokens,
|
|
totalTokensUsed,
|
|
decision,
|
|
duration: iterationDuration,
|
|
});
|
|
|
|
logger.log('info', ` Iteration ${iteration} complete: ${iterationFiles.length} files loaded, ${iterationTokens} tokens used`);
|
|
|
|
// Check if we should continue
|
|
if (totalTokensUsed >= this.tokenBudget * 0.95) {
|
|
logger.log('warn', '⚠️ Approaching token budget limit, stopping iterations');
|
|
break;
|
|
}
|
|
|
|
// Ask AI if context is sufficient
|
|
if (iteration < this.config.maxIterations) {
|
|
logger.log('info', '🤔 Asking AI if context is sufficient...');
|
|
const sufficiencyDecision = await this.evaluateContextSufficiency(
|
|
loadedContent,
|
|
taskType,
|
|
iteration,
|
|
totalTokensUsed,
|
|
remainingBudget - iterationTokens
|
|
);
|
|
apiCallCount++;
|
|
|
|
logger.log('info', ` AI decision: ${sufficiencyDecision.sufficient ? '✅ SUFFICIENT' : '⏭️ NEEDS MORE'}`);
|
|
logger.log('info', ` Reasoning: ${sufficiencyDecision.reasoning}`);
|
|
|
|
if (sufficiencyDecision.sufficient) {
|
|
logger.log('ok', '✅ Context building complete - AI determined context is sufficient');
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
const totalDuration = Date.now() - startTime;
|
|
logger.log('ok', `\n✅ Iterative context building complete!`);
|
|
logger.log('info', ` Files included: ${includedFiles.length}`);
|
|
logger.log('info', ` Token usage: ${totalTokensUsed}/${this.tokenBudget} (${Math.round((totalTokensUsed / this.tokenBudget) * 100)}%)`);
|
|
logger.log('info', ` Iterations: ${iterations.length}, API calls: ${apiCallCount}`);
|
|
logger.log('info', ` Total duration: ${(totalDuration / 1000).toFixed(2)}s`);
|
|
|
|
return {
|
|
context: loadedContent,
|
|
tokenCount: totalTokensUsed,
|
|
includedFiles,
|
|
trimmedFiles: [],
|
|
excludedFiles: [],
|
|
tokenSavings: 0,
|
|
iterationCount: iterations.length,
|
|
iterations,
|
|
apiCallCount,
|
|
totalDuration,
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Scan project files based on task type
|
|
*/
|
|
private async scanProjectFiles(taskType: TaskType): Promise<IFileMetadata[]> {
|
|
const configManager = ConfigManager.getInstance();
|
|
const taskConfig = configManager.getTaskConfig(taskType);
|
|
|
|
const includeGlobs = taskConfig?.includePaths?.map(p => `${p}/**/*.ts`) || [
|
|
'ts/**/*.ts',
|
|
'ts*/**/*.ts'
|
|
];
|
|
|
|
const configGlobs = [
|
|
'package.json',
|
|
'readme.md',
|
|
'readme.hints.md',
|
|
'npmextra.json'
|
|
];
|
|
|
|
return await this.lazyLoader.scanFiles([...configGlobs, ...includeGlobs]);
|
|
}
|
|
|
|
/**
|
|
* Get AI decision on which files to load
|
|
*/
|
|
private async getFileSelectionDecision(
|
|
allMetadata: IFileMetadata[],
|
|
analyzedFiles: any[],
|
|
taskType: TaskType,
|
|
iteration: number,
|
|
tokensUsed: number,
|
|
remainingBudget: number,
|
|
loadedContent: string
|
|
): Promise<IFileSelectionDecision> {
|
|
const isFirstIteration = iteration === 1;
|
|
const fileLimit = isFirstIteration
|
|
? this.config.firstPassFileLimit
|
|
: this.config.subsequentPassFileLimit;
|
|
|
|
const systemPrompt = this.buildFileSelectionPrompt(
|
|
allMetadata,
|
|
analyzedFiles,
|
|
taskType,
|
|
iteration,
|
|
tokensUsed,
|
|
remainingBudget,
|
|
loadedContent,
|
|
fileLimit
|
|
);
|
|
|
|
const response = await this.openaiInstance.chat({
|
|
systemMessage: `You are an AI assistant that helps select the most relevant files for code analysis.
|
|
You must respond ONLY with valid JSON that can be parsed with JSON.parse().
|
|
Do not wrap the JSON in markdown code blocks or add any other text.`,
|
|
userMessage: systemPrompt,
|
|
messageHistory: [],
|
|
});
|
|
|
|
// Parse JSON response, handling potential markdown formatting
|
|
const content = response.message.replace('```json', '').replace('```', '').trim();
|
|
const parsed = JSON.parse(content);
|
|
|
|
return {
|
|
reasoning: parsed.reasoning || 'No reasoning provided',
|
|
filesToLoad: parsed.files_to_load || [],
|
|
estimatedTokensNeeded: parsed.estimated_tokens_needed,
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Build prompt for file selection
|
|
*/
|
|
private buildFileSelectionPrompt(
|
|
metadata: IFileMetadata[],
|
|
analyzedFiles: any[],
|
|
taskType: TaskType,
|
|
iteration: number,
|
|
tokensUsed: number,
|
|
remainingBudget: number,
|
|
loadedContent: string,
|
|
fileLimit: number
|
|
): string {
|
|
const taskDescriptions = {
|
|
readme: 'generating a comprehensive README that explains the project\'s purpose, features, and API',
|
|
commit: 'analyzing code changes to generate an intelligent commit message',
|
|
description: 'generating a concise project description for package.json',
|
|
};
|
|
|
|
const alreadyLoadedFiles = loadedContent
|
|
? loadedContent.split('\n======').slice(1).map(section => {
|
|
const match = section.match(/START OF FILE (.+?) ======/);
|
|
return match ? match[1] : '';
|
|
}).filter(Boolean)
|
|
: [];
|
|
|
|
const availableFiles = metadata
|
|
.filter(m => !alreadyLoadedFiles.includes(m.relativePath))
|
|
.map(m => {
|
|
const analysis = analyzedFiles.find(a => a.path === m.path);
|
|
return `- ${m.relativePath} (${m.size} bytes, ~${m.estimatedTokens} tokens${analysis ? `, importance: ${analysis.importanceScore.toFixed(2)}` : ''})`;
|
|
})
|
|
.join('\n');
|
|
|
|
return `You are building context for ${taskDescriptions[taskType]} in a TypeScript project.
|
|
|
|
ITERATION: ${iteration}
|
|
TOKENS USED: ${tokensUsed}/${tokensUsed + remainingBudget} (${Math.round((tokensUsed / (tokensUsed + remainingBudget)) * 100)}%)
|
|
REMAINING BUDGET: ${remainingBudget} tokens
|
|
|
|
${alreadyLoadedFiles.length > 0 ? `FILES ALREADY LOADED:\n${alreadyLoadedFiles.map(f => `- ${f}`).join('\n')}\n\n` : ''}AVAILABLE FILES (not yet loaded):
|
|
${availableFiles}
|
|
|
|
Your task: Select up to ${fileLimit} files that will give you the MOST understanding for this ${taskType} task.
|
|
|
|
${iteration === 1 ? `This is the FIRST iteration. Focus on:
|
|
- Main entry points (index.ts, main exports)
|
|
- Core classes and interfaces
|
|
- Package configuration
|
|
` : `This is iteration ${iteration}. You've already seen some files. Now focus on:
|
|
- Files that complement what you've already loaded
|
|
- Dependencies of already-loaded files
|
|
- Missing pieces for complete understanding
|
|
`}
|
|
|
|
Consider:
|
|
1. File importance scores (if provided)
|
|
2. File paths (ts/index.ts is likely more important than ts/internal/utils.ts)
|
|
3. Token efficiency (prefer smaller files if they provide good information)
|
|
4. Remaining budget (${remainingBudget} tokens)
|
|
|
|
Respond in JSON format:
|
|
{
|
|
"reasoning": "Brief explanation of why you're selecting these files",
|
|
"files_to_load": ["path/to/file1.ts", "path/to/file2.ts"],
|
|
"estimated_tokens_needed": 15000
|
|
}`;
|
|
}
|
|
|
|
/**
|
|
* Evaluate if current context is sufficient
|
|
*/
|
|
private async evaluateContextSufficiency(
|
|
loadedContent: string,
|
|
taskType: TaskType,
|
|
iteration: number,
|
|
tokensUsed: number,
|
|
remainingBudget: number
|
|
): Promise<IContextSufficiencyDecision> {
|
|
const prompt = `You have been building context for a ${taskType} task across ${iteration} iterations.
|
|
|
|
CURRENT STATE:
|
|
- Tokens used: ${tokensUsed}
|
|
- Remaining budget: ${remainingBudget}
|
|
- Files loaded: ${loadedContent.split('\n======').length - 1}
|
|
|
|
CONTEXT SO FAR:
|
|
${loadedContent.substring(0, 3000)}... (truncated for brevity)
|
|
|
|
Question: Do you have SUFFICIENT context to successfully complete the ${taskType} task?
|
|
|
|
Consider:
|
|
- For README: Do you understand the project's purpose, main features, API surface, and usage patterns?
|
|
- For commit: Do you understand what changed and why?
|
|
- For description: Do you understand the project's core value proposition?
|
|
|
|
Respond in JSON format:
|
|
{
|
|
"sufficient": true or false,
|
|
"reasoning": "Detailed explanation of your decision"
|
|
}`;
|
|
|
|
const response = await this.openaiInstance.chat({
|
|
systemMessage: `You are an AI assistant that evaluates whether gathered context is sufficient for a task.
|
|
You must respond ONLY with valid JSON that can be parsed with JSON.parse().
|
|
Do not wrap the JSON in markdown code blocks or add any other text.`,
|
|
userMessage: prompt,
|
|
messageHistory: [],
|
|
});
|
|
|
|
// Parse JSON response, handling potential markdown formatting
|
|
const content = response.message.replace('```json', '').replace('```', '').trim();
|
|
const parsed = JSON.parse(content);
|
|
|
|
return {
|
|
sufficient: parsed.sufficient || false,
|
|
reasoning: parsed.reasoning || 'No reasoning provided',
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Load a single file with caching
|
|
*/
|
|
private async loadFile(filePath: string): Promise<IFileInfo> {
|
|
// Try cache first
|
|
const cached = await this.cache.get(filePath);
|
|
if (cached) {
|
|
return {
|
|
path: filePath,
|
|
relativePath: plugins.path.relative(this.projectRoot, filePath),
|
|
contents: cached.contents,
|
|
tokenCount: cached.tokenCount,
|
|
};
|
|
}
|
|
|
|
// Load from disk
|
|
const contents = await plugins.smartfile.fs.toStringSync(filePath);
|
|
const tokenCount = this.countTokens(contents);
|
|
const relativePath = plugins.path.relative(this.projectRoot, filePath);
|
|
|
|
// Cache it
|
|
const stats = await fs.promises.stat(filePath);
|
|
await this.cache.set({
|
|
path: filePath,
|
|
contents,
|
|
tokenCount,
|
|
mtime: Math.floor(stats.mtimeMs),
|
|
cachedAt: Date.now(),
|
|
});
|
|
|
|
return {
|
|
path: filePath,
|
|
relativePath,
|
|
contents,
|
|
tokenCount,
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Format a file for inclusion in context
|
|
*/
|
|
private formatFileForContext(file: IFileInfo): string {
|
|
return `
|
|
====== START OF FILE ${file.relativePath} ======
|
|
|
|
${file.contents}
|
|
|
|
====== END OF FILE ${file.relativePath} ======
|
|
`;
|
|
}
|
|
|
|
/**
|
|
* Count tokens in text
|
|
*/
|
|
private countTokens(text: string): number {
|
|
try {
|
|
const tokens = plugins.gptTokenizer.encode(text);
|
|
return tokens.length;
|
|
} catch (error) {
|
|
return Math.ceil(text.length / 4);
|
|
}
|
|
}
|
|
}
|