import * as plugins from '../plugins.js';
import * as fs from 'fs';
import { logger } from '../logging.js';
import type {
  TaskType,
  IFileMetadata,
  IFileInfo,
  IIterativeContextResult,
  IIterationState,
  IFileSelectionDecision,
  IContextSufficiencyDecision,
  IIterativeConfig,
} from './types.js';
import { LazyFileLoader } from './lazy-file-loader.js';
import { ContextCache } from './context-cache.js';
import { ContextAnalyzer } from './context-analyzer.js';
import { ConfigManager } from './config-manager.js';

/**
 * Iterative context builder that uses AI to intelligently select files
 * across multiple iterations until sufficient context is gathered.
 *
 * Usage: construct, `await initialize()`, then `await buildContextIteratively(...)`.
 */
export class IterativeContextBuilder {
  private projectRoot: string;
  private lazyLoader: LazyFileLoader;
  private cache: ContextCache;
  private analyzer: ContextAnalyzer;
  private config: Required<IIterativeConfig>;
  private tokenBudget: number = 190000;
  /** Set by `initialize()`; undefined until then. */
  private openaiInstance?: plugins.smartai.OpenAiProvider;
  private externalOpenaiInstance?: plugins.smartai.OpenAiProvider;

  /**
   * Creates a new IterativeContextBuilder
   * @param projectRoot - Root directory of the project
   * @param config - Iterative configuration; every omitted field falls back to a default
   * @param openaiInstance - Optional pre-configured OpenAI provider instance
   */
  constructor(
    projectRoot: string,
    config?: Partial<IIterativeConfig>,
    openaiInstance?: plugins.smartai.OpenAiProvider
  ) {
    this.projectRoot = projectRoot;
    this.lazyLoader = new LazyFileLoader(projectRoot);
    this.cache = new ContextCache(projectRoot);
    this.analyzer = new ContextAnalyzer(projectRoot);
    this.externalOpenaiInstance = openaiInstance;

    // Default configuration
    this.config = {
      maxIterations: config?.maxIterations ?? 5,
      firstPassFileLimit: config?.firstPassFileLimit ?? 10,
      subsequentPassFileLimit: config?.subsequentPassFileLimit ?? 5,
      temperature: config?.temperature ?? 0.3,
      model: config?.model ?? 'gpt-4-turbo-preview',
    };
  }

  /**
   * Initialize the builder: prepares the cache, reads the token budget from the
   * ConfigManager and makes an OpenAI provider available.
   * @throws Error when no provider was injected and OPENAI_TOKEN is not available
   */
  public async initialize(): Promise<void> {
    await this.cache.init();

    const configManager = ConfigManager.getInstance();
    await configManager.initialize(this.projectRoot);
    this.tokenBudget = configManager.getMaxTokens();

    // Use external OpenAI instance if provided, otherwise create a new one
    if (this.externalOpenaiInstance) {
      this.openaiInstance = this.externalOpenaiInstance;
      return;
    }

    // Initialize OpenAI instance from environment
    const qenvInstance = new plugins.qenv.Qenv();
    const openaiToken = await qenvInstance.getEnvVarOnDemand('OPENAI_TOKEN');
    if (!openaiToken) {
      throw new Error('OPENAI_TOKEN environment variable is required for iterative context building');
    }

    // Only publish the provider once it has started successfully.
    const provider = new plugins.smartai.OpenAiProvider({ openaiToken });
    await provider.start();
    this.openaiInstance = provider;
  }

  /**
   * Build context iteratively using AI decision making
   * @param taskType - Type of task being performed
   * @param additionalContext - Optional additional context (e.g., git diff for commit tasks)
   * @returns Complete iterative context result
   * @throws Error when `initialize()` has not been called
   * @throws SyntaxError when an AI reply cannot be parsed as JSON
   */
  public async buildContextIteratively(
    taskType: TaskType,
    additionalContext?: string
  ): Promise<IIterativeContextResult> {
    // Fail fast before scanning if no provider is available.
    this.requireProvider();

    const startTime = Date.now();
    logger.log('info', '🤖 Starting iterative context building...');
    logger.log('info', ` Task: ${taskType}, Budget: ${this.tokenBudget} tokens, Max iterations: ${this.config.maxIterations}`);

    // Phase 1: Scan project files for metadata
    logger.log('info', '📋 Scanning project files...');
    const metadata = await this.scanProjectFiles(taskType);
    const totalEstimatedTokens = metadata.reduce((sum, m) => sum + m.estimatedTokens, 0);
    logger.log('info', ` Found ${metadata.length} files (~${totalEstimatedTokens} estimated tokens)`);

    // Phase 2: Analyze files for initial prioritization
    logger.log('info', '🔍 Analyzing file dependencies and importance...');
    const analysis = await this.analyzer.analyze(metadata, taskType, []);
    logger.log('info', ` Analysis complete in ${analysis.analysisDuration}ms`);

    // Track state across iterations
    const iterations: IIterationState[] = [];
    let totalTokensUsed = 0;
    let apiCallCount = 0;
    let loadedContent = '';
    const includedFiles: IFileInfo[] = [];
    // Absolute paths already in the context, so a repeated request is not loaded twice.
    const loadedPaths = new Set<string>();

    // If additional context (e.g., git diff) is provided, prepend it
    if (additionalContext) {
      const diffSection = `\n====== GIT DIFF ======\n\n${additionalContext}\n\n====== END OF GIT DIFF ======\n`;
      loadedContent = diffSection;
      const diffTokens = this.countTokens(diffSection);
      totalTokensUsed += diffTokens;
      logger.log('info', `📝 Added git diff to context (${diffTokens} tokens)`);
    }

    // Phase 3: Iterative file selection and loading
    for (let iteration = 1; iteration <= this.config.maxIterations; iteration++) {
      const iterationStart = Date.now();
      logger.log('info', `\n🤔 Iteration ${iteration}/${this.config.maxIterations}: Asking AI which files to examine...`);

      const remainingBudget = this.tokenBudget - totalTokensUsed;
      logger.log('info', ` Token budget remaining: ${remainingBudget}/${this.tokenBudget} (${Math.round((remainingBudget / this.tokenBudget) * 100)}%)`);

      // Get AI decision on which files to load
      const decision = await this.getFileSelectionDecision(
        metadata,
        analysis.files.slice(0, 30), // first 30 analyzed files
        taskType,
        iteration,
        totalTokensUsed,
        remainingBudget,
        loadedContent
      );
      apiCallCount++;

      logger.log('info', ` AI reasoning: ${decision.reasoning}`);
      logger.log('info', ` AI requested ${decision.filesToLoad.length} files`);

      // Load requested files
      const iterationFiles: IFileInfo[] = [];
      let iterationTokens = 0;

      if (decision.filesToLoad.length > 0) {
        logger.log('info', '📥 Loading requested files...');

        for (const requestedPath of decision.filesToLoad) {
          try {
            const absolutePath = this.resolveProjectPath(requestedPath);
            if (loadedPaths.has(absolutePath)) {
              logger.log('warn', ` ✗ ${requestedPath} - already loaded, skipping`);
              continue;
            }

            const fileInfo = await this.loadFile(absolutePath);
            // A missing cached count would turn the budget arithmetic into NaN.
            const fileTokens = fileInfo.tokenCount ?? this.countTokens(fileInfo.contents);

            if (totalTokensUsed + fileTokens > this.tokenBudget) {
              logger.log('warn', ` ✗ ${fileInfo.relativePath} - would exceed budget, skipping`);
              continue;
            }

            loadedContent += this.formatFileForContext(fileInfo);
            includedFiles.push(fileInfo);
            iterationFiles.push(fileInfo);
            loadedPaths.add(absolutePath);
            iterationTokens += fileTokens;
            totalTokensUsed += fileTokens;
            logger.log('info', ` ✓ ${fileInfo.relativePath} (${fileTokens} tokens)`);
          } catch (error: unknown) {
            const reason = error instanceof Error ? error.message : String(error);
            logger.log('warn', ` ✗ Failed to load ${requestedPath}: ${reason}`);
          }
        }
      }

      // Record iteration state
      const iterationDuration = Date.now() - iterationStart;
      iterations.push({
        iteration,
        filesLoaded: iterationFiles,
        tokensUsed: iterationTokens,
        totalTokensUsed,
        decision,
        duration: iterationDuration,
      });

      logger.log('info', ` Iteration ${iteration} complete: ${iterationFiles.length} files loaded, ${iterationTokens} tokens used`);

      // Check if we should continue
      if (totalTokensUsed >= this.tokenBudget * 0.95) {
        logger.log('warn', '⚠️ Approaching token budget limit, stopping iterations');
        break;
      }

      // Ask AI if context is sufficient (skipped after the last iteration)
      if (iteration < this.config.maxIterations) {
        logger.log('info', '🤔 Asking AI if context is sufficient...');
        const sufficiencyDecision = await this.evaluateContextSufficiency(
          loadedContent,
          taskType,
          iteration,
          totalTokensUsed,
          remainingBudget - iterationTokens
        );
        apiCallCount++;

        logger.log('info', ` AI decision: ${sufficiencyDecision.sufficient ? '✅ SUFFICIENT' : '⏭️ NEEDS MORE'}`);
        logger.log('info', ` Reasoning: ${sufficiencyDecision.reasoning}`);

        if (sufficiencyDecision.sufficient) {
          logger.log('ok', '✅ Context building complete - AI determined context is sufficient');
          break;
        }
      }
    }

    const totalDuration = Date.now() - startTime;
    logger.log('ok', `\n✅ Iterative context building complete!`);
    logger.log('info', ` Files included: ${includedFiles.length}`);
    logger.log('info', ` Token usage: ${totalTokensUsed}/${this.tokenBudget} (${Math.round((totalTokensUsed / this.tokenBudget) * 100)}%)`);
    logger.log('info', ` Iterations: ${iterations.length}, API calls: ${apiCallCount}`);
    logger.log('info', ` Total duration: ${(totalDuration / 1000).toFixed(2)}s`);

    return {
      context: loadedContent,
      tokenCount: totalTokensUsed,
      includedFiles,
      trimmedFiles: [],
      excludedFiles: [],
      tokenSavings: 0,
      iterationCount: iterations.length,
      iterations,
      apiCallCount,
      totalDuration,
    };
  }

  /**
   * Scan project files based on task type.
   * Uses the task's configured include paths when present, otherwise the
   * default `ts` / `ts*` globs; a fixed set of config files is always included.
   */
  private async scanProjectFiles(taskType: TaskType): Promise<IFileMetadata[]> {
    const configManager = ConfigManager.getInstance();
    const taskConfig = configManager.getTaskConfig(taskType);

    const includeGlobs = taskConfig?.includePaths?.map(p => `${p}/**/*.ts`) ?? [
      'ts/**/*.ts',
      'ts*/**/*.ts',
    ];
    const configGlobs = ['package.json', 'readme.md', 'readme.hints.md', 'npmextra.json'];

    return await this.lazyLoader.scanFiles([...configGlobs, ...includeGlobs]);
  }

  /**
   * Get AI decision on which files to load.
   * The reply is validated: only non-empty string paths are kept, duplicates are
   * dropped and the list is capped at the configured per-iteration file limit.
   * @throws SyntaxError when the AI reply is not parseable JSON
   */
  private async getFileSelectionDecision(
    allMetadata: IFileMetadata[],
    analyzedFiles: any[],
    taskType: TaskType,
    iteration: number,
    tokensUsed: number,
    remainingBudget: number,
    loadedContent: string
  ): Promise<IFileSelectionDecision> {
    const fileLimit = iteration === 1
      ? this.config.firstPassFileLimit
      : this.config.subsequentPassFileLimit;

    const selectionPrompt = this.buildFileSelectionPrompt(
      allMetadata,
      analyzedFiles,
      taskType,
      iteration,
      tokensUsed,
      remainingBudget,
      loadedContent,
      fileLimit
    );

    const response = await this.requireProvider().chat({
      systemMessage: `You are an AI assistant that helps select the most relevant files for code analysis. You must respond ONLY with valid JSON that can be parsed with JSON.parse(). Do not wrap the JSON in markdown code blocks or add any other text.`,
      userMessage: selectionPrompt,
      messageHistory: [],
    });

    const parsed = this.parseJsonResponse(response.message);

    const requested: unknown[] = Array.isArray(parsed.files_to_load) ? parsed.files_to_load : [];
    const cleanedPaths = requested
      .filter((entry): entry is string => typeof entry === 'string')
      .map(entry => entry.trim())
      .filter(entry => entry.length > 0);
    // The prompt asks for "up to fileLimit" files; enforce it rather than trust the model.
    const filesToLoad = [...new Set(cleanedPaths)].slice(0, fileLimit);

    return {
      reasoning:
        typeof parsed.reasoning === 'string' && parsed.reasoning
          ? parsed.reasoning
          : 'No reasoning provided',
      filesToLoad,
      estimatedTokensNeeded:
        typeof parsed.estimated_tokens_needed === 'number'
          ? parsed.estimated_tokens_needed
          : undefined,
    };
  }

  /**
   * Build prompt for file selection.
   * Lists every scanned file that is not yet part of `loadedContent`, annotated
   * with size, estimated tokens and (when analyzed) its importance score.
   */
  private buildFileSelectionPrompt(
    metadata: IFileMetadata[],
    analyzedFiles: any[],
    taskType: TaskType,
    iteration: number,
    tokensUsed: number,
    remainingBudget: number,
    loadedContent: string,
    fileLimit: number
  ): string {
    const taskDescriptions: Record<string, string> = {
      readme: 'generating a comprehensive README that explains the project\'s purpose, features, and API',
      commit: 'analyzing code changes to generate an intelligent commit message',
      description: 'generating a concise project description for package.json',
    };
    // Avoid interpolating "undefined" for a task type without a description.
    const taskDescription = taskDescriptions[taskType] ?? `completing a ${taskType} task`;

    const alreadyLoadedFiles = this.extractLoadedFilePaths(loadedContent);
    const alreadyLoaded = new Set(alreadyLoadedFiles);

    // Index analysis results by path so the lookup is not repeated per file.
    const analysisByPath = new Map<string, any>();
    for (const analyzed of analyzedFiles) {
      if (!analysisByPath.has(analyzed.path)) {
        analysisByPath.set(analyzed.path, analyzed);
      }
    }

    const availableFiles = metadata
      .filter(m => !alreadyLoaded.has(m.relativePath))
      .map(m => {
        const analysis = analysisByPath.get(m.path);
        const importance = analysis ? `, importance: ${analysis.importanceScore.toFixed(2)}` : '';
        return `- ${m.relativePath} (${m.size} bytes, ~${m.estimatedTokens} tokens${importance})`;
      })
      .join('\n');

    const totalBudget = tokensUsed + remainingBudget;
    const usedPercent = totalBudget > 0 ? Math.round((tokensUsed / totalBudget) * 100) : 0;

    const loadedSection = alreadyLoadedFiles.length > 0
      ? `FILES ALREADY LOADED:\n${alreadyLoadedFiles.map(f => `- ${f}`).join('\n')}\n\n`
      : '';

    const focusSection = iteration === 1
      ? `This is the FIRST iteration. Focus on:
- Main entry points (index.ts, main exports)
- Core classes and interfaces
- Package configuration
`
      : `This is iteration ${iteration}. You've already seen some files. Now focus on:
- Files that complement what you've already loaded
- Dependencies of already-loaded files
- Missing pieces for complete understanding
`;

    return `You are building context for ${taskDescription} in a TypeScript project.

ITERATION: ${iteration}
TOKENS USED: ${tokensUsed}/${totalBudget} (${usedPercent}%)
REMAINING BUDGET: ${remainingBudget} tokens

${loadedSection}AVAILABLE FILES (not yet loaded):
${availableFiles}

Your task: Select up to ${fileLimit} files that will give you the MOST understanding for this ${taskType} task.

${focusSection}
Consider:
1. File importance scores (if provided)
2. File paths (ts/index.ts is likely more important than ts/internal/utils.ts)
3. Token efficiency (prefer smaller files if they provide good information)
4. Remaining budget (${remainingBudget} tokens)

Respond in JSON format:
{
  "reasoning": "Brief explanation of why you're selecting these files",
  "files_to_load": ["path/to/file1.ts", "path/to/file2.ts"],
  "estimated_tokens_needed": 15000
}`;
  }

  /**
   * Evaluate if current context is sufficient.
   * Only the first 3000 characters of the context are sent to the AI.
   * @throws SyntaxError when the AI reply is not parseable JSON
   */
  private async evaluateContextSufficiency(
    loadedContent: string,
    taskType: TaskType,
    iteration: number,
    tokensUsed: number,
    remainingBudget: number
  ): Promise<IContextSufficiencyDecision> {
    // Count START markers only; counting every "======" marker would also
    // count END markers and the git diff section.
    const loadedFileCount = this.extractLoadedFilePaths(loadedContent).length;

    const prompt = `You have been building context for a ${taskType} task across ${iteration} iterations.

CURRENT STATE:
- Tokens used: ${tokensUsed}
- Remaining budget: ${remainingBudget}
- Files loaded: ${loadedFileCount}

CONTEXT SO FAR:
${loadedContent.substring(0, 3000)}... (truncated for brevity)

Question: Do you have SUFFICIENT context to successfully complete the ${taskType} task?

Consider:
- For README: Do you understand the project's purpose, main features, API surface, and usage patterns?
- For commit: Do you understand what changed and why?
- For description: Do you understand the project's core value proposition?

Respond in JSON format:
{
  "sufficient": true or false,
  "reasoning": "Detailed explanation of your decision"
}`;

    const response = await this.requireProvider().chat({
      systemMessage: `You are an AI assistant that evaluates whether gathered context is sufficient for a task. You must respond ONLY with valid JSON that can be parsed with JSON.parse(). Do not wrap the JSON in markdown code blocks or add any other text.`,
      userMessage: prompt,
      messageHistory: [],
    });

    const parsed = this.parseJsonResponse(response.message);

    // Accept a real boolean or the string "true"; a plain truthiness check
    // would treat the string "false" as sufficient.
    const verdict = parsed.sufficient;
    const sufficient =
      verdict === true ||
      (typeof verdict === 'string' && verdict.trim().toLowerCase() === 'true');

    return {
      sufficient,
      reasoning:
        typeof parsed.reasoning === 'string' && parsed.reasoning
          ? parsed.reasoning
          : 'No reasoning provided',
    };
  }

  /**
   * Load a single file with caching.
   * @param filePath - Absolute path, or a path relative to the project root
   * @throws Error when the path resolves outside the project root, or when reading fails
   */
  private async loadFile(filePath: string): Promise<IFileInfo> {
    const absolutePath = this.resolveProjectPath(filePath);
    const relativePath = plugins.path.relative(this.projectRoot, absolutePath);

    // Try cache first
    const cached = await this.cache.get(absolutePath);
    if (cached) {
      return {
        path: absolutePath,
        relativePath,
        contents: cached.contents,
        tokenCount: cached.tokenCount,
      };
    }

    // Load from disk
    const contents = await plugins.smartfile.fs.toStringSync(absolutePath);
    const tokenCount = this.countTokens(contents);

    // Cache it
    const stats = await fs.promises.stat(absolutePath);
    await this.cache.set({
      path: absolutePath,
      contents,
      tokenCount,
      mtime: Math.floor(stats.mtimeMs),
      cachedAt: Date.now(),
    });

    return {
      path: absolutePath,
      relativePath,
      contents,
      tokenCount,
    };
  }

  /**
   * Format a file for inclusion in context.
   * Each marker starts on its own line; `extractLoadedFilePaths` relies on that.
   */
  private formatFileForContext(file: IFileInfo): string {
    return `\n====== START OF FILE ${file.relativePath} ======\n\n${file.contents}\n\n====== END OF FILE ${file.relativePath} ======\n`;
  }

  /**
   * Count tokens in text; falls back to a length / 4 estimate if the tokenizer throws.
   */
  private countTokens(text: string): number {
    try {
      return plugins.gptTokenizer.encode(text).length;
    } catch {
      return Math.ceil(text.length / 4);
    }
  }

  /**
   * Returns the provider set up by `initialize()`.
   * @throws Error when `initialize()` has not completed yet
   */
  private requireProvider(): plugins.smartai.OpenAiProvider {
    if (!this.openaiInstance) {
      throw new Error('IterativeContextBuilder is not initialized - call initialize() first');
    }
    return this.openaiInstance;
  }

  /**
   * Resolves a path against the project root (the AI is shown project-relative
   * paths) and rejects anything that ends up outside the project, since loaded
   * content is forwarded to the AI provider.
   * @throws Error when the resolved path is outside the project root
   */
  private resolveProjectPath(filePath: string): string {
    const absolutePath = plugins.path.resolve(this.projectRoot, filePath);
    const relativePath = plugins.path.relative(this.projectRoot, absolutePath);
    const firstSegment = relativePath.split(/[\\/]/)[0];
    if (firstSegment === '..' || plugins.path.isAbsolute(relativePath)) {
      throw new Error(`path is outside of the project root: ${filePath}`);
    }
    return absolutePath;
  }

  /**
   * Extracts the relative paths of files already present in the context by
   * reading the START OF FILE markers written by `formatFileForContext`.
   */
  private extractLoadedFilePaths(loadedContent: string): string[] {
    const paths: string[] = [];
    for (const marker of loadedContent.matchAll(/^====== START OF FILE (.+?) ======/gm)) {
      const markedPath = marker[1];
      if (markedPath) {
        paths.push(markedPath);
      }
    }
    return paths;
  }

  /**
   * Parses an AI reply as a JSON object.
   * Tries the trimmed text as-is first; if that fails, falls back to the body
   * of the first markdown code fence (with or without a `json` tag, closing
   * fence optional). A reply that parses to a non-object yields an empty
   * object so callers apply their defaults.
   * @throws SyntaxError when neither form is valid JSON
   */
  private parseJsonResponse(rawMessage: string): Record<string, unknown> {
    const text = rawMessage.trim();

    let parsed: unknown;
    try {
      parsed = JSON.parse(text);
    } catch (directError: unknown) {
      const fenced = /```(?:json)?\s*([\s\S]*?)\s*(?:```|$)/i.exec(text);
      const fencedBody = fenced?.[1];
      if (fencedBody === undefined) {
        throw directError;
      }
      parsed = JSON.parse(fencedBody.trim());
    }

    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return {};
    }
    return parsed as Record<string, unknown>;
  }
}