feat(IterativeContextBuilder): Add iterative AI-driven context builder and integrate into task factory; add tests and iterative configuration

2025-11-03 13:19:29 +00:00
parent 2276fb0c0c
commit 8c3e16a4f2
10 changed files with 801 additions and 109 deletions
--- a/ts/context/iterative-context-builder.ts
+++ b/ts/context/iterative-context-builder.ts
@@ -0,0 +1,467 @@
+import * as plugins from '../plugins.js';
+import * as fs from 'fs';
+import { logger } from '../logging.js';
+import type {
+  TaskType,
+  IFileMetadata,
+  IFileInfo,
+  IIterativeContextResult,
+  IIterationState,
+  IFileSelectionDecision,
+  IContextSufficiencyDecision,
+  IIterativeConfig,
+} from './types.js';
+import { LazyFileLoader } from './lazy-file-loader.js';
+import { ContextCache } from './context-cache.js';
+import { ContextAnalyzer } from './context-analyzer.js';
+import { ConfigManager } from './config-manager.js';
+
+/**
+ * Iterative context builder that uses AI to intelligently select files
+ * across multiple iterations until sufficient context is gathered.
+ *
+ * Usage order matters: `initialize()` creates the OpenAI provider and reads the
+ * token budget, so it has to run before `buildContextIteratively()`.
+ */
+export class IterativeContextBuilder {
+  // Project root; used to construct the collaborators below and to derive relative paths.
+  private projectRoot: string;
+  // Scans the project for file metadata (see scanProjectFiles).
+  private lazyLoader: LazyFileLoader;
+  // Stores file contents and token counts between loads (see loadFile).
+  private cache: ContextCache;
+  // Produces the per-file analysis that is passed to the file selection prompt.
+  private analyzer: ContextAnalyzer;
+  // Fully resolved iterative settings; the constructor fills in every default.
+  private config: Required<IIterativeConfig>;
+  // Token budget for the assembled context; replaced in initialize() by ConfigManager.getMaxTokens().
+  private tokenBudget: number = 190000;
+  // Not assigned in the constructor: only available after initialize() has completed.
+  private openaiInstance: plugins.smartai.OpenAiProvider;
+
+  /**
+   * Wires up the collaborators (lazy loader, cache, analyzer) for the given
+   * project and resolves the iterative settings.
+   *
+   * Every setting that is missing from `config` (or is null/undefined) falls
+   * back to its built-in default: 5 iterations, 10 files on the first pass,
+   * 5 files on later passes, temperature 0.3 and model 'gpt-4-turbo-preview'.
+   * The OpenAI provider is not created here; see `initialize()`.
+   *
+   * @param projectRoot - Root directory of the project
+   * @param config - Optional overrides for the iterative configuration
+   */
+  constructor(projectRoot: string, config?: Partial<IIterativeConfig>) {
+    const overrides: Partial<IIterativeConfig> = config ?? {};
+
+    this.projectRoot = projectRoot;
+    this.lazyLoader = new LazyFileLoader(projectRoot);
+    this.cache = new ContextCache(projectRoot);
+    this.analyzer = new ContextAnalyzer(projectRoot);
+
+    // Resolve each setting individually so a partial override keeps the other defaults.
+    const maxIterations = overrides.maxIterations ?? 5;
+    const firstPassFileLimit = overrides.firstPassFileLimit ?? 10;
+    const subsequentPassFileLimit = overrides.subsequentPassFileLimit ?? 5;
+    const temperature = overrides.temperature ?? 0.3;
+    const model = overrides.model ?? 'gpt-4-turbo-preview';
+
+    this.config = {
+      maxIterations,
+      firstPassFileLimit,
+      subsequentPassFileLimit,
+      temperature,
+      model,
+    };
+  }
+
+  /**
+   * Prepares the builder for use: initializes the cache, reads the token budget
+   * from the shared ConfigManager and starts the OpenAI provider.
+   *
+   * @throws Error when the OPENAI_TOKEN environment variable cannot be resolved
+   */
+  public async initialize(): Promise<void> {
+    await this.cache.init();
+
+    // The token budget comes from the project-level configuration.
+    const settings = ConfigManager.getInstance();
+    await settings.initialize(this.projectRoot);
+    this.tokenBudget = settings.getMaxTokens();
+
+    // Resolve the API token first; without it no AI request can be made.
+    const envReader = new plugins.qenv.Qenv();
+    const openaiToken = await envReader.getEnvVarOnDemand('OPENAI_TOKEN');
+    if (!openaiToken) {
+      throw new Error('OPENAI_TOKEN environment variable is required for iterative context building');
+    }
+
+    const provider = new plugins.smartai.OpenAiProvider({ openaiToken });
+    this.openaiInstance = provider;
+    await provider.start();
+  }
+
+  /**
+   * Build context iteratively using AI decision making.
+   *
+   * Flow: scan the project for file metadata, run the analyzer, then loop up to
+   * `maxIterations` times. Each iteration asks the AI which files to load, loads
+   * the ones that still fit into the token budget and, unless it is the last
+   * iteration, asks the AI whether the gathered context is sufficient. The loop
+   * stops early when the AI reports sufficiency or once 95% of the budget is used.
+   *
+   * A file is added to the context at most once, even when the AI requests it
+   * repeatedly (within one answer or across iterations). Files that fail to load
+   * or would exceed the budget are skipped with a warning.
+   *
+   * @param taskType - Type of task being performed
+   * @returns Complete iterative context result
+   */
+  public async buildContextIteratively(taskType: TaskType): Promise<IIterativeContextResult> {
+    const startTime = Date.now();
+    logger.log('info', '🤖 Starting iterative context building...');
+    logger.log('info', `   Task: ${taskType}, Budget: ${this.tokenBudget} tokens, Max iterations: ${this.config.maxIterations}`);
+
+    // Guard against a zero budget so the progress logs never show "NaN%".
+    const percentOfBudget = (tokens: number): number =>
+      this.tokenBudget > 0 ? Math.round((tokens / this.tokenBudget) * 100) : 0;
+
+    // Phase 1: Scan project files for metadata
+    logger.log('info', '📋 Scanning project files...');
+    const metadata = await this.scanProjectFiles(taskType);
+    const totalEstimatedTokens = metadata.reduce((sum, m) => sum + m.estimatedTokens, 0);
+    logger.log('info', `   Found ${metadata.length} files (~${totalEstimatedTokens} estimated tokens)`);
+
+    // Phase 2: Analyze files for initial prioritization
+    logger.log('info', '🔍 Analyzing file dependencies and importance...');
+    const analysis = await this.analyzer.analyze(metadata, taskType, []);
+    logger.log('info', `   Analysis complete in ${analysis.analysisDuration}ms`);
+
+    // Track state across iterations
+    const iterations: IIterationState[] = [];
+    let totalTokensUsed = 0;
+    let apiCallCount = 0;
+    let loadedContent = '';
+    const includedFiles: IFileInfo[] = [];
+    // Relative paths already part of the context; prevents loading a file twice.
+    const loadedPaths = new Set<string>();
+
+    // Phase 3: Iterative file selection and loading
+    for (let iteration = 1; iteration <= this.config.maxIterations; iteration++) {
+      const iterationStart = Date.now();
+      logger.log('info', `\n🤔 Iteration ${iteration}/${this.config.maxIterations}: Asking AI which files to examine...`);
+
+      const remainingBudget = this.tokenBudget - totalTokensUsed;
+      logger.log('info', `   Token budget remaining: ${remainingBudget}/${this.tokenBudget} (${percentOfBudget(remainingBudget)}%)`);
+
+      // Get AI decision on which files to load
+      const decision = await this.getFileSelectionDecision(
+        metadata,
+        analysis.files.slice(0, 30), // Top 30 files by importance
+        taskType,
+        iteration,
+        totalTokensUsed,
+        remainingBudget,
+        loadedContent
+      );
+      apiCallCount++;
+
+      logger.log('info', `   AI reasoning: ${decision.reasoning}`);
+      logger.log('info', `   AI requested ${decision.filesToLoad.length} files`);
+
+      // Load requested files
+      const iterationFiles: IFileInfo[] = [];
+      let iterationTokens = 0;
+
+      // The AI may list the same path more than once; collapse duplicates up front.
+      const requestedPaths = Array.isArray(decision.filesToLoad)
+        ? Array.from(new Set(decision.filesToLoad))
+        : [];
+
+      if (requestedPaths.length > 0) {
+        logger.log('info', '📥 Loading requested files...');
+
+        for (const filePath of requestedPaths) {
+          try {
+            const fileInfo = await this.loadFile(filePath);
+
+            // A file requested again in a later iteration must not be appended
+            // a second time, otherwise its tokens would be counted twice.
+            if (loadedPaths.has(fileInfo.relativePath)) {
+              logger.log('info', `   ✗ ${fileInfo.relativePath} - already loaded, skipping`);
+              continue;
+            }
+
+            // Fall back to counting when no token count was stored with the file.
+            const tokenCount = fileInfo.tokenCount ?? this.countTokens(fileInfo.contents);
+
+            if (totalTokensUsed + tokenCount <= this.tokenBudget) {
+              const formattedFile = this.formatFileForContext(fileInfo);
+              loadedContent += formattedFile;
+              includedFiles.push(fileInfo);
+              iterationFiles.push(fileInfo);
+              loadedPaths.add(fileInfo.relativePath);
+              iterationTokens += tokenCount;
+              totalTokensUsed += tokenCount;
+
+              logger.log('info', `   ✓ ${fileInfo.relativePath} (${tokenCount} tokens)`);
+            } else {
+              logger.log('warn', `   ✗ ${fileInfo.relativePath} - would exceed budget, skipping`);
+            }
+          } catch (error: unknown) {
+            // Anything can be thrown; only Error instances are guaranteed to carry a message.
+            const message = error instanceof Error ? error.message : String(error);
+            logger.log('warn', `   ✗ Failed to load ${filePath}: ${message}`);
+          }
+        }
+      }
+
+      // Record iteration state
+      const iterationDuration = Date.now() - iterationStart;
+      iterations.push({
+        iteration,
+        filesLoaded: iterationFiles,
+        tokensUsed: iterationTokens,
+        totalTokensUsed,
+        decision,
+        duration: iterationDuration,
+      });
+
+      logger.log('info', `   Iteration ${iteration} complete: ${iterationFiles.length} files loaded, ${iterationTokens} tokens used`);
+
+      // Check if we should continue
+      if (totalTokensUsed >= this.tokenBudget * 0.95) {
+        logger.log('warn', '⚠️  Approaching token budget limit, stopping iterations');
+        break;
+      }
+
+      // Ask AI if context is sufficient (pointless after the final iteration)
+      if (iteration < this.config.maxIterations) {
+        logger.log('info', '🤔 Asking AI if context is sufficient...');
+        const sufficiencyDecision = await this.evaluateContextSufficiency(
+          loadedContent,
+          taskType,
+          iteration,
+          totalTokensUsed,
+          this.tokenBudget - totalTokensUsed
+        );
+        apiCallCount++;
+
+        logger.log('info', `   AI decision: ${sufficiencyDecision.sufficient ? '✅ SUFFICIENT' : '⏭️  NEEDS MORE'}`);
+        logger.log('info', `   Reasoning: ${sufficiencyDecision.reasoning}`);
+
+        if (sufficiencyDecision.sufficient) {
+          logger.log('ok', '✅ Context building complete - AI determined context is sufficient');
+          break;
+        }
+      }
+    }
+
+    const totalDuration = Date.now() - startTime;
+    logger.log('ok', `\n✅ Iterative context building complete!`);
+    logger.log('info', `   Files included: ${includedFiles.length}`);
+    logger.log('info', `   Token usage: ${totalTokensUsed}/${this.tokenBudget} (${percentOfBudget(totalTokensUsed)}%)`);
+    logger.log('info', `   Iterations: ${iterations.length}, API calls: ${apiCallCount}`);
+    logger.log('info', `   Total duration: ${(totalDuration / 1000).toFixed(2)}s`);
+
+    return {
+      context: loadedContent,
+      tokenCount: totalTokensUsed,
+      includedFiles,
+      trimmedFiles: [],
+      excludedFiles: [],
+      tokenSavings: 0,
+      iterationCount: iterations.length,
+      iterations,
+      apiCallCount,
+      totalDuration,
+    };
+  }
+
+  /**
+   * Collects metadata for the files relevant to a task.
+   *
+   * A fixed set of project-level files is always scanned. On top of that, every
+   * `.ts` file below the task's configured include paths is scanned; when the
+   * task has no include paths, the `ts/**` and `ts*/**` globs are used instead.
+   */
+  private async scanProjectFiles(taskType: TaskType): Promise<IFileMetadata[]> {
+    const taskConfig = ConfigManager.getInstance().getTaskConfig(taskType);
+
+    // Project-level files come first, followed by the source globs.
+    const patterns: string[] = [
+      'package.json',
+      'readme.md',
+      'readme.hints.md',
+      'npmextra.json',
+    ];
+
+    const configuredGlobs = taskConfig?.includePaths?.map(p => `${p}/**/*.ts`);
+    if (configuredGlobs) {
+      patterns.push(...configuredGlobs);
+    } else {
+      patterns.push('ts/**/*.ts', 'ts*/**/*.ts');
+    }
+
+    return await this.lazyLoader.scanFiles(patterns);
+  }
+
+  /**
+   * Asks the AI which files to load next and normalizes its answer.
+   *
+   * The first iteration allows up to `firstPassFileLimit` files, later ones up
+   * to `subsequentPassFileLimit`. The limit is stated in the prompt and also
+   * enforced on the parsed answer, because the model is free to ignore it.
+   *
+   * @param allMetadata - Metadata of every scanned file
+   * @param analyzedFiles - Analyzer results used to annotate files with importance scores
+   * @param taskType - Type of task being performed
+   * @param iteration - 1-based iteration number
+   * @param tokensUsed - Tokens consumed by the context so far
+   * @param remainingBudget - Tokens still available
+   * @param loadedContent - Context assembled so far
+   * @returns The AI's reasoning, the validated list of paths to load and its token estimate (if numeric)
+   * @throws Error when the response is not valid JSON or not a JSON object
+   */
+  private async getFileSelectionDecision(
+    allMetadata: IFileMetadata[],
+    analyzedFiles: any[],
+    taskType: TaskType,
+    iteration: number,
+    tokensUsed: number,
+    remainingBudget: number,
+    loadedContent: string
+  ): Promise<IFileSelectionDecision> {
+    const isFirstIteration = iteration === 1;
+    const fileLimit = isFirstIteration
+      ? this.config.firstPassFileLimit
+      : this.config.subsequentPassFileLimit;
+
+    const userPrompt = this.buildFileSelectionPrompt(
+      allMetadata,
+      analyzedFiles,
+      taskType,
+      iteration,
+      tokensUsed,
+      remainingBudget,
+      loadedContent,
+      fileLimit
+    );
+
+    const response = await this.openaiInstance.chat({
+      systemMessage: `You are an AI assistant that helps select the most relevant files for code analysis.
+You must respond ONLY with valid JSON that can be parsed with JSON.parse().
+Do not wrap the JSON in markdown code blocks or add any other text.`,
+      userMessage: userPrompt,
+      messageHistory: [],
+    });
+
+    // Strip an optional markdown fence. The patterns are anchored so that both
+    // a tagged (```json) and an untagged (```) fence are removed completely and
+    // backticks inside JSON string values are left alone.
+    const content = response.message
+      .trim()
+      .replace(/^```(?:json)?\s*/i, '')
+      .replace(/\s*```$/, '')
+      .trim();
+
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(content);
+    } catch (error: unknown) {
+      const reason = error instanceof Error ? error.message : String(error);
+      throw new Error(`AI file selection response is not valid JSON: ${reason}`);
+    }
+    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+      throw new Error('AI file selection response must be a JSON object');
+    }
+    const payload = parsed as Record<string, unknown>;
+
+    // Keep only usable path strings and never hand back more than the limit.
+    const requested: unknown[] = Array.isArray(payload.files_to_load) ? payload.files_to_load : [];
+    const filesToLoad = requested
+      .filter((entry): entry is string => typeof entry === 'string' && entry.trim() !== '')
+      .slice(0, Math.max(0, fileLimit));
+
+    return {
+      reasoning:
+        typeof payload.reasoning === 'string' && payload.reasoning
+          ? payload.reasoning
+          : 'No reasoning provided',
+      filesToLoad,
+      estimatedTokensNeeded:
+        typeof payload.estimated_tokens_needed === 'number'
+          ? payload.estimated_tokens_needed
+          : undefined,
+    };
+  }
+
+  /**
+   * Assembles the user prompt for a file selection request.
+   *
+   * The prompt lists the files that are already part of the context (recovered
+   * from the START markers inside `loadedContent`), the files still available
+   * (annotated with size, estimated tokens and, when an analysis entry with the
+   * same path exists, its importance score) and the JSON answer format.
+   */
+  private buildFileSelectionPrompt(
+    metadata: IFileMetadata[],
+    analyzedFiles: any[],
+    taskType: TaskType,
+    iteration: number,
+    tokensUsed: number,
+    remainingBudget: number,
+    loadedContent: string,
+    fileLimit: number
+  ): string {
+    const taskDescriptions = {
+      readme: 'generating a comprehensive README that explains the project\'s purpose, features, and API',
+      commit: 'analyzing code changes to generate an intelligent commit message',
+      description: 'generating a concise project description for package.json',
+    };
+
+    // Recover the names of loaded files from the "START OF FILE" markers.
+    const alreadyLoadedFiles: string[] = [];
+    if (loadedContent) {
+      for (const section of loadedContent.split('\n======').slice(1)) {
+        const header = section.match(/START OF FILE (.+?) ======/);
+        if (header && header[1]) {
+          alreadyLoadedFiles.push(header[1]);
+        }
+      }
+    }
+
+    // One bullet per file that is not part of the context yet.
+    const availableLines: string[] = [];
+    for (const file of metadata) {
+      if (alreadyLoadedFiles.includes(file.relativePath)) {
+        continue;
+      }
+      const analysis = analyzedFiles.find(candidate => candidate.path === file.path);
+      const importanceNote = analysis ? `, importance: ${analysis.importanceScore.toFixed(2)}` : '';
+      availableLines.push(
+        `- ${file.relativePath} (${file.size} bytes, ~${file.estimatedTokens} tokens${importanceNote})`
+      );
+    }
+    const availableFiles = availableLines.join('\n');
+
+    const totalBudget = tokensUsed + remainingBudget;
+    const usedPercent = Math.round((tokensUsed / totalBudget) * 100);
+
+    // Only present when at least one file has been loaded.
+    let loadedSection = '';
+    if (alreadyLoadedFiles.length > 0) {
+      const bulletList = alreadyLoadedFiles.map(name => `- ${name}`).join('\n');
+      loadedSection = `FILES ALREADY LOADED:\n${bulletList}\n\n`;
+    }
+
+    // The guidance differs between the first pass and follow-up passes.
+    const focusSection = iteration === 1
+      ? `This is the FIRST iteration. Focus on:
+- Main entry points (index.ts, main exports)
+- Core classes and interfaces
+- Package configuration
+`
+      : `This is iteration ${iteration}. You've already seen some files. Now focus on:
+- Files that complement what you've already loaded
+- Dependencies of already-loaded files
+- Missing pieces for complete understanding
+`;
+
+    return `You are building context for ${taskDescriptions[taskType]} in a TypeScript project.
+
+ITERATION: ${iteration}
+TOKENS USED: ${tokensUsed}/${totalBudget} (${usedPercent}%)
+REMAINING BUDGET: ${remainingBudget} tokens
+
+${loadedSection}AVAILABLE FILES (not yet loaded):
+${availableFiles}
+
+Your task: Select up to ${fileLimit} files that will give you the MOST understanding for this ${taskType} task.
+
+${focusSection}
+
+Consider:
+1. File importance scores (if provided)
+2. File paths (ts/index.ts is likely more important than ts/internal/utils.ts)
+3. Token efficiency (prefer smaller files if they provide good information)
+4. Remaining budget (${remainingBudget} tokens)
+
+Respond in JSON format:
+{
+  "reasoning": "Brief explanation of why you're selecting these files",
+  "files_to_load": ["path/to/file1.ts", "path/to/file2.ts"],
+  "estimated_tokens_needed": 15000
+}`;
+  }
+
+  /**
+   * Asks the AI whether the context gathered so far is enough for the task.
+   *
+   * The prompt contains the token statistics, the number of loaded files and
+   * the first 3000 characters of the context.
+   *
+   * @param loadedContent - Context assembled so far
+   * @param taskType - Type of task being performed
+   * @param iteration - Number of iterations completed so far
+   * @param tokensUsed - Tokens consumed by the context so far
+   * @param remainingBudget - Tokens still available
+   * @returns Whether the AI considers the context sufficient, plus its reasoning
+   * @throws Error when the response is not valid JSON or not a JSON object
+   */
+  private async evaluateContextSufficiency(
+    loadedContent: string,
+    taskType: TaskType,
+    iteration: number,
+    tokensUsed: number,
+    remainingBudget: number
+  ): Promise<IContextSufficiencyDecision> {
+    // Every file contributes a START and an END marker; count START markers only
+    // so each file is counted once.
+    const loadedFileCount = loadedContent.split('\n====== START OF FILE ').length - 1;
+
+    // Only claim truncation when the preview really is shorter than the context.
+    const previewLimit = 3000;
+    const contextPreview =
+      loadedContent.length > previewLimit
+        ? `${loadedContent.substring(0, previewLimit)}... (truncated for brevity)`
+        : loadedContent;
+
+    const prompt = `You have been building context for a ${taskType} task across ${iteration} iterations.
+
+CURRENT STATE:
+- Tokens used: ${tokensUsed}
+- Remaining budget: ${remainingBudget}
+- Files loaded: ${loadedFileCount}
+
+CONTEXT SO FAR:
+${contextPreview}
+
+Question: Do you have SUFFICIENT context to successfully complete the ${taskType} task?
+
+Consider:
+- For README: Do you understand the project's purpose, main features, API surface, and usage patterns?
+- For commit: Do you understand what changed and why?
+- For description: Do you understand the project's core value proposition?
+
+Respond in JSON format:
+{
+  "sufficient": true or false,
+  "reasoning": "Detailed explanation of your decision"
+}`;
+
+    const response = await this.openaiInstance.chat({
+      systemMessage: `You are an AI assistant that evaluates whether gathered context is sufficient for a task.
+You must respond ONLY with valid JSON that can be parsed with JSON.parse().
+Do not wrap the JSON in markdown code blocks or add any other text.`,
+      userMessage: prompt,
+      messageHistory: [],
+    });
+
+    // Strip an optional markdown fence. The patterns are anchored so that both
+    // a tagged (```json) and an untagged (```) fence are removed completely.
+    const content = response.message
+      .trim()
+      .replace(/^```(?:json)?\s*/i, '')
+      .replace(/\s*```$/, '')
+      .trim();
+
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(content);
+    } catch (error: unknown) {
+      const reason = error instanceof Error ? error.message : String(error);
+      throw new Error(`AI sufficiency response is not valid JSON: ${reason}`);
+    }
+    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+      throw new Error('AI sufficiency response must be a JSON object');
+    }
+    const payload = parsed as Record<string, unknown>;
+
+    // Accept a real boolean or the string "true"; a truthiness check would
+    // treat the string "false" as sufficient.
+    const flag = payload.sufficient;
+    const sufficient =
+      flag === true || (typeof flag === 'string' && flag.trim().toLowerCase() === 'true');
+
+    return {
+      sufficient,
+      reasoning:
+        typeof payload.reasoning === 'string' && payload.reasoning
+          ? payload.reasoning
+          : 'No reasoning provided',
+    };
+  }
+
+  /**
+   * Load a single file, using the cache when possible.
+   *
+   * Relative paths are resolved against the project root rather than the
+   * process working directory, because the file selection prompt presents
+   * project-relative paths to the AI. Absolute paths are used as given.
+   *
+   * @param filePath - Absolute path, or path relative to the project root
+   * @returns File info with absolute path, project-relative path, contents and token count
+   * @throws When the file cannot be read or stat'ed
+   */
+  private async loadFile(filePath: string): Promise<IFileInfo> {
+    // NOTE(review): filePath originates from an AI response and is not checked
+    // for staying inside the project root - confirm whether that is acceptable.
+    // path.resolve returns an absolute filePath unchanged.
+    const absolutePath = plugins.path.resolve(this.projectRoot, filePath);
+    const relativePath = plugins.path.relative(this.projectRoot, absolutePath);
+
+    // Try cache first
+    const cached = await this.cache.get(absolutePath);
+    if (cached) {
+      return {
+        path: absolutePath,
+        relativePath,
+        contents: cached.contents,
+        tokenCount: cached.tokenCount,
+      };
+    }
+
+    // Load from disk (toStringSync is synchronous, so there is nothing to await)
+    const contents = plugins.smartfile.fs.toStringSync(absolutePath);
+    const tokenCount = this.countTokens(contents);
+
+    // Cache it together with the modification time of the file
+    const stats = await fs.promises.stat(absolutePath);
+    await this.cache.set({
+      path: absolutePath,
+      contents,
+      tokenCount,
+      mtime: Math.floor(stats.mtimeMs),
+      cachedAt: Date.now(),
+    });
+
+    return {
+      path: absolutePath,
+      relativePath,
+      contents,
+      tokenCount,
+    };
+  }
+
+  /**
+   * Wraps a file's contents in START/END marker lines so it can be appended to
+   * the context. The markers carry the file's relative path; the result begins
+   * and ends with a newline.
+   */
+  private formatFileForContext(file: IFileInfo): string {
+    const startMarker = `====== START OF FILE ${file.relativePath} ======`;
+    const endMarker = `====== END OF FILE ${file.relativePath} ======`;
+
+    // The empty entries produce the leading/trailing newline and the blank
+    // lines around the contents.
+    const lines = ['', startMarker, '', `${file.contents}`, '', endMarker, ''];
+    return lines.join('\n');
+  }
+
+  /**
+   * Returns the number of tokens in `text` according to the GPT tokenizer.
+   * If the tokenizer throws, the count is estimated as one token per four
+   * characters, rounded up.
+   */
+  private countTokens(text: string): number {
+    let tokenTotal: number;
+    try {
+      tokenTotal = plugins.gptTokenizer.encode(text).length;
+    } catch (_tokenizerError) {
+      // Fallback estimate when the tokenizer fails.
+      tokenTotal = Math.ceil(text.length / 4);
+    }
+    return tokenTotal;
+  }
+}