feat(IterativeContextBuilder): Add iterative AI-driven context builder and integrate into task factory; add tests and iterative configuration
This commit is contained in:
467
ts/context/iterative-context-builder.ts
Normal file
467
ts/context/iterative-context-builder.ts
Normal file
@@ -0,0 +1,467 @@
|
||||
import * as plugins from '../plugins.js';
|
||||
import * as fs from 'fs';
|
||||
import { logger } from '../logging.js';
|
||||
import type {
|
||||
TaskType,
|
||||
IFileMetadata,
|
||||
IFileInfo,
|
||||
IIterativeContextResult,
|
||||
IIterationState,
|
||||
IFileSelectionDecision,
|
||||
IContextSufficiencyDecision,
|
||||
IIterativeConfig,
|
||||
} from './types.js';
|
||||
import { LazyFileLoader } from './lazy-file-loader.js';
|
||||
import { ContextCache } from './context-cache.js';
|
||||
import { ContextAnalyzer } from './context-analyzer.js';
|
||||
import { ConfigManager } from './config-manager.js';
|
||||
|
||||
/**
|
||||
* Iterative context builder that uses AI to intelligently select files
|
||||
* across multiple iterations until sufficient context is gathered
|
||||
*/
|
||||
export class IterativeContextBuilder {
|
||||
private projectRoot: string;
|
||||
private lazyLoader: LazyFileLoader;
|
||||
private cache: ContextCache;
|
||||
private analyzer: ContextAnalyzer;
|
||||
private config: Required<IIterativeConfig>;
|
||||
private tokenBudget: number = 190000;
|
||||
private openaiInstance: plugins.smartai.OpenAiProvider;
|
||||
|
||||
/**
 * Creates a new IterativeContextBuilder.
 *
 * Wires up the file loader, cache and analyzer for the given project and
 * resolves the effective configuration: any option that is missing
 * (null/undefined) in `config` falls back to its built-in default.
 *
 * @param projectRoot - Root directory of the project
 * @param config - Optional overrides for the iterative configuration
 */
constructor(projectRoot: string, config?: Partial<IIterativeConfig>) {
  this.projectRoot = projectRoot;
  this.lazyLoader = new LazyFileLoader(projectRoot);
  this.cache = new ContextCache(projectRoot);
  this.analyzer = new ContextAnalyzer(projectRoot);

  // Resolve each option individually so a null/undefined override never
  // shadows the default.
  const maxIterations = config?.maxIterations ?? 5;
  const firstPassFileLimit = config?.firstPassFileLimit ?? 10;
  const subsequentPassFileLimit = config?.subsequentPassFileLimit ?? 5;
  const temperature = config?.temperature ?? 0.3;
  const model = config?.model ?? 'gpt-4-turbo-preview';

  this.config = {
    maxIterations,
    firstPassFileLimit,
    subsequentPassFileLimit,
    temperature,
    model,
  };
}
|
||||
|
||||
/**
 * Initialize the builder.
 *
 * Prepares the cache, reads the token budget from the shared ConfigManager
 * and starts the OpenAI provider. Must complete before
 * `buildContextIteratively` is called, because that method talks to the
 * provider created here.
 *
 * @throws Error when the OPENAI_TOKEN environment variable is not available
 */
public async initialize(): Promise<void> {
  await this.cache.init();

  // The token budget comes from the project-level configuration.
  const sharedConfig = ConfigManager.getInstance();
  await sharedConfig.initialize(this.projectRoot);
  this.tokenBudget = sharedConfig.getMaxTokens();

  // Resolve the API token and bring up the OpenAI provider.
  const openaiToken = await new plugins.qenv.Qenv().getEnvVarOnDemand('OPENAI_TOKEN');
  if (!openaiToken) {
    throw new Error('OPENAI_TOKEN environment variable is required for iterative context building');
  }

  const provider = new plugins.smartai.OpenAiProvider({ openaiToken });
  this.openaiInstance = provider;
  await provider.start();
}
|
||||
|
||||
/**
 * Build context iteratively using AI decision making.
 *
 * Flow:
 *  1. Scan the project for file metadata.
 *  2. Run the analyzer to obtain an importance ranking.
 *  3. For up to `config.maxIterations` rounds, ask the AI which files to
 *     load, load those that still fit into the token budget, then ask the AI
 *     whether the gathered context is sufficient.
 *
 * The loop stops early when the AI reports sufficient context or when 95% of
 * the token budget has been consumed. A file is added to the context at most
 * once, even if the AI requests it again in a later iteration.
 *
 * @param taskType - Type of task being performed
 * @returns Complete iterative context result
 */
public async buildContextIteratively(taskType: TaskType): Promise<IIterativeContextResult> {
  const startTime = Date.now();

  // Guard the percentage maths so a zero budget never logs NaN/Infinity.
  const percentOfBudget = (tokens: number): number =>
    this.tokenBudget > 0 ? Math.round((tokens / this.tokenBudget) * 100) : 0;

  logger.log('info', '🤖 Starting iterative context building...');
  logger.log('info', ` Task: ${taskType}, Budget: ${this.tokenBudget} tokens, Max iterations: ${this.config.maxIterations}`);

  // Phase 1: Scan project files for metadata
  logger.log('info', '📋 Scanning project files...');
  const metadata = await this.scanProjectFiles(taskType);
  const totalEstimatedTokens = metadata.reduce((sum, m) => sum + m.estimatedTokens, 0);
  logger.log('info', ` Found ${metadata.length} files (~${totalEstimatedTokens} estimated tokens)`);

  // Phase 2: Analyze files for initial prioritization
  logger.log('info', '🔍 Analyzing file dependencies and importance...');
  const analysis = await this.analyzer.analyze(metadata, taskType, []);
  logger.log('info', ` Analysis complete in ${analysis.analysisDuration}ms`);

  // Track state across iterations
  const iterations: IIterationState[] = [];
  let totalTokensUsed = 0;
  let apiCallCount = 0;
  let loadedContent = '';
  const includedFiles: IFileInfo[] = [];
  // Relative paths already present in the context; prevents the same file
  // from being appended (and charged against the budget) more than once.
  const loadedRelativePaths = new Set<string>();

  // Phase 3: Iterative file selection and loading
  for (let iteration = 1; iteration <= this.config.maxIterations; iteration++) {
    const iterationStart = Date.now();
    logger.log('info', `\n🤔 Iteration ${iteration}/${this.config.maxIterations}: Asking AI which files to examine...`);

    const remainingBudget = this.tokenBudget - totalTokensUsed;
    logger.log('info', ` Token budget remaining: ${remainingBudget}/${this.tokenBudget} (${percentOfBudget(remainingBudget)}%)`);

    // Get AI decision on which files to load
    const decision = await this.getFileSelectionDecision(
      metadata,
      analysis.files.slice(0, 30), // Top 30 files by importance
      taskType,
      iteration,
      totalTokensUsed,
      remainingBudget,
      loadedContent
    );
    apiCallCount++;

    logger.log('info', ` AI reasoning: ${decision.reasoning}`);
    logger.log('info', ` AI requested ${decision.filesToLoad.length} files`);

    // Load requested files
    const iterationFiles: IFileInfo[] = [];
    let iterationTokens = 0;

    if (decision.filesToLoad.length > 0) {
      logger.log('info', '📥 Loading requested files...');

      for (const filePath of decision.filesToLoad) {
        try {
          const fileInfo = await this.loadFile(filePath);

          if (loadedRelativePaths.has(fileInfo.relativePath)) {
            logger.log('info', ` ↷ ${fileInfo.relativePath} - already loaded, skipping`);
            continue;
          }

          // Fall back to counting when the loader did not provide a count,
          // so the budget arithmetic can never become NaN.
          const fileTokens = fileInfo.tokenCount ?? this.countTokens(fileInfo.contents);

          if (totalTokensUsed + fileTokens <= this.tokenBudget) {
            loadedContent += this.formatFileForContext(fileInfo);
            includedFiles.push(fileInfo);
            iterationFiles.push(fileInfo);
            loadedRelativePaths.add(fileInfo.relativePath);
            iterationTokens += fileTokens;
            totalTokensUsed += fileTokens;

            logger.log('info', ` ✓ ${fileInfo.relativePath} (${fileTokens} tokens)`);
          } else {
            logger.log('warn', ` ✗ ${fileInfo.relativePath} - would exceed budget, skipping`);
          }
        } catch (error: unknown) {
          // A single unreadable file must not abort the whole build.
          const reason = error instanceof Error ? error.message : String(error);
          logger.log('warn', ` ✗ Failed to load ${filePath}: ${reason}`);
        }
      }
    }

    // Record iteration state
    const iterationDuration = Date.now() - iterationStart;
    iterations.push({
      iteration,
      filesLoaded: iterationFiles,
      tokensUsed: iterationTokens,
      totalTokensUsed,
      decision,
      duration: iterationDuration,
    });

    logger.log('info', ` Iteration ${iteration} complete: ${iterationFiles.length} files loaded, ${iterationTokens} tokens used`);

    // Check if we should continue
    if (totalTokensUsed >= this.tokenBudget * 0.95) {
      logger.log('warn', '⚠️ Approaching token budget limit, stopping iterations');
      break;
    }

    // Ask AI if context is sufficient (pointless after the final iteration)
    if (iteration < this.config.maxIterations) {
      logger.log('info', '🤔 Asking AI if context is sufficient...');
      const sufficiencyDecision = await this.evaluateContextSufficiency(
        loadedContent,
        taskType,
        iteration,
        totalTokensUsed,
        this.tokenBudget - totalTokensUsed
      );
      apiCallCount++;

      logger.log('info', ` AI decision: ${sufficiencyDecision.sufficient ? '✅ SUFFICIENT' : '⏭️ NEEDS MORE'}`);
      logger.log('info', ` Reasoning: ${sufficiencyDecision.reasoning}`);

      if (sufficiencyDecision.sufficient) {
        logger.log('ok', '✅ Context building complete - AI determined context is sufficient');
        break;
      }
    }
  }

  const totalDuration = Date.now() - startTime;
  logger.log('ok', `\n✅ Iterative context building complete!`);
  logger.log('info', ` Files included: ${includedFiles.length}`);
  logger.log('info', ` Token usage: ${totalTokensUsed}/${this.tokenBudget} (${percentOfBudget(totalTokensUsed)}%)`);
  logger.log('info', ` Iterations: ${iterations.length}, API calls: ${apiCallCount}`);
  logger.log('info', ` Total duration: ${(totalDuration / 1000).toFixed(2)}s`);

  return {
    context: loadedContent,
    tokenCount: totalTokensUsed,
    includedFiles,
    trimmedFiles: [],
    excludedFiles: [],
    tokenSavings: 0,
    iterationCount: iterations.length,
    iterations,
    apiCallCount,
    totalDuration,
  };
}
|
||||
|
||||
/**
 * Scan project files based on task type.
 *
 * Always scans a fixed set of project-level files (package.json, readme
 * files, npmextra.json). TypeScript sources come from the task's configured
 * `includePaths` when present, otherwise from the default `ts` / `ts*`
 * directories.
 *
 * @param taskType - Task whose configuration selects the source directories
 * @returns Metadata for every file matched by the globs
 */
private async scanProjectFiles(taskType: TaskType): Promise<IFileMetadata[]> {
  const taskConfig = ConfigManager.getInstance().getTaskConfig(taskType);

  const projectFileGlobs = ['package.json', 'readme.md', 'readme.hints.md', 'npmextra.json'];

  const configuredSourceGlobs = taskConfig?.includePaths?.map(
    (includePath) => `${includePath}/**/*.ts`
  );
  const sourceGlobs = configuredSourceGlobs || ['ts/**/*.ts', 'ts*/**/*.ts'];

  return await this.lazyLoader.scanFiles(projectFileGlobs.concat(sourceGlobs));
}
|
||||
|
||||
/**
 * Get AI decision on which files to load.
 *
 * Builds the selection prompt, sends it to the OpenAI provider and converts
 * the JSON reply into an `IFileSelectionDecision`.
 *
 * The reply is treated as untrusted: a surrounding markdown code fence is
 * stripped, `files_to_load` is reduced to unique non-empty strings, and the
 * list is capped at the per-iteration file limit from the configuration
 * (`firstPassFileLimit` on iteration 1, `subsequentPassFileLimit` afterwards).
 *
 * NOTE(review): `config.model` and `config.temperature` are not passed to the
 * provider here — confirm whether the chat API is expected to receive them.
 *
 * @param allMetadata - Metadata of every scanned file
 * @param analyzedFiles - Analyzer results used to annotate importance
 * @param taskType - Type of task being performed
 * @param iteration - 1-based iteration number
 * @param tokensUsed - Tokens consumed so far
 * @param remainingBudget - Tokens still available
 * @param loadedContent - Context accumulated so far
 * @returns The AI's reasoning and the files it wants loaded
 * @throws SyntaxError when the reply is not valid JSON
 */
private async getFileSelectionDecision(
  allMetadata: IFileMetadata[],
  analyzedFiles: any[],
  taskType: TaskType,
  iteration: number,
  tokensUsed: number,
  remainingBudget: number,
  loadedContent: string
): Promise<IFileSelectionDecision> {
  const isFirstIteration = iteration === 1;
  const fileLimit = isFirstIteration
    ? this.config.firstPassFileLimit
    : this.config.subsequentPassFileLimit;

  const userPrompt = this.buildFileSelectionPrompt(
    allMetadata,
    analyzedFiles,
    taskType,
    iteration,
    tokensUsed,
    remainingBudget,
    loadedContent,
    fileLimit
  );

  const response = await this.openaiInstance.chat({
    systemMessage: `You are an AI assistant that helps select the most relevant files for code analysis.
You must respond ONLY with valid JSON that can be parsed with JSON.parse().
Do not wrap the JSON in markdown code blocks or add any other text.`,
    userMessage: userPrompt,
    messageHistory: [],
  });

  // Models sometimes wrap the JSON in a markdown fence despite the
  // instructions. Strip one leading and one trailing fence (with or without
  // a language tag) so both "```json … ```" and "``` … ```" parse.
  const content = response.message
    .trim()
    .replace(/^```[a-z]*\s*/i, '')
    .replace(/\s*```$/, '')
    .trim();
  const parsed = JSON.parse(content);

  // Accept only real strings; anything else (a bare string, numbers, null)
  // would otherwise be iterated or loaded as a bogus path by the caller.
  const requested: unknown[] = Array.isArray(parsed?.files_to_load) ? parsed.files_to_load : [];
  const uniquePaths = Array.from(
    new Set(
      requested
        .filter((entry): entry is string => typeof entry === 'string')
        .map((entry) => entry.trim())
        .filter((entry) => entry.length > 0)
    )
  );

  return {
    reasoning: parsed?.reasoning || 'No reasoning provided',
    // The prompt asks for "up to fileLimit" files; enforce it here.
    filesToLoad: uniquePaths.slice(0, Math.max(0, fileLimit)),
    estimatedTokensNeeded: parsed?.estimated_tokens_needed,
  };
}
|
||||
|
||||
/**
 * Build prompt for file selection.
 *
 * The prompt lists the files already present in `loadedContent`
 * (recovered from the "====== START OF FILE … ======" marker lines), the
 * files that are still available together with their size, estimated tokens
 * and — when the analyzer ranked them — importance score, and asks the model
 * to answer with a JSON object naming up to `fileLimit` files.
 *
 * @param metadata - Metadata of every scanned file
 * @param analyzedFiles - Analyzer results; matched to metadata by `path`
 * @param taskType - Type of task being performed
 * @param iteration - 1-based iteration number
 * @param tokensUsed - Tokens consumed so far
 * @param remainingBudget - Tokens still available
 * @param loadedContent - Context accumulated so far
 * @param fileLimit - Maximum number of files the model should select
 * @returns The user prompt text
 */
private buildFileSelectionPrompt(
  metadata: IFileMetadata[],
  analyzedFiles: any[],
  taskType: TaskType,
  iteration: number,
  tokensUsed: number,
  remainingBudget: number,
  loadedContent: string,
  fileLimit: number
): string {
  const taskDescriptions: Record<string, string> = {
    readme: 'generating a comprehensive README that explains the project\'s purpose, features, and API',
    commit: 'analyzing code changes to generate an intelligent commit message',
    description: 'generating a concise project description for package.json',
  };
  // Never let an unlisted task type render as "undefined" in the prompt.
  const taskDescription = taskDescriptions[taskType] ?? `a ${taskType} task`;

  // Only whole marker lines count, so marker-like text embedded in the
  // middle of a loaded file's line is not mistaken for a loaded file.
  const alreadyLoadedFiles = Array.from(
    loadedContent.matchAll(/^====== START OF FILE (.+?) ======$/gm),
    (marker) => marker[1]
  );
  const alreadyLoaded = new Set(alreadyLoadedFiles);

  const availableFiles = metadata
    .filter(m => !alreadyLoaded.has(m.relativePath))
    .map(m => {
      const analysis = analyzedFiles.find(a => a.path === m.path);
      return `- ${m.relativePath} (${m.size} bytes, ~${m.estimatedTokens} tokens${analysis ? `, importance: ${analysis.importanceScore.toFixed(2)}` : ''})`;
    })
    .join('\n');

  // Avoid "NaN%" when the overall budget is zero.
  const totalBudget = tokensUsed + remainingBudget;
  const usedPercent = totalBudget > 0 ? Math.round((tokensUsed / totalBudget) * 100) : 0;

  return `You are building context for ${taskDescription} in a TypeScript project.

ITERATION: ${iteration}
TOKENS USED: ${tokensUsed}/${totalBudget} (${usedPercent}%)
REMAINING BUDGET: ${remainingBudget} tokens

${alreadyLoadedFiles.length > 0 ? `FILES ALREADY LOADED:\n${alreadyLoadedFiles.map(f => `- ${f}`).join('\n')}\n\n` : ''}AVAILABLE FILES (not yet loaded):
${availableFiles}

Your task: Select up to ${fileLimit} files that will give you the MOST understanding for this ${taskType} task.

${iteration === 1 ? `This is the FIRST iteration. Focus on:
- Main entry points (index.ts, main exports)
- Core classes and interfaces
- Package configuration
` : `This is iteration ${iteration}. You've already seen some files. Now focus on:
- Files that complement what you've already loaded
- Dependencies of already-loaded files
- Missing pieces for complete understanding
`}

Consider:
1. File importance scores (if provided)
2. File paths (ts/index.ts is likely more important than ts/internal/utils.ts)
3. Token efficiency (prefer smaller files if they provide good information)
4. Remaining budget (${remainingBudget} tokens)

Respond in JSON format:
{
"reasoning": "Brief explanation of why you're selecting these files",
"files_to_load": ["path/to/file1.ts", "path/to/file2.ts"],
"estimated_tokens_needed": 15000
}`;
}
|
||||
|
||||
/**
 * Evaluate if current context is sufficient.
 *
 * Sends a summary of the current state (tokens used, remaining budget,
 * number of loaded files and the first 3000 characters of the context) to
 * the OpenAI provider and parses its JSON verdict.
 *
 * The verdict is only accepted as sufficient when the model returns a real
 * boolean `true` (or the string "true"); anything else — including the
 * string "false" — is treated as "needs more context".
 *
 * @param loadedContent - Context accumulated so far
 * @param taskType - Type of task being performed
 * @param iteration - Number of iterations completed
 * @param tokensUsed - Tokens consumed so far
 * @param remainingBudget - Tokens still available
 * @returns Whether the context is sufficient, with the model's reasoning
 * @throws SyntaxError when the reply is not valid JSON
 */
private async evaluateContextSufficiency(
  loadedContent: string,
  taskType: TaskType,
  iteration: number,
  tokensUsed: number,
  remainingBudget: number
): Promise<IContextSufficiencyDecision> {
  // Count START marker lines only. Splitting on "\n======" would count the
  // START and END marker of every file and report twice the real number.
  const loadedFileCount = (loadedContent.match(/^====== START OF FILE .+ ======$/gm) ?? []).length;

  // Only claim truncation when the preview really was cut short.
  const previewLimit = 3000;
  const contextPreview =
    loadedContent.length > previewLimit
      ? `${loadedContent.substring(0, previewLimit)}... (truncated for brevity)`
      : loadedContent;

  const prompt = `You have been building context for a ${taskType} task across ${iteration} iterations.

CURRENT STATE:
- Tokens used: ${tokensUsed}
- Remaining budget: ${remainingBudget}
- Files loaded: ${loadedFileCount}

CONTEXT SO FAR:
${contextPreview}

Question: Do you have SUFFICIENT context to successfully complete the ${taskType} task?

Consider:
- For README: Do you understand the project's purpose, main features, API surface, and usage patterns?
- For commit: Do you understand what changed and why?
- For description: Do you understand the project's core value proposition?

Respond in JSON format:
{
"sufficient": true or false,
"reasoning": "Detailed explanation of your decision"
}`;

  const response = await this.openaiInstance.chat({
    systemMessage: `You are an AI assistant that evaluates whether gathered context is sufficient for a task.
You must respond ONLY with valid JSON that can be parsed with JSON.parse().
Do not wrap the JSON in markdown code blocks or add any other text.`,
    userMessage: prompt,
    messageHistory: [],
  });

  // Strip one leading and one trailing markdown fence (with or without a
  // language tag) so both "```json … ```" and "``` … ```" parse.
  const content = response.message
    .trim()
    .replace(/^```[a-z]*\s*/i, '')
    .replace(/\s*```$/, '')
    .trim();
  const parsed = JSON.parse(content);

  // A truthiness check would accept the string "false" as sufficient.
  const verdict: unknown = parsed?.sufficient;
  const sufficient =
    verdict === true ||
    (typeof verdict === 'string' && verdict.trim().toLowerCase() === 'true');

  return {
    sufficient,
    reasoning: parsed?.reasoning || 'No reasoning provided',
  };
}
|
||||
|
||||
/**
 * Load a single file with caching.
 *
 * `filePath` may be absolute or relative; relative paths are resolved
 * against the project root (not the process working directory), because the
 * selection prompt presents files to the AI by their project-relative path.
 * Paths that resolve outside the project root are rejected, since the path
 * originates from model output and the file contents are sent to the model.
 *
 * On a cache miss the file is read from disk, tokenized and stored in the
 * cache together with its modification time.
 *
 * NOTE(review): assumes `plugins.path` is Node's `path` module (it is already
 * used here via `relative`) — confirm.
 *
 * @param filePath - Absolute or project-relative path of the file
 * @returns File info with absolute `path`, project-relative `relativePath`,
 *          contents and token count
 * @throws Error when the path lies outside the project root or the file
 *         cannot be read
 */
private async loadFile(filePath: string): Promise<IFileInfo> {
  const absolutePath = plugins.path.resolve(this.projectRoot, filePath);
  const relativePath = plugins.path.relative(this.projectRoot, absolutePath);

  const escapesProjectRoot =
    relativePath === '..' ||
    relativePath.startsWith(`..${plugins.path.sep}`) ||
    plugins.path.isAbsolute(relativePath);
  if (escapesProjectRoot) {
    throw new Error(`Refusing to load file outside of project root: ${filePath}`);
  }

  // Try cache first
  const cached = await this.cache.get(absolutePath);
  if (cached) {
    return {
      path: absolutePath,
      relativePath,
      contents: cached.contents,
      tokenCount: cached.tokenCount,
    };
  }

  // Load from disk without blocking the event loop; the stat is independent
  // of the read, so both run concurrently.
  const [contents, stats] = await Promise.all([
    fs.promises.readFile(absolutePath, 'utf8'),
    fs.promises.stat(absolutePath),
  ]);
  const tokenCount = this.countTokens(contents);

  // Cache it
  await this.cache.set({
    path: absolutePath,
    contents,
    tokenCount,
    mtime: Math.floor(stats.mtimeMs),
    cachedAt: Date.now(),
  });

  return {
    path: absolutePath,
    relativePath,
    contents,
    tokenCount,
  };
}
|
||||
|
||||
/**
 * Format a file for inclusion in context.
 *
 * Wraps the file contents between a START and an END marker line that carry
 * the file's relative path. The block begins and ends with a newline and the
 * contents are separated from each marker by one empty line.
 *
 * @param file - File whose contents should be wrapped
 * @returns The delimited block to append to the context
 */
private formatFileForContext(file: IFileInfo): string {
  const startMarker = `====== START OF FILE ${file.relativePath} ======`;
  const endMarker = `====== END OF FILE ${file.relativePath} ======`;

  // Empty entries produce the leading/trailing newline and the blank lines
  // around the contents.
  const segments = ['', startMarker, '', `${file.contents}`, '', endMarker, ''];
  return segments.join('\n');
}
|
||||
|
||||
/**
 * Count tokens in text.
 *
 * Uses the GPT tokenizer; if encoding throws, falls back to a rough
 * estimate of one token per four characters (rounded up).
 *
 * @param text - Text to measure
 * @returns Number of tokens, exact or estimated
 */
private countTokens(text: string): number {
  let tokenTotal: number;
  try {
    tokenTotal = plugins.gptTokenizer.encode(text).length;
  } catch {
    // Tokenizer failed: approximate instead of propagating the error.
    tokenTotal = Math.ceil(text.length / 4);
  }
  return tokenTotal;
}
|
||||
}
|
||||
Reference in New Issue
Block a user