2025-09-28 15:51:50 +00:00
|
|
|
import { expect, tap } from '@push.rocks/tapbundle';
|
|
|
|
import * as qenv from '@push.rocks/qenv';
|
|
|
|
import * as smartai from '../ts/index.js';
|
2025-10-03 12:50:42 +00:00
|
|
|
import * as path from 'path';
|
|
|
|
import { promises as fs } from 'fs';
|
2025-09-28 15:51:50 +00:00
|
|
|
|
|
|
|
// Environment lookup used by the tests below to fetch ANTHROPIC_TOKEN on demand.
const testQenv = new qenv.Qenv('./', './.nogit/');
|
|
|
|
|
2025-10-03 12:50:42 +00:00
|
|
|
/**
 * Persists a research result as pretty-printed JSON so it can be inspected after a test run.
 *
 * The file is written to `.nogit/testresults/research/<sanitized name>_<timestamp>.json`
 * (a relative path); missing parent directories are created first.
 *
 * @param testName - Label for the result. Every character outside `a-z`/`0-9`
 *   (case-insensitive) is replaced with `_`, then the name is lower-cased.
 * @param result - Value to serialize. A value with no JSON representation
 *   (for example `undefined`) is stored as `null` instead of failing the write.
 * @returns A promise that resolves once the file has been written and the path logged.
 */
async function saveResearchResult(testName: string, result: unknown): Promise<void> {
  const sanitizedName = testName.replace(/[^a-z0-9]/gi, '_').toLowerCase();
  // ':' and '.' from the ISO timestamp are swapped for '-' to keep the file name portable.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const filename = `${sanitizedName}_${timestamp}.json`;
  const filepath = path.join('.nogit', 'testresults', 'research', filename);

  // JSON.stringify yields undefined for values such as `undefined` or functions;
  // passing that to writeFile would throw, so fall back to the JSON literal `null`.
  const serialized: string | undefined = JSON.stringify(result, null, 2);

  await fs.mkdir(path.dirname(filepath), { recursive: true });
  await fs.writeFile(filepath, serialized ?? 'null', 'utf-8');

  console.log(` 💾 Saved to: ${filepath}`);
}
|
|
|
|
|
2025-09-28 15:51:50 +00:00
|
|
|
// Provider instance shared by the tests: assigned in the initialization test, stopped in the clean-up test.
let anthropicProvider: smartai.AnthropicProvider;
|
|
|
|
|
|
|
|
// Creates the shared provider with web search enabled, starts it, and checks
// that it exposes a `research` function.
tap.test('Anthropic Research: should initialize provider with web search', async () => {
  const anthropicToken = await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN');
  anthropicProvider = new smartai.AnthropicProvider({ anthropicToken, enableWebSearch: true });

  await anthropicProvider.start();

  expect(anthropicProvider).toBeInstanceOf(smartai.AnthropicProvider);
  const researchKind = typeof anthropicProvider.research;
  expect(researchKind).toEqual('function');
});
|
|
|
|
|
|
|
|
// Runs a 'basic' depth query, logs a short summary, stores the raw result,
// then checks the answer, sources and metadata fields.
tap.test('Anthropic Research: should perform basic research query', async () => {
  const request = {
    query: 'What is machine learning and its main applications?',
    searchDepth: 'basic'
  } as const;
  const report = await anthropicProvider.research(request);

  console.log('Anthropic Basic Research:');
  console.log('- Answer length:', report.answer.length);
  console.log('- Sources found:', report.sources.length);
  console.log('- First 200 chars:', report.answer.slice(0, 200));

  await saveResearchResult('basic_research_machine_learning', report);

  expect(report).toBeTruthy();
  expect(report.answer).toBeTruthy();
  const loweredAnswer = report.answer.toLowerCase();
  expect(loweredAnswer).toInclude('machine learning');
  expect(report.sources).toBeArray();
  expect(report.metadata).toBeTruthy();
});
|
|
|
|
|
|
|
|
// Runs an 'advanced' depth query with web search requested and at most 5 sources,
// logs a summary, stores the result, and checks the answer plus the first source (if any).
tap.test('Anthropic Research: should perform research with web search', async () => {
  const report = await anthropicProvider.research({
    query: 'What are the latest developments in renewable energy technology?',
    searchDepth: 'advanced',
    includeWebSearch: true,
    maxSources: 5
  });

  console.log('Anthropic Web Search Research:');
  console.log('- Answer length:', report.answer.length);
  console.log('- Sources:', report.sources.length);
  // Search queries are only logged when the result carries them.
  if (report.searchQueries) {
    console.log('- Search queries:', report.searchQueries);
  }

  await saveResearchResult('web_search_renewable_energy', report);

  expect(report.answer).toBeTruthy();
  expect(report.answer.toLowerCase()).toInclude('renewable');

  // Nothing further to verify when no sources were extracted.
  if (report.sources.length === 0) {
    return;
  }
  console.log('- Example source:', report.sources[0]);
  expect(report.sources[0]).toHaveProperty('url');
});
|
|
|
|
|
|
|
|
// Runs a 'deep' depth query, logs answer length and token usage, stores the result,
// and checks that the answer is longer than 300 characters and mentions both topics.
tap.test('Anthropic Research: should handle deep research queries', async () => {
  const report = await anthropicProvider.research({
    query: 'Explain the differences between REST and GraphQL APIs',
    searchDepth: 'deep'
  });

  console.log('Anthropic Deep Research:');
  console.log('- Answer length:', report.answer.length);
  console.log('- Token usage:', report.metadata?.tokensUsed);

  await saveResearchResult('deep_research_rest_vs_graphql', report);

  expect(report.answer).toBeTruthy();
  expect(report.answer.length).toBeGreaterThan(300);
  // Both technologies must be mentioned; the comparison ignores case.
  for (const keyword of ['rest', 'graphql']) {
    expect(report.answer.toLowerCase()).toInclude(keyword);
  }
});
|
|
|
|
|
|
|
|
// Runs a 'basic' depth query limited to 3 sources, stores the result, asserts that
// the answer mentions 'Docker', and logs (without asserting) whether URLs or sources are present.
tap.test('Anthropic Research: should extract citations from response', async () => {
  const report = await anthropicProvider.research({
    query: 'What is Docker and how does containerization work?',
    searchDepth: 'basic',
    maxSources: 3
  });

  console.log('Anthropic Citation Extraction:');
  console.log('- Sources found:', report.sources.length);
  const mentionsDockerAnyCase = report.answer.toLowerCase().includes('docker');
  console.log('- Answer includes Docker:', mentionsDockerAnyCase);

  await saveResearchResult('citation_extraction_docker', report);

  expect(report.answer).toInclude('Docker');

  // Check for URL extraction (both markdown and plain URLs); informational only.
  const answerHasLink = report.answer.includes('http');
  const urlsOrSources = answerHasLink || report.sources.length > 0;
  console.log('- Contains URLs or sources:', urlsOrSources);
});
|
|
|
|
|
|
|
|
// Creates a dedicated provider configured with domain allow/block lists, runs a
// 'basic' depth query through it, stores the result and checks the answer.
// The provider is stopped in a `finally` block so a failed request or assertion
// does not leave it running.
tap.test('Anthropic Research: should use domain filtering when configured', async () => {
  // Create a new provider with domain restrictions
  // NOTE(review): Anthropic's web search tool documents allowed and blocked domain
  // lists as mutually exclusive per request — confirm how the provider reconciles both.
  const filteredProvider = new smartai.AnthropicProvider({
    anthropicToken: await testQenv.getEnvVarOnDemand('ANTHROPIC_TOKEN'),
    enableWebSearch: true,
    searchDomainAllowList: ['wikipedia.org', 'docs.microsoft.com'],
    searchDomainBlockList: ['reddit.com']
  });

  await filteredProvider.start();

  try {
    const result = await filteredProvider.research({
      query: 'What is JavaScript?',
      searchDepth: 'basic'
    });

    console.log('Anthropic Domain Filtering Test:');
    console.log('- Answer length:', result.answer.length);
    console.log('- Applied domain filters (allow: wikipedia, docs.microsoft)');

    await saveResearchResult('domain_filtering_javascript', result);

    expect(result.answer).toBeTruthy();
    expect(result.answer.toLowerCase()).toInclude('javascript');
  } finally {
    // Always release the test-local provider, even when something above threw.
    await filteredProvider.stop();
  }
});
|
|
|
|
|
|
|
|
// Sends an empty query and records whether the provider rejects it.
// No assertion is made on the outcome: the test only logs whether an error was caught.
tap.test('Anthropic Research: should handle errors gracefully', async () => {
  let errorCaught = false;

  try {
    await anthropicProvider.research({
      query: '', // Empty query
      searchDepth: 'basic'
    });
  } catch (error: unknown) {
    errorCaught = true;
    // Catch variables are `unknown` under strict settings and anything can be thrown,
    // so narrow before reading `.message` instead of assuming an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    console.log('Expected error for empty query:', message.substring(0, 100));
  }

  // Anthropic might handle empty queries differently
  console.log(`Empty query error test - Error caught: ${errorCaught}`);
});
|
|
|
|
|
|
|
|
// Asks the same question at 'basic' and then 'advanced' depth (one request after the other),
// logs answer lengths and token usage for both, stores both results and checks both answers.
tap.test('Anthropic Research: should handle different search depths', async () => {
  const question = 'What is Python?';

  // Test basic search depth
  const basicResult = await anthropicProvider.research({ query: question, searchDepth: 'basic' });

  // Test advanced search depth
  const advancedResult = await anthropicProvider.research({
    query: question,
    searchDepth: 'advanced'
  });

  console.log('Anthropic Search Depth Comparison:');
  console.log('- Basic answer length:', basicResult.answer.length);
  console.log('- Advanced answer length:', advancedResult.answer.length);
  console.log('- Basic tokens:', basicResult.metadata?.tokensUsed);
  console.log('- Advanced tokens:', advancedResult.metadata?.tokensUsed);

  await saveResearchResult('search_depth_python_basic', basicResult);
  await saveResearchResult('search_depth_python_advanced', advancedResult);

  expect(basicResult.answer).toBeTruthy();
  expect(advancedResult.answer).toBeTruthy();

  // Advanced search typically produces longer answers, but that is not guaranteed,
  // so only the presence of the topic is checked for each depth.
  const basicLowered = basicResult.answer.toLowerCase();
  expect(basicLowered).toInclude('python');
  const advancedLowered = advancedResult.answer.toLowerCase();
  expect(advancedLowered).toInclude('python');
});
|
|
|
|
|
2025-10-03 12:50:42 +00:00
|
|
|
// Runs an 'advanced' depth comparison query with web search requested and up to 10 sources,
// logs a summary, stores the result, and checks answer length, both company names and
// that at least one source was returned.
tap.test('Anthropic Research: ARM vs. Qualcomm comparison', async () => {
  const comparison = await anthropicProvider.research({
    query: 'Compare ARM and Qualcomm: their technologies, market positions, and recent developments in the mobile and computing sectors',
    searchDepth: 'advanced',
    includeWebSearch: true,
    maxSources: 10
  });

  console.log('ARM vs. Qualcomm Research:');
  console.log('- Answer length:', comparison.answer.length);
  console.log('- Sources found:', comparison.sources.length);
  console.log('- First 300 chars:', comparison.answer.slice(0, 300));

  await saveResearchResult('arm_vs_qualcomm_comparison', comparison);

  expect(comparison.answer).toBeTruthy();
  expect(comparison.answer.length).toBeGreaterThan(500);
  // Case-insensitive substring checks for both names.
  for (const company of ['arm', 'qualcomm']) {
    expect(comparison.answer.toLowerCase()).toInclude(company);
  }
  expect(comparison.sources.length).toBeGreaterThan(0);
});
|
|
|
|
|
2025-09-28 15:51:50 +00:00
|
|
|
// Stops the shared provider once the research tests have run and logs a confirmation.
tap.test('Anthropic Research: should clean up provider', async () => {
  await anthropicProvider.stop();

  const confirmation = 'Anthropic research provider stopped successfully';
  console.log(confirmation);
});
|
|
|
|
|
|
|
|
// Start the tap runner for the tests registered above and export the value returned by tap.start().
export default tap.start();
|