diff --git a/changelog.md b/changelog.md index 791e8bd..31f50eb 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,13 @@ # Changelog +## 2025-10-03 - 0.7.1 - fix(docs) +Add README image generation docs and .claude local settings + +- Add .claude/settings.local.json with permission allow-list for local assistant tooling and web search +- Update README provider capabilities table to include an Images column and reference gpt-image-1 +- Add Image Generation & Editing section with examples, options, and gpt-image-1 advantages +- Mark image generation support as implemented in the roadmap and remove duplicate entry + ## 2025-10-03 - 0.7.0 - feat(providers) Add research API and image generation/editing support; extend providers and tests diff --git a/readme.md b/readme.md index ea08544..254524f 100644 --- a/readme.md +++ b/readme.md @@ -45,15 +45,15 @@ const response = await ai.openaiProvider.chat({ Choose the right provider for your use case: -| Provider | Chat | Streaming | TTS | Vision | Documents | Research | Highlights | -|----------|:----:|:---------:|:---:|:------:|:---------:|:--------:|------------| -| **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | • GPT-4, DALL-E 3
• Industry standard
• Deep research API | -| **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | • Claude 3 Opus
• Superior reasoning
• Web search API | -| **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | • 100% local
• Privacy-first
• No API costs | -| **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | • Grok models
• Real-time data
• Uncensored | -| **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | • Web-aware
• Research-focused
• Sonar Pro models | -| **Groq** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | • 10x faster
• LPU inference
• Low latency | -| **Exo** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | • Distributed
• P2P compute
• Decentralized | +| Provider | Chat | Streaming | TTS | Vision | Documents | Research | Images | Highlights | +|----------|:----:|:---------:|:---:|:------:|:---------:|:--------:|:------:|------------| +| **OpenAI** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | • gpt-image-1
• DALL-E 3
• Deep research API | +| **Anthropic** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | • Claude 3 Opus
• Superior reasoning
• Web search API | +| **Ollama** | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | • 100% local
• Privacy-first
• No API costs | +| **XAI** | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | • Grok models
• Real-time data
• Uncensored | +| **Perplexity** | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | • Web-aware
• Research-focused
• Sonar Pro models | +| **Groq** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | • 10x faster
• LPU inference
• Low latency | +| **Exo** | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | • Distributed
• P2P compute
• Decentralized | ## 🎮 Core Features @@ -216,6 +216,87 @@ const perplexityResearch = await ai.perplexityProvider.research({ - **Anthropic**: Web Search API with domain filtering - **Perplexity**: Sonar and Sonar Pro models with built-in citations +### 🎨 Image Generation & Editing + +Generate and edit images with OpenAI's cutting-edge models: + +```typescript +// Basic image generation with gpt-image-1 +const image = await ai.openaiProvider.imageGenerate({ + prompt: 'A futuristic robot assistant in a modern office, digital art', + model: 'gpt-image-1', + quality: 'high', + size: '1024x1024' +}); + +// Save the generated image +const imageBuffer = Buffer.from(image.images[0].b64_json!, 'base64'); +fs.writeFileSync('robot.png', imageBuffer); + +// Advanced: Transparent background with custom format +const logo = await ai.openaiProvider.imageGenerate({ + prompt: 'Minimalist mountain peak logo, geometric design', + model: 'gpt-image-1', + quality: 'high', + size: '1024x1024', + background: 'transparent', + outputFormat: 'png' +}); + +// WebP with compression for web use +const webImage = await ai.openaiProvider.imageGenerate({ + prompt: 'Product showcase: sleek smartphone on marble surface', + model: 'gpt-image-1', + quality: 'high', + size: '1536x1024', + outputFormat: 'webp', + outputCompression: 85 +}); + +// Superior text rendering (gpt-image-1's strength) +const signage = await ai.openaiProvider.imageGenerate({ + prompt: 'Vintage cafe sign saying "COFFEE & CODE" in hand-lettered typography', + model: 'gpt-image-1', + quality: 'high', + size: '1024x1024' +}); + +// Generate multiple variations at once +const variations = await ai.openaiProvider.imageGenerate({ + prompt: 'Abstract geometric pattern, colorful minimalist art', + model: 'gpt-image-1', + n: 3, + quality: 'medium', + size: '1024x1024' +}); + +// Edit an existing image +const editedImage = await ai.openaiProvider.imageEdit({ + image: originalImageBuffer, + prompt: 'Add sunglasses and change the background to a beach sunset', + model: 'gpt-image-1', + quality: 'high' +}); +``` + +**Image Generation Options:** +- `model`: 'gpt-image-1' | 'dall-e-3' | 'dall-e-2' +- `quality`: 'low' | 'medium' | 'high' | 'auto' +- `size`: Multiple aspect ratios up to 4096×4096 +- `background`: 'transparent' | 'opaque' | 'auto' +- `outputFormat`: 'png' | 'jpeg' | 'webp' +- `outputCompression`: 0-100 for webp/jpeg +- `moderation`: 'low' | 'auto' +- `n`: Number of images (1-10) + +**gpt-image-1 Advantages:** +- Superior text rendering in images +- Up to 4096×4096 resolution +- Transparent background support +- Advanced output formats (WebP with compression) +- Better prompt understanding +- Streaming support for progressive rendering + ### 🔄 Persistent Conversations Maintain context across interactions: @@ -501,8 +582,8 @@ export PERPLEXITY_API_KEY=pplx-... ## 📈 Roadmap - [x] Research & Web Search API +- [x] Image generation support (gpt-image-1, DALL-E 3, DALL-E 2) - [ ] Streaming function calls -- [ ] Image generation support - [ ] Voice input processing - [ ] Fine-tuning integration - [ ] Embedding support diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index c5bbcb9..bc6cf6f 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@push.rocks/smartai', - version: '0.7.0', + version: '0.7.1', description: 'SmartAi is a versatile TypeScript library designed to facilitate integration and interaction with various AI models, offering functionalities for chat, audio generation, document processing, and vision tasks.' }