From e6346be884f93fb7f14057fc16a67cd5ab923d8d Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Thu, 14 May 2026 11:34:04 +0000 Subject: [PATCH] feat(agent): add prompt caching options and cache token usage reporting --- changelog.md | 8 ++ package.json | 4 +- pnpm-lock.yaml | 136 ++++++++++++++++----------------- readme.md | 32 +++++++- test/test.ts | 65 ++++++++++++++++ ts/index.ts | 10 ++- ts/plugins.ts | 28 +++++-- ts/smartagent.classes.agent.ts | 57 +++++++++++++- ts/smartagent.interfaces.ts | 25 +++++- 9 files changed, 281 insertions(+), 84 deletions(-) diff --git a/changelog.md b/changelog.md index 113751a..b66eb95 100644 --- a/changelog.md +++ b/changelog.md @@ -3,6 +3,14 @@ ## Pending +### Features + +- add prompt caching options and cache token usage reporting (agent) + - adds sessionId and cache run options to configure provider-specific prompt caching defaults + - applies OpenAI cache provider options and Anthropic cache breakpoints automatically, with support to disable defaults + - extends usage reporting to include cacheReadTokens and cacheWriteTokens + - exports cache-related types and helpers and updates tests and README to cover the new behavior + ## 2026-05-11 - 3.1.1 ### Fixes diff --git a/package.json b/package.json index c82bcc2..9e31396 100644 --- a/package.json +++ b/package.json @@ -37,11 +37,11 @@ "@types/node": "^25.6.0" }, "dependencies": { - "@push.rocks/smartai": "^2.2.0", + "@push.rocks/smartai": "^2.3.0", "@push.rocks/smartfs": "^1.5.1", "@push.rocks/smartrequest": "^5.0.1", "@push.rocks/smartshell": "^3.3.8", - "ai": "^6.0.170", + "ai": "^6.0.182", "zod": "^4.4.1" }, "packageManager": "pnpm@10.28.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6a97eef..3dc1b71 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@push.rocks/smartai': - specifier: ^2.2.0 - version: 2.2.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1) + specifier: ^2.3.0 + version: 2.3.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1) '@push.rocks/smartfs': specifier: ^1.5.1 version: 1.5.1 @@ -21,8 +21,8 @@ importers: specifier: ^3.3.8 version: 3.3.8 ai: - specifier: ^6.0.170 - version: 6.0.175(zod@4.4.1) + specifier: ^6.0.182 + version: 6.0.182(zod@4.4.1) zod: specifier: ^4.4.1 version: 4.4.1 @@ -51,56 +51,56 @@ importers: packages: - '@ai-sdk/anthropic@3.0.75': - resolution: {integrity: sha512-5AV3CKwaOJFdGXhihVgvRLNrjwRn2Xmy71YygT8DYOA+5zTx93Seg2QSIS8b3tJxzZ7X4H84pEtrE8VZKBCZGA==} + '@ai-sdk/anthropic@3.0.77': + resolution: {integrity: sha512-ML8C2M1YvPA1ulEx4TiyF0k1xvC2ikEiPBIC1PPQ0a5xELUGrO2lAaEzsTEoJ+eCeDd8PSBuFJjs+r+9yIwQXA==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/gateway@3.0.110': - resolution: {integrity: sha512-sbv8+1L9/BRKydn8dMNwoMQKupA4iLJ9N+yvxgW6wMQ/94UepDf3FeYWMj/dLdzolAHZ6izRUP4s5WqQkmJ2Zg==} + '@ai-sdk/gateway@3.0.114': + resolution: {integrity: sha512-MqkZ5sd+qiq6RgIxELkoFQXg2/JwK+WCMaot7U+rtrZpWJl3fSyYvc28SC03b256o4F7OXjQtdjTqs81B2w+dA==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/google@3.0.68': - resolution: {integrity: sha512-bjQSuUmwStn7R0RDGl9I8kriY+xjmschzy5JN4eHPPEOdca2gS6zLc+oi8jhRiCqqROkk3U12Q9M8rmQw7gmbQ==} + '@ai-sdk/google@3.0.73': + resolution: {integrity: sha512-o2MuIeyvZrFIeIbnbA8Thrr63irdyUBh0uWBZ2lY6yFeXuE/tcwyXF74bDKS4KvTu84uFpQfpbS/LXHGKKXz+g==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/groq@3.0.38': - resolution: {integrity: sha512-mzn+KYeROVHFZnAr3qNX+eZ4Un4BFykOcs8XDH8LLzdfgrW6fxQkdiZyww0asYGjIYaa16dkyVtglp4GV6BeUQ==} + '@ai-sdk/groq@3.0.39': + resolution: {integrity: sha512-BZAr6DjCbzWQ0Qn1/TSsHo/bmCt4JaAMb4A7HCSUZBQCAcOjne/03D0sVjHnQhUC3TpwcmYiv7tHAviK7BluRw==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/mistral@3.0.35': - resolution: {integrity: sha512-8BCt8pOWjvfIFZOVFz+koFcbydVv7Q8WM24J0gVJWDw1eOEn3Muugw4py+TuaQc8KdjP7d1HR9E4gIMN55zBgQ==} + '@ai-sdk/mistral@3.0.36': + resolution: {integrity: sha512-FLIb2QdLraOgQP3puUybuFYWbtsB02YWQBTOJOk8heiEsdFW3YE0dfuzwtsvoF4FXlBnbYYMWu5jgOesthcmWg==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/openai-compatible@2.0.46': - resolution: {integrity: sha512-23ExGdy3p0Grfz3BAjCbIOc74TjQc5nHu72e0+kx3hshvScp32a4nnQlzzG4VT1bDZxa9yPNNUNyb5nN6vJHcQ==} + '@ai-sdk/openai-compatible@2.0.47': + resolution: {integrity: sha512-Enm5UlL0zUCrW3792opk5h7hRWxZOZzDe6eQYVFqX9LUOGGCe1h8MZWAGim765nwzgnjlpeYOsuzZmLtRsTPlg==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/openai@3.0.62': - resolution: {integrity: sha512-Oy74Bztik2X25wZD9HRd83BAXOKcRvrfgz9gvVGqKj68yegf447NiElPbB6TSVb8zyiY9wv1GSGywMCxnnoF9g==} + '@ai-sdk/openai@3.0.63': + resolution: {integrity: sha512-4yY/m8a57MNNVoJCsXuNblKf6BO4yuAuLKRX4tzSNffBEBSp1FlcWdPE0Z4FkqUeS0AJhYSSqp0GIiA/cIcDNA==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/perplexity@3.0.32': - resolution: {integrity: sha512-5kPyfDOHL72Mnz0unBiW3S/jHjNMo/frPW6dBTADX1SMbFB9Yvks4k1pjixIJc1m8YBulI5hV5yTvr7uUxpzxA==} + '@ai-sdk/perplexity@3.0.33': + resolution: {integrity: sha512-aNt6pTAzq+akadDXVdg2SjN2dODtaVlkKbw8/35c+sekr+Tx0sJwVqMR1udxrjLzhQvz8qtfsWRuz+hB9pmOnQ==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@ai-sdk/provider-utils@4.0.26': - resolution: {integrity: sha512-CsKNLKsOpvPujRlIYvoz+Ybw+kGn7J4/fIZa/58+R7iWLLfwn6ifE2G6Yq8K9XvH/I/3bzaDAJ3NhRwEMsLBKQ==} + '@ai-sdk/provider-utils@4.0.27': + resolution: {integrity: sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 @@ -109,14 +109,14 @@ packages: resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==} engines: {node: '>=18'} - '@ai-sdk/xai@3.0.88': - resolution: {integrity: sha512-9jc7CT+6YxdPnkMF66jX/LmFoEcRI2Cjr5CTxZcrCA8f764NPF0mGtaNRv3M0OzYwBIppgZ3F0TNhADguHWLrg==} + '@ai-sdk/xai@3.0.89': + resolution: {integrity: sha512-ecFE4iQnWePrxPYuSUYCh8lpoKZ52J3jao5whDVC3+Z9Cu/XeyOe2oUGzsYSUPgbJNi/ZmD/KN69bHbUcAHFvw==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 - '@anthropic-ai/sdk@0.95.0': - resolution: {integrity: sha512-7It2B76OFJH9jC/a0TicXFMq0ZZM25ei+i/mK7JnsE1Ibmo0Yfkqm+DXOHeU/ZxxKwLLGPP6qaAvKmQmgV6XhA==} + '@anthropic-ai/sdk@0.95.2': + resolution: {integrity: sha512-Egddwo3sheo1PzUrMkZnH6VkQYwS0h/b/i8vSK8Ta9M45UQipAMeDFH57dYuDAfXMEUUGeKw6CMlremgMZgrSQ==} hasBin: true peerDependencies: zod: ^3.25.0 || ^4.0.0 @@ -960,8 +960,8 @@ packages: '@push.rocks/qenv@6.1.3': resolution: {integrity: sha512-+z2hsAU/7CIgpYLFqvda8cn9rUBMHqLdQLjsFfRn5jPoD7dJ5rFlpkbhfM4Ws8mHMniwWaxGKo+q/YBhtzRBLg==} - '@push.rocks/smartai@2.2.0': - resolution: {integrity: sha512-dB9DDguVbvKNfVjVPME2vZqAcZEZBehlv7VpFL+ECTf6jBf4tNUtCfw/a2YlF1uE7EL7rgHFttLbGXvUuaxk4A==} + '@push.rocks/smartai@2.3.0': + resolution: {integrity: sha512-i2Oz322qzU0ao/QJvpFNmqN8fkGbctImYZ6iDs9MYwR6KKbwoLDp1tZg1rM/nf1LuHOqjdojGcDNf0ycrTfHTw==} '@push.rocks/smartarchive@4.2.4': resolution: {integrity: sha512-uiqVAXPxmr8G5rv3uZvZFMOCt8l7cZC3nzvsy4YQqKf/VkPhKIEX+b7LkAeNlxPSYUiBQUkNRoawg9+5BaMcHg==} @@ -1725,8 +1725,8 @@ packages: resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} engines: {node: '>= 8.0.0'} - ai@6.0.175: - resolution: {integrity: sha512-6fFFHzbh6FIZnYc31V6osOxq25ABJYCShfG0O6ajHiA4FB/DgnPi1mP8cO5aAU3HNSbQHiMazdlh9bIsp97mVA==} + ai@6.0.182: + resolution: {integrity: sha512-ooJdziFjYrYRcsCx107roqA8gDTI3P82nUfroNWIhVvwrkYzEN3W1l50YK+XNqkUew8AiimaW0/SLBewRXMuHQ==} engines: {node: '>=18'} peerDependencies: zod: ^3.25.76 || ^4.1.8 @@ -2791,8 +2791,8 @@ packages: once@1.4.0: resolution: {integrity: sha1-WDsap3WWHUsROsF9nFC6753Xa9E=} - openai@6.36.0: - resolution: {integrity: sha512-Has2YbIusMq9wQEierFsgf9c783dy1y9arX459LmphNacEkkM5yxi2RIyXP0LmkOroQyW19iTwALHL8Yf26UKA==} + openai@6.37.0: + resolution: {integrity: sha512-0H5dEGFmmLv6KSd0W1w2nyL8WsLkX6yoLeQpU+dZAOuGcany5qkYQMmj35ZrKgb6yiyYqpUzFOpR8mZQkgqeEQ==} hasBin: true peerDependencies: ws: ^8.18.0 @@ -3446,56 +3446,56 @@ packages: snapshots: - '@ai-sdk/anthropic@3.0.75(zod@4.4.1)': + '@ai-sdk/anthropic@3.0.77(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/gateway@3.0.110(zod@4.4.1)': + '@ai-sdk/gateway@3.0.114(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) '@vercel/oidc': 3.2.0 zod: 4.4.1 - '@ai-sdk/google@3.0.68(zod@4.4.1)': + '@ai-sdk/google@3.0.73(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/groq@3.0.38(zod@4.4.1)': + '@ai-sdk/groq@3.0.39(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/mistral@3.0.35(zod@4.4.1)': + '@ai-sdk/mistral@3.0.36(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/openai-compatible@2.0.46(zod@4.4.1)': + '@ai-sdk/openai-compatible@2.0.47(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/openai@3.0.62(zod@4.4.1)': + '@ai-sdk/openai@3.0.63(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/perplexity@3.0.32(zod@4.4.1)': + '@ai-sdk/perplexity@3.0.33(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@ai-sdk/provider-utils@4.0.26(zod@4.4.1)': + '@ai-sdk/provider-utils@4.0.27(zod@4.4.1)': dependencies: '@ai-sdk/provider': 3.0.10 '@standard-schema/spec': 1.1.0 @@ -3506,14 +3506,14 @@ snapshots: dependencies: json-schema: 0.4.0 - '@ai-sdk/xai@3.0.88(zod@4.4.1)': + '@ai-sdk/xai@3.0.89(zod@4.4.1)': dependencies: - '@ai-sdk/openai-compatible': 2.0.46(zod@4.4.1) + '@ai-sdk/openai-compatible': 2.0.47(zod@4.4.1) '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) zod: 4.4.1 - '@anthropic-ai/sdk@0.95.0(zod@4.4.1)': + '@anthropic-ai/sdk@0.95.2(zod@4.4.1)': dependencies: json-schema-to-ts: 3.1.1 standardwebhooks: 1.0.0 @@ -4933,20 +4933,20 @@ snapshots: '@push.rocks/smartlog': 3.2.2 '@push.rocks/smartpath': 6.0.0 - '@push.rocks/smartai@2.2.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)': + '@push.rocks/smartai@2.3.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)': dependencies: - '@ai-sdk/anthropic': 3.0.75(zod@4.4.1) - '@ai-sdk/google': 3.0.68(zod@4.4.1) - '@ai-sdk/groq': 3.0.38(zod@4.4.1) - '@ai-sdk/mistral': 3.0.35(zod@4.4.1) - '@ai-sdk/openai': 3.0.62(zod@4.4.1) - '@ai-sdk/perplexity': 3.0.32(zod@4.4.1) + '@ai-sdk/anthropic': 3.0.77(zod@4.4.1) + '@ai-sdk/google': 3.0.73(zod@4.4.1) + '@ai-sdk/groq': 3.0.39(zod@4.4.1) + '@ai-sdk/mistral': 3.0.36(zod@4.4.1) + '@ai-sdk/openai': 3.0.63(zod@4.4.1) + '@ai-sdk/perplexity': 3.0.33(zod@4.4.1) '@ai-sdk/provider': 3.0.10 - '@ai-sdk/xai': 3.0.88(zod@4.4.1) - '@anthropic-ai/sdk': 0.95.0(zod@4.4.1) + '@ai-sdk/xai': 3.0.89(zod@4.4.1) + '@anthropic-ai/sdk': 0.95.2(zod@4.4.1) '@push.rocks/smartpdf': 4.2.2(typescript@6.0.3) - ai: 6.0.175(zod@4.4.1) - openai: 6.36.0(ws@8.20.0)(zod@4.4.1) + ai: 6.0.182(zod@4.4.1) + openai: 6.37.0(ws@8.20.0)(zod@4.4.1) transitivePeerDependencies: - aws-crt - bare-abort-controller @@ -6196,11 +6196,11 @@ snapshots: dependencies: humanize-ms: 1.2.1 - ai@6.0.175(zod@4.4.1): + ai@6.0.182(zod@4.4.1): dependencies: - '@ai-sdk/gateway': 3.0.110(zod@4.4.1) + '@ai-sdk/gateway': 3.0.114(zod@4.4.1) '@ai-sdk/provider': 3.0.10 - '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1) + '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1) '@opentelemetry/api': 1.9.0 zod: 4.4.1 @@ -7502,7 +7502,7 @@ snapshots: dependencies: wrappy: 1.0.2 - openai@6.36.0(ws@8.20.0)(zod@4.4.1): + openai@6.37.0(ws@8.20.0)(zod@4.4.1): optionalDependencies: ws: 8.20.0 zod: 4.4.1 diff --git a/readme.md b/readme.md index e18dd65..cee0ee6 100644 --- a/readme.md +++ b/readme.md @@ -49,7 +49,7 @@ const result = await runAgent({ console.log(result.text); // "7 + 35 = 42" console.log(result.steps); // number of agentic steps taken -console.log(result.usage); // { promptTokens, completionTokens, totalTokens } +console.log(result.usage); // { inputTokens, outputTokens, totalTokens, cacheReadTokens, cacheWriteTokens } ``` ## Architecture @@ -92,6 +92,8 @@ The single entry point. Options: | `system` | `string` | `undefined` | System prompt | | `tools` | `ToolSet` | `{}` | Tools the agent can call | | `providerOptions` | `ProviderOptions` | `undefined` | Provider-specific AI SDK request options passed through to `streamText()` | +| `sessionId` | `string` | `undefined` | Stable session id used as provider prompt-cache affinity key where supported | +| `cache` | `'auto' \| false \| IAgentCacheOptions` | `'auto'` | Prompt-cache policy. Set `false` to disable SmartAgent cache defaults | | `maxSteps` | `number` | `20` | Max agentic steps before stopping | | `messages` | `ModelMessage[]` | `[]` | Conversation history (for multi-turn) | | `maxRetries` | `number` | `5` | Max retries on rate-limit/server errors | @@ -114,6 +116,8 @@ interface IAgentRunResult { inputTokens: number; outputTokens: number; totalTokens: number; + cacheReadTokens: number; + cacheWriteTokens: number; }; toolCalls: Array<{ toolName: string; @@ -126,7 +130,7 @@ interface IAgentRunResult { ### OpenAI Provider Options -Use `providerOptions` for provider-specific request settings such as GPT reasoning effort. SmartAgent forwards the object unchanged to AI SDK `streamText()`. +Use `providerOptions` for provider-specific request settings such as GPT reasoning effort. SmartAgent merges cache defaults first, then applies your `providerOptions` so explicit caller options win. ```typescript import { getModelSetup } from '@push.rocks/smartai'; @@ -157,6 +161,30 @@ const saved = result.toolCalls.some((call) => ); ``` +### Prompt Caching + +SmartAgent enables prompt-cache defaults by default: + +- Anthropic-compatible models get cache breakpoints on the first two system messages and the two most recent non-system messages. +- OpenAI models get `store: false` by default and, when `sessionId` is provided, `promptCacheKey: sessionId` with `promptCacheRetention: 'in_memory'`. +- Longer retention is opt-in. Use `cache: { retention: '24h' }` for OpenAI or `cache: { retention: '1h' }` for Anthropic. +- Set `cache: false` to disable these defaults for a run. + +```typescript +const result = await runAgent({ + model, + sessionId: 'stable-session-id', + prompt: 'Continue the task.', + tools, +}); + +const noCache = await runAgent({ + model, + prompt: 'One-off request.', + cache: false, +}); +``` + ### Completion Validation Use `validateCompletion` when a workflow must not finish unless a required side-effect happened. Return `void` to accept the run, or return a string to append that string as a new user message and continue. If retries are exhausted, `runAgent()` throws. diff --git a/test/test.ts b/test/test.ts index bd84d93..2fe6bdb 100644 --- a/test/test.ts +++ b/test/test.ts @@ -104,6 +104,71 @@ tap.test('runAgent should forward providerOptions to streamText', async () => { expect((model.doStreamCalls[0].providerOptions as any).openai.reasoningEffort).toEqual('xhigh'); }); +tap.test('runAgent should add OpenAI cache defaults when sessionId is provided', async () => { + const model = new MockLanguageModelV3({ + provider: 'openai', + modelId: 'gpt-5', + doStream: async () => createTextStreamResult('ok') as any, + }); + + const result = await smartagent.runAgent({ + model, + prompt: 'hello', + sessionId: 'session-123', + providerOptions: { + openai: { + reasoningEffort: 'high', + }, + } as any, + }); + + const openaiOptions = (model.doStreamCalls[0].providerOptions as any).openai; + + expect(result.text).toEqual('ok'); + expect(openaiOptions.store).toEqual(false); + expect(openaiOptions.promptCacheKey).toEqual('session-123'); + expect(openaiOptions.promptCacheRetention).toEqual('in_memory'); + expect(openaiOptions.reasoningEffort).toEqual('high'); +}); + +tap.test('runAgent should mark Anthropic prompt cache breakpoints by default', async () => { + const model = new MockLanguageModelV3({ + provider: 'anthropic', + modelId: 'claude-sonnet-4-5-20250929', + doStream: async () => createTextStreamResult('ok') as any, + }); + + const result = await smartagent.runAgent({ + model, + system: 'stable system prompt', + prompt: 'hello', + }); + const prompt = model.doStreamCalls[0].prompt as any[]; + const systemMessage = prompt.find((message) => message.role === 'system'); + const userMessage = prompt.find((message) => message.role === 'user'); + + expect(result.text).toEqual('ok'); + expect(systemMessage.providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral'); + expect(userMessage.providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral'); +}); + +tap.test('runAgent should allow cache defaults to be disabled', async () => { + const model = new MockLanguageModelV3({ + provider: 'openai', + modelId: 'gpt-5', + doStream: async () => createTextStreamResult('ok') as any, + }); + + await smartagent.runAgent({ + model, + prompt: 'hello', + sessionId: 'session-123', + cache: false, + }); + + expect(model.doStreamCalls[0].providerOptions).toBeUndefined(); +}); + tap.test('runAgent should return final tool call records', async () => { let streamCallCount = 0; const callbackToolCalls: Array<{ name: string; input: unknown }> = []; diff --git a/ts/index.ts b/ts/index.ts index 5795b11..23b8bbe 100644 --- a/ts/index.ts +++ b/ts/index.ts @@ -3,7 +3,15 @@ export { ToolRegistry } from './smartagent.classes.toolregistry.js'; export { truncateOutput } from './smartagent.utils.truncation.js'; export type { ITruncateResult } from './smartagent.utils.truncation.js'; export { ContextOverflowError } from './smartagent.interfaces.js'; -export type { IAgentRunOptions, IAgentRunResult, IAgentToolCallRecord, ProviderOptions } from './smartagent.interfaces.js'; +export type { + IAgentCacheOptions, + IAgentRunOptions, + IAgentRunResult, + IAgentToolCallRecord, + ProviderOptions, + TAgentCacheRetention, + TAgentCacheSetting, +} from './smartagent.interfaces.js'; // Re-export tool() and z so consumers can define tools without extra imports export { tool, jsonSchema } from '@push.rocks/smartai'; diff --git a/ts/plugins.ts b/ts/plugins.ts index c5d270a..28d1619 100644 --- a/ts/plugins.ts +++ b/ts/plugins.ts @@ -4,9 +4,9 @@ import * as path from 'path'; export { path }; // ai-sdk core -import { streamText, generateText, stepCountIs } from 'ai'; +import { streamText, generateText, stepCountIs, wrapLanguageModel } from 'ai'; -export { streamText, generateText, stepCountIs }; +export { streamText, generateText, stepCountIs, wrapLanguageModel }; export type { ModelMessage, @@ -15,11 +15,29 @@ export type { } from 'ai'; // @push.rocks/smartai -import { tool, jsonSchema } from '@push.rocks/smartai'; +import { + applySmartAiCacheProviderOptions, + createSmartAiCachingMiddleware, + jsonSchema, + resolveSmartAiCacheProvider, + tool, +} from '@push.rocks/smartai'; -export { tool, jsonSchema }; +export { + applySmartAiCacheProviderOptions, + createSmartAiCachingMiddleware, + resolveSmartAiCacheProvider, + tool, + jsonSchema, +}; -export type { LanguageModelV3, TSmartAiProviderOptions as ProviderOptions } from '@push.rocks/smartai'; +export type { + ISmartAiCacheOptions, + LanguageModelV3, + TSmartAiCacheRetention, + TSmartAiCacheSetting, + TSmartAiProviderOptions as ProviderOptions, +} from '@push.rocks/smartai'; // zod import { z } from 'zod'; diff --git a/ts/smartagent.classes.agent.ts b/ts/smartagent.classes.agent.ts index b010669..a2eb8cf 100644 --- a/ts/smartagent.classes.agent.ts +++ b/ts/smartagent.classes.agent.ts @@ -90,6 +90,28 @@ function errorToString(error: unknown): string { return String(error); } +function tokenTotal(tokenUsage: unknown): number { + if (typeof tokenUsage === 'number') return tokenUsage; + if (tokenUsage && typeof tokenUsage === 'object' && typeof (tokenUsage as any).total === 'number') { + return (tokenUsage as any).total; + } + return 0; +} + +function tokenCacheRead(tokenUsage: unknown): number { + if (tokenUsage && typeof tokenUsage === 'object' && typeof (tokenUsage as any).cacheRead === 'number') { + return (tokenUsage as any).cacheRead; + } + return 0; +} + +function tokenCacheWrite(tokenUsage: unknown): number { + if (tokenUsage && typeof tokenUsage === 'object' && typeof (tokenUsage as any).cacheWrite === 'number') { + return (tokenUsage as any).cacheWrite; + } + return 0; +} + function recordToolCall( toolCalls: IAgentToolCallRecord[], toolCallIndexes: Map, @@ -129,11 +151,34 @@ export async function runAgent(options: IAgentRunOptions): Promise(); const tools = options.tools ?? {}; + const cache = options.cache ?? 'auto'; + const configuredCacheProvider = typeof cache === 'object' ? cache.provider : undefined; + const messageCacheProvider = cache === false + ? undefined + : configuredCacheProvider ?? plugins.resolveSmartAiCacheProvider(options.model.provider, options.model.modelId); + const model = messageCacheProvider + ? plugins.wrapLanguageModel({ + model: options.model, + middleware: plugins.createSmartAiCachingMiddleware({ + ...(typeof cache === 'object' ? cache : {}), + provider: messageCacheProvider, + }), + }) as unknown as plugins.LanguageModelV3 + : options.model; + const providerOptions = plugins.applySmartAiCacheProviderOptions({ + provider: options.model.provider, + modelId: options.model.modelId, + providerOptions: options.providerOptions, + cache, + sessionId: options.sessionId, + }); // Add a no-op sink for repaired-but-unrecognised tool calls const allTools: plugins.ToolSet = { @@ -157,11 +202,11 @@ export async function runAgent(options: IAgentRunOptions): Promise { stepCount++; - totalInput += usage?.inputTokens ?? 0; - totalOutput += usage?.outputTokens ?? 0; + totalInput += tokenTotal((usage as any)?.inputTokens); + totalOutput += tokenTotal((usage as any)?.outputTokens); + totalCacheRead += tokenCacheRead((usage as any)?.inputTokens); + totalCacheWrite += tokenCacheWrite((usage as any)?.inputTokens); for (const toolCall of stepToolCalls) { recordToolCall(toolCalls, toolCallIndexes, toolCall); } @@ -251,6 +298,8 @@ export async function runAgent(options: IAgentRunOptions): Promise