From e6346be884f93fb7f14057fc16a67cd5ab923d8d Mon Sep 17 00:00:00 2001
From: Juergen Kunz <juergen@foss.global>
Date: Thu, 14 May 2026 11:34:04 +0000
Subject: [PATCH] feat(agent): add prompt caching options and cache token usage
 reporting

---
 changelog.md                   |   8 ++
 package.json                   |   4 +-
 pnpm-lock.yaml                 | 136 ++++++++++++++++-----------------
 readme.md                      |  32 +++++++-
 test/test.ts                   |  65 ++++++++++++++++
 ts/index.ts                    |  10 ++-
 ts/plugins.ts                  |  28 +++++--
 ts/smartagent.classes.agent.ts |  57 +++++++++++++-
 ts/smartagent.interfaces.ts    |  25 +++++-
 9 files changed, 281 insertions(+), 84 deletions(-)

diff --git a/changelog.md b/changelog.md
index 113751a..b66eb95 100644
--- a/changelog.md
+++ b/changelog.md
@@ -3,6 +3,14 @@
 ## Pending
 
 
+### Features
+
+- add prompt caching options and cache token usage reporting (agent)
+  - adds sessionId and cache run options to configure provider-specific prompt caching defaults
+  - applies OpenAI cache provider options and Anthropic cache breakpoints automatically, with support to disable defaults
+  - extends usage reporting to include cacheReadTokens and cacheWriteTokens
+  - exports cache-related types and helpers and updates tests and README to cover the new behavior
+
 ## 2026-05-11 - 3.1.1
 
 ### Fixes
diff --git a/package.json b/package.json
index c82bcc2..9e31396 100644
--- a/package.json
+++ b/package.json
@@ -37,11 +37,11 @@
     "@types/node": "^25.6.0"
   },
   "dependencies": {
-    "@push.rocks/smartai": "^2.2.0",
+    "@push.rocks/smartai": "^2.3.0",
     "@push.rocks/smartfs": "^1.5.1",
     "@push.rocks/smartrequest": "^5.0.1",
     "@push.rocks/smartshell": "^3.3.8",
-    "ai": "^6.0.170",
+    "ai": "^6.0.182",
     "zod": "^4.4.1"
   },
   "packageManager": "pnpm@10.28.2",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 6a97eef..3dc1b71 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -9,8 +9,8 @@ importers:
   .:
     dependencies:
       '@push.rocks/smartai':
-        specifier: ^2.2.0
-        version: 2.2.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)
+        specifier: ^2.3.0
+        version: 2.3.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)
       '@push.rocks/smartfs':
         specifier: ^1.5.1
         version: 1.5.1
@@ -21,8 +21,8 @@ importers:
         specifier: ^3.3.8
         version: 3.3.8
       ai:
-        specifier: ^6.0.170
-        version: 6.0.175(zod@4.4.1)
+        specifier: ^6.0.182
+        version: 6.0.182(zod@4.4.1)
       zod:
         specifier: ^4.4.1
         version: 4.4.1
@@ -51,56 +51,56 @@ importers:
 
 packages:
 
-  '@ai-sdk/anthropic@3.0.75':
-    resolution: {integrity: sha512-5AV3CKwaOJFdGXhihVgvRLNrjwRn2Xmy71YygT8DYOA+5zTx93Seg2QSIS8b3tJxzZ7X4H84pEtrE8VZKBCZGA==}
+  '@ai-sdk/anthropic@3.0.77':
+    resolution: {integrity: sha512-ML8C2M1YvPA1ulEx4TiyF0k1xvC2ikEiPBIC1PPQ0a5xELUGrO2lAaEzsTEoJ+eCeDd8PSBuFJjs+r+9yIwQXA==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/gateway@3.0.110':
-    resolution: {integrity: sha512-sbv8+1L9/BRKydn8dMNwoMQKupA4iLJ9N+yvxgW6wMQ/94UepDf3FeYWMj/dLdzolAHZ6izRUP4s5WqQkmJ2Zg==}
+  '@ai-sdk/gateway@3.0.114':
+    resolution: {integrity: sha512-MqkZ5sd+qiq6RgIxELkoFQXg2/JwK+WCMaot7U+rtrZpWJl3fSyYvc28SC03b256o4F7OXjQtdjTqs81B2w+dA==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/google@3.0.68':
-    resolution: {integrity: sha512-bjQSuUmwStn7R0RDGl9I8kriY+xjmschzy5JN4eHPPEOdca2gS6zLc+oi8jhRiCqqROkk3U12Q9M8rmQw7gmbQ==}
+  '@ai-sdk/google@3.0.73':
+    resolution: {integrity: sha512-o2MuIeyvZrFIeIbnbA8Thrr63irdyUBh0uWBZ2lY6yFeXuE/tcwyXF74bDKS4KvTu84uFpQfpbS/LXHGKKXz+g==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/groq@3.0.38':
-    resolution: {integrity: sha512-mzn+KYeROVHFZnAr3qNX+eZ4Un4BFykOcs8XDH8LLzdfgrW6fxQkdiZyww0asYGjIYaa16dkyVtglp4GV6BeUQ==}
+  '@ai-sdk/groq@3.0.39':
+    resolution: {integrity: sha512-BZAr6DjCbzWQ0Qn1/TSsHo/bmCt4JaAMb4A7HCSUZBQCAcOjne/03D0sVjHnQhUC3TpwcmYiv7tHAviK7BluRw==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/mistral@3.0.35':
-    resolution: {integrity: sha512-8BCt8pOWjvfIFZOVFz+koFcbydVv7Q8WM24J0gVJWDw1eOEn3Muugw4py+TuaQc8KdjP7d1HR9E4gIMN55zBgQ==}
+  '@ai-sdk/mistral@3.0.36':
+    resolution: {integrity: sha512-FLIb2QdLraOgQP3puUybuFYWbtsB02YWQBTOJOk8heiEsdFW3YE0dfuzwtsvoF4FXlBnbYYMWu5jgOesthcmWg==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/openai-compatible@2.0.46':
-    resolution: {integrity: sha512-23ExGdy3p0Grfz3BAjCbIOc74TjQc5nHu72e0+kx3hshvScp32a4nnQlzzG4VT1bDZxa9yPNNUNyb5nN6vJHcQ==}
+  '@ai-sdk/openai-compatible@2.0.47':
+    resolution: {integrity: sha512-Enm5UlL0zUCrW3792opk5h7hRWxZOZzDe6eQYVFqX9LUOGGCe1h8MZWAGim765nwzgnjlpeYOsuzZmLtRsTPlg==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/openai@3.0.62':
-    resolution: {integrity: sha512-Oy74Bztik2X25wZD9HRd83BAXOKcRvrfgz9gvVGqKj68yegf447NiElPbB6TSVb8zyiY9wv1GSGywMCxnnoF9g==}
+  '@ai-sdk/openai@3.0.63':
+    resolution: {integrity: sha512-4yY/m8a57MNNVoJCsXuNblKf6BO4yuAuLKRX4tzSNffBEBSp1FlcWdPE0Z4FkqUeS0AJhYSSqp0GIiA/cIcDNA==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/perplexity@3.0.32':
-    resolution: {integrity: sha512-5kPyfDOHL72Mnz0unBiW3S/jHjNMo/frPW6dBTADX1SMbFB9Yvks4k1pjixIJc1m8YBulI5hV5yTvr7uUxpzxA==}
+  '@ai-sdk/perplexity@3.0.33':
+    resolution: {integrity: sha512-aNt6pTAzq+akadDXVdg2SjN2dODtaVlkKbw8/35c+sekr+Tx0sJwVqMR1udxrjLzhQvz8qtfsWRuz+hB9pmOnQ==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@ai-sdk/provider-utils@4.0.26':
-    resolution: {integrity: sha512-CsKNLKsOpvPujRlIYvoz+Ybw+kGn7J4/fIZa/58+R7iWLLfwn6ifE2G6Yq8K9XvH/I/3bzaDAJ3NhRwEMsLBKQ==}
+  '@ai-sdk/provider-utils@4.0.27':
+    resolution: {integrity: sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
@@ -109,14 +109,14 @@ packages:
     resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==}
     engines: {node: '>=18'}
 
-  '@ai-sdk/xai@3.0.88':
-    resolution: {integrity: sha512-9jc7CT+6YxdPnkMF66jX/LmFoEcRI2Cjr5CTxZcrCA8f764NPF0mGtaNRv3M0OzYwBIppgZ3F0TNhADguHWLrg==}
+  '@ai-sdk/xai@3.0.89':
+    resolution: {integrity: sha512-ecFE4iQnWePrxPYuSUYCh8lpoKZ52J3jao5whDVC3+Z9Cu/XeyOe2oUGzsYSUPgbJNi/ZmD/KN69bHbUcAHFvw==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
 
-  '@anthropic-ai/sdk@0.95.0':
-    resolution: {integrity: sha512-7It2B76OFJH9jC/a0TicXFMq0ZZM25ei+i/mK7JnsE1Ibmo0Yfkqm+DXOHeU/ZxxKwLLGPP6qaAvKmQmgV6XhA==}
+  '@anthropic-ai/sdk@0.95.2':
+    resolution: {integrity: sha512-Egddwo3sheo1PzUrMkZnH6VkQYwS0h/b/i8vSK8Ta9M45UQipAMeDFH57dYuDAfXMEUUGeKw6CMlremgMZgrSQ==}
     hasBin: true
     peerDependencies:
       zod: ^3.25.0 || ^4.0.0
@@ -960,8 +960,8 @@ packages:
   '@push.rocks/qenv@6.1.3':
     resolution: {integrity: sha512-+z2hsAU/7CIgpYLFqvda8cn9rUBMHqLdQLjsFfRn5jPoD7dJ5rFlpkbhfM4Ws8mHMniwWaxGKo+q/YBhtzRBLg==}
 
-  '@push.rocks/smartai@2.2.0':
-    resolution: {integrity: sha512-dB9DDguVbvKNfVjVPME2vZqAcZEZBehlv7VpFL+ECTf6jBf4tNUtCfw/a2YlF1uE7EL7rgHFttLbGXvUuaxk4A==}
+  '@push.rocks/smartai@2.3.0':
+    resolution: {integrity: sha512-i2Oz322qzU0ao/QJvpFNmqN8fkGbctImYZ6iDs9MYwR6KKbwoLDp1tZg1rM/nf1LuHOqjdojGcDNf0ycrTfHTw==}
 
   '@push.rocks/smartarchive@4.2.4':
     resolution: {integrity: sha512-uiqVAXPxmr8G5rv3uZvZFMOCt8l7cZC3nzvsy4YQqKf/VkPhKIEX+b7LkAeNlxPSYUiBQUkNRoawg9+5BaMcHg==}
@@ -1725,8 +1725,8 @@ packages:
     resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==}
     engines: {node: '>= 8.0.0'}
 
-  ai@6.0.175:
-    resolution: {integrity: sha512-6fFFHzbh6FIZnYc31V6osOxq25ABJYCShfG0O6ajHiA4FB/DgnPi1mP8cO5aAU3HNSbQHiMazdlh9bIsp97mVA==}
+  ai@6.0.182:
+    resolution: {integrity: sha512-ooJdziFjYrYRcsCx107roqA8gDTI3P82nUfroNWIhVvwrkYzEN3W1l50YK+XNqkUew8AiimaW0/SLBewRXMuHQ==}
     engines: {node: '>=18'}
     peerDependencies:
       zod: ^3.25.76 || ^4.1.8
@@ -2791,8 +2791,8 @@ packages:
   once@1.4.0:
     resolution: {integrity: sha1-WDsap3WWHUsROsF9nFC6753Xa9E=}
 
-  openai@6.36.0:
-    resolution: {integrity: sha512-Has2YbIusMq9wQEierFsgf9c783dy1y9arX459LmphNacEkkM5yxi2RIyXP0LmkOroQyW19iTwALHL8Yf26UKA==}
+  openai@6.37.0:
+    resolution: {integrity: sha512-0H5dEGFmmLv6KSd0W1w2nyL8WsLkX6yoLeQpU+dZAOuGcany5qkYQMmj35ZrKgb6yiyYqpUzFOpR8mZQkgqeEQ==}
     hasBin: true
     peerDependencies:
       ws: ^8.18.0
@@ -3446,56 +3446,56 @@ packages:
 
 snapshots:
 
-  '@ai-sdk/anthropic@3.0.75(zod@4.4.1)':
+  '@ai-sdk/anthropic@3.0.77(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/gateway@3.0.110(zod@4.4.1)':
+  '@ai-sdk/gateway@3.0.114(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       '@vercel/oidc': 3.2.0
       zod: 4.4.1
 
-  '@ai-sdk/google@3.0.68(zod@4.4.1)':
+  '@ai-sdk/google@3.0.73(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/groq@3.0.38(zod@4.4.1)':
+  '@ai-sdk/groq@3.0.39(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/mistral@3.0.35(zod@4.4.1)':
+  '@ai-sdk/mistral@3.0.36(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/openai-compatible@2.0.46(zod@4.4.1)':
+  '@ai-sdk/openai-compatible@2.0.47(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/openai@3.0.62(zod@4.4.1)':
+  '@ai-sdk/openai@3.0.63(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/perplexity@3.0.32(zod@4.4.1)':
+  '@ai-sdk/perplexity@3.0.33(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@ai-sdk/provider-utils@4.0.26(zod@4.4.1)':
+  '@ai-sdk/provider-utils@4.0.27(zod@4.4.1)':
     dependencies:
       '@ai-sdk/provider': 3.0.10
       '@standard-schema/spec': 1.1.0
@@ -3506,14 +3506,14 @@ snapshots:
     dependencies:
       json-schema: 0.4.0
 
-  '@ai-sdk/xai@3.0.88(zod@4.4.1)':
+  '@ai-sdk/xai@3.0.89(zod@4.4.1)':
     dependencies:
-      '@ai-sdk/openai-compatible': 2.0.46(zod@4.4.1)
+      '@ai-sdk/openai-compatible': 2.0.47(zod@4.4.1)
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       zod: 4.4.1
 
-  '@anthropic-ai/sdk@0.95.0(zod@4.4.1)':
+  '@anthropic-ai/sdk@0.95.2(zod@4.4.1)':
     dependencies:
       json-schema-to-ts: 3.1.1
       standardwebhooks: 1.0.0
@@ -4933,20 +4933,20 @@ snapshots:
       '@push.rocks/smartlog': 3.2.2
       '@push.rocks/smartpath': 6.0.0
 
-  '@push.rocks/smartai@2.2.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)':
+  '@push.rocks/smartai@2.3.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)':
     dependencies:
-      '@ai-sdk/anthropic': 3.0.75(zod@4.4.1)
-      '@ai-sdk/google': 3.0.68(zod@4.4.1)
-      '@ai-sdk/groq': 3.0.38(zod@4.4.1)
-      '@ai-sdk/mistral': 3.0.35(zod@4.4.1)
-      '@ai-sdk/openai': 3.0.62(zod@4.4.1)
-      '@ai-sdk/perplexity': 3.0.32(zod@4.4.1)
+      '@ai-sdk/anthropic': 3.0.77(zod@4.4.1)
+      '@ai-sdk/google': 3.0.73(zod@4.4.1)
+      '@ai-sdk/groq': 3.0.39(zod@4.4.1)
+      '@ai-sdk/mistral': 3.0.36(zod@4.4.1)
+      '@ai-sdk/openai': 3.0.63(zod@4.4.1)
+      '@ai-sdk/perplexity': 3.0.33(zod@4.4.1)
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/xai': 3.0.88(zod@4.4.1)
-      '@anthropic-ai/sdk': 0.95.0(zod@4.4.1)
+      '@ai-sdk/xai': 3.0.89(zod@4.4.1)
+      '@anthropic-ai/sdk': 0.95.2(zod@4.4.1)
       '@push.rocks/smartpdf': 4.2.2(typescript@6.0.3)
-      ai: 6.0.175(zod@4.4.1)
-      openai: 6.36.0(ws@8.20.0)(zod@4.4.1)
+      ai: 6.0.182(zod@4.4.1)
+      openai: 6.37.0(ws@8.20.0)(zod@4.4.1)
     transitivePeerDependencies:
       - aws-crt
       - bare-abort-controller
@@ -6196,11 +6196,11 @@ snapshots:
     dependencies:
       humanize-ms: 1.2.1
 
-  ai@6.0.175(zod@4.4.1):
+  ai@6.0.182(zod@4.4.1):
     dependencies:
-      '@ai-sdk/gateway': 3.0.110(zod@4.4.1)
+      '@ai-sdk/gateway': 3.0.114(zod@4.4.1)
       '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
       '@opentelemetry/api': 1.9.0
       zod: 4.4.1
 
@@ -7502,7 +7502,7 @@ snapshots:
     dependencies:
       wrappy: 1.0.2
 
-  openai@6.36.0(ws@8.20.0)(zod@4.4.1):
+  openai@6.37.0(ws@8.20.0)(zod@4.4.1):
     optionalDependencies:
       ws: 8.20.0
       zod: 4.4.1
diff --git a/readme.md b/readme.md
index e18dd65..cee0ee6 100644
--- a/readme.md
+++ b/readme.md
@@ -49,7 +49,7 @@ const result = await runAgent({
 
 console.log(result.text);    // "7 + 35 = 42"
 console.log(result.steps);   // number of agentic steps taken
-console.log(result.usage);   // { promptTokens, completionTokens, totalTokens }
+console.log(result.usage);   // { inputTokens, outputTokens, totalTokens, cacheReadTokens, cacheWriteTokens }
 ```
 
 ## Architecture
@@ -92,6 +92,8 @@ The single entry point. Options:
 | `system` | `string` | `undefined` | System prompt |
 | `tools` | `ToolSet` | `{}` | Tools the agent can call |
 | `providerOptions` | `ProviderOptions` | `undefined` | Provider-specific AI SDK request options passed through to `streamText()` |
+| `sessionId` | `string` | `undefined` | Stable session ID used as the provider's prompt-cache affinity key, where supported |
+| `cache` | `'auto' \| false \| IAgentCacheOptions` | `'auto'` | Prompt-cache policy. Set `false` to disable SmartAgent cache defaults |
 | `maxSteps` | `number` | `20` | Max agentic steps before stopping |
 | `messages` | `ModelMessage[]` | `[]` | Conversation history (for multi-turn) |
 | `maxRetries` | `number` | `5` | Max retries on rate-limit/server errors |
@@ -114,6 +116,8 @@ interface IAgentRunResult {
     inputTokens: number;
     outputTokens: number;
     totalTokens: number;
+    cacheReadTokens: number;
+    cacheWriteTokens: number;
   };
   toolCalls: Array<{
     toolName: string;
@@ -126,7 +130,7 @@ interface IAgentRunResult {
 
 ### OpenAI Provider Options
 
-Use `providerOptions` for provider-specific request settings such as GPT reasoning effort. SmartAgent forwards the object unchanged to AI SDK `streamText()`.
+Use `providerOptions` for provider-specific request settings such as GPT reasoning effort. SmartAgent merges cache defaults first, then applies your `providerOptions` so explicit caller options win.
 
 ```typescript
 import { getModelSetup } from '@push.rocks/smartai';
@@ -157,6 +161,30 @@ const saved = result.toolCalls.some((call) =>
 );
 ```
 
+### Prompt Caching
+
+SmartAgent applies prompt-cache defaults automatically:
+
+- Anthropic-compatible models get cache breakpoints on the first two system messages and the two most recent non-system messages.
+- OpenAI models get `store: false` by default and, when `sessionId` is provided, `promptCacheKey: sessionId` with `promptCacheRetention: 'in_memory'`.
+- Longer retention is opt-in. Use `cache: { retention: '24h' }` for OpenAI or `cache: { retention: '1h' }` for Anthropic.
+- Set `cache: false` to disable these defaults for a run.
+
+```typescript
+const result = await runAgent({
+  model,
+  sessionId: 'stable-session-id',
+  prompt: 'Continue the task.',
+  tools,
+});
+
+const noCache = await runAgent({
+  model,
+  prompt: 'One-off request.',
+  cache: false,
+});
+```
+
 ### Completion Validation
 
 Use `validateCompletion` when a workflow must not finish unless a required side-effect happened. Return `void` to accept the run, or return a string to append that string as a new user message and continue. If retries are exhausted, `runAgent()` throws.
diff --git a/test/test.ts b/test/test.ts
index bd84d93..2fe6bdb 100644
--- a/test/test.ts
+++ b/test/test.ts
@@ -104,6 +104,71 @@ tap.test('runAgent should forward providerOptions to streamText', async () => {
   expect((model.doStreamCalls[0].providerOptions as any).openai.reasoningEffort).toEqual('xhigh');
 });
 
+tap.test('runAgent should add OpenAI cache defaults when sessionId is provided', async () => {
+  // Mock model that reports provider 'openai'.
+  const openaiModel = new MockLanguageModelV3({
+    provider: 'openai',
+    modelId: 'gpt-5',
+    doStream: async () => createTextStreamResult('ok') as any,
+  });
+  const callerOptions = { openai: { reasoningEffort: 'high' } } as any;
+
+  const { text } = await smartagent.runAgent({
+    model: openaiModel,
+    prompt: 'hello',
+    sessionId: 'session-123',
+    providerOptions: callerOptions,
+  });
+
+  // Provider options recorded for the first doStream call.
+  const [firstCall] = openaiModel.doStreamCalls;
+  const sent = (firstCall.providerOptions as any).openai;
+
+  expect(text).toEqual('ok');
+  expect(sent.store).toEqual(false);
+  expect(sent.promptCacheKey).toEqual('session-123');
+  expect(sent.promptCacheRetention).toEqual('in_memory');
+  expect(sent.reasoningEffort).toEqual('high');
+});
+
+tap.test('runAgent should mark Anthropic prompt cache breakpoints by default', async () => {
+  const anthropicModel = new MockLanguageModelV3({
+    provider: 'anthropic',
+    modelId: 'claude-sonnet-4-5-20250929',
+    doStream: async () => createTextStreamResult('ok') as any,
+  });
+  // First message with the given role in the prompt recorded for the first doStream call.
+  const firstWithRole = (role: string) =>
+    (anthropicModel.doStreamCalls[0].prompt as any[]).find((message) => message.role === role);
+
+  const { text } = await smartagent.runAgent({
+    model: anthropicModel,
+    system: 'stable system prompt',
+    prompt: 'hello',
+  });
+
+  expect(text).toEqual('ok');
+  expect(firstWithRole('system').providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral');
+  expect(firstWithRole('user').providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral');
+});
+
+tap.test('runAgent should allow cache defaults to be disabled', async () => {
+  const openaiModel = new MockLanguageModelV3({
+    provider: 'openai',
+    modelId: 'gpt-5',
+    doStream: async () => createTextStreamResult('ok') as any,
+  });
+  // sessionId is still passed; the assertion below expects no provider options despite it.
+  await smartagent.runAgent({
+    model: openaiModel,
+    prompt: 'hello',
+    sessionId: 'session-123',
+    cache: false,
+  });
+  const [firstCall] = openaiModel.doStreamCalls;
+  expect(firstCall.providerOptions).toBeUndefined();
+});
+
 tap.test('runAgent should return final tool call records', async () => {
   let streamCallCount = 0;
   const callbackToolCalls: Array<{ name: string; input: unknown }> = [];
diff --git a/ts/index.ts b/ts/index.ts
index 5795b11..23b8bbe 100644
--- a/ts/index.ts
+++ b/ts/index.ts
@@ -3,7 +3,15 @@ export { ToolRegistry } from './smartagent.classes.toolregistry.js';
 export { truncateOutput } from './smartagent.utils.truncation.js';
 export type { ITruncateResult } from './smartagent.utils.truncation.js';
 export { ContextOverflowError } from './smartagent.interfaces.js';
-export type { IAgentRunOptions, IAgentRunResult, IAgentToolCallRecord, ProviderOptions } from './smartagent.interfaces.js';
+export type {
+  IAgentCacheOptions,
+  IAgentRunOptions,
+  IAgentRunResult,
+  IAgentToolCallRecord,
+  ProviderOptions,
+  TAgentCacheRetention,
+  TAgentCacheSetting,
+} from './smartagent.interfaces.js';
 
 // Re-export tool() and z so consumers can define tools without extra imports
 export { tool, jsonSchema } from '@push.rocks/smartai';
diff --git a/ts/plugins.ts b/ts/plugins.ts
index c5d270a..28d1619 100644
--- a/ts/plugins.ts
+++ b/ts/plugins.ts
@@ -4,9 +4,9 @@ import * as path from 'path';
 export { path };
 
 // ai-sdk core
-import { streamText, generateText, stepCountIs } from 'ai';
+import { streamText, generateText, stepCountIs, wrapLanguageModel } from 'ai';
 
-export { streamText, generateText, stepCountIs };
+export { streamText, generateText, stepCountIs, wrapLanguageModel };
 
 export type {
   ModelMessage,
@@ -15,11 +15,29 @@ export type {
 } from 'ai';
 
 // @push.rocks/smartai
-import { tool, jsonSchema } from '@push.rocks/smartai';
+import {
+  applySmartAiCacheProviderOptions,
+  createSmartAiCachingMiddleware,
+  jsonSchema,
+  resolveSmartAiCacheProvider,
+  tool,
+} from '@push.rocks/smartai';
 
-export { tool, jsonSchema };
+export {
+  applySmartAiCacheProviderOptions,
+  createSmartAiCachingMiddleware,
+  resolveSmartAiCacheProvider,
+  tool,
+  jsonSchema,
+};
 
-export type { LanguageModelV3, TSmartAiProviderOptions as ProviderOptions } from '@push.rocks/smartai';
+export type {
+  ISmartAiCacheOptions,
+  LanguageModelV3,
+  TSmartAiCacheRetention,
+  TSmartAiCacheSetting,
+  TSmartAiProviderOptions as ProviderOptions,
+} from '@push.rocks/smartai';
 
 // zod
 import { z } from 'zod';
diff --git a/ts/smartagent.classes.agent.ts b/ts/smartagent.classes.agent.ts
index b010669..a2eb8cf 100644
--- a/ts/smartagent.classes.agent.ts
+++ b/ts/smartagent.classes.agent.ts
@@ -90,6 +90,28 @@ function errorToString(error: unknown): string {
   return String(error);
 }
 
+/** Turns a usage entry (plain number or object with a numeric `total`) into a token count; anything else is 0. */
+function tokenTotal(tokenUsage: unknown): number {
+  if (typeof tokenUsage === 'number') return tokenUsage;
+  if (!tokenUsage || typeof tokenUsage !== 'object') return 0;
+  const { total } = tokenUsage as { total?: unknown };
+  return typeof total === 'number' ? total : 0;
+}
+
+/** Reads a numeric `cacheRead` from an object-shaped usage entry; returns 0 when it is absent or not a number. */
+function tokenCacheRead(tokenUsage: unknown): number {
+  if (!tokenUsage || typeof tokenUsage !== 'object') return 0;
+  const { cacheRead } = tokenUsage as { cacheRead?: unknown };
+  return typeof cacheRead === 'number' ? cacheRead : 0;
+}
+
+/** Reads a numeric `cacheWrite` from an object-shaped usage entry; returns 0 when it is absent or not a number. */
+function tokenCacheWrite(tokenUsage: unknown): number {
+  if (!tokenUsage || typeof tokenUsage !== 'object') return 0;
+  const { cacheWrite } = tokenUsage as { cacheWrite?: unknown };
+  return typeof cacheWrite === 'number' ? cacheWrite : 0;
+}
+
 function recordToolCall(
   toolCalls: IAgentToolCallRecord[],
   toolCallIndexes: Map<string, number>,
@@ -129,11 +151,34 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
   let attempt = 0;
   let totalInput = 0;
   let totalOutput = 0;
+  let totalCacheRead = 0;
+  let totalCacheWrite = 0;
   let validationRetries = 0;
   const toolCalls: IAgentToolCallRecord[] = [];
   const toolCallIndexes = new Map<string, number>();
 
   const tools = options.tools ?? {};
+  const cache = options.cache ?? 'auto';
+  const configuredCacheProvider = typeof cache === 'object' ? cache.provider : undefined;
+  const messageCacheProvider = cache === false
+    ? undefined
+    : configuredCacheProvider ?? plugins.resolveSmartAiCacheProvider(options.model.provider, options.model.modelId);
+  const model = messageCacheProvider
+    ? plugins.wrapLanguageModel({
+        model: options.model,
+        middleware: plugins.createSmartAiCachingMiddleware({
+          ...(typeof cache === 'object' ? cache : {}),
+          provider: messageCacheProvider,
+        }),
+      }) as unknown as plugins.LanguageModelV3
+    : options.model;
+  const providerOptions = plugins.applySmartAiCacheProviderOptions({
+    provider: options.model.provider,
+    modelId: options.model.modelId,
+    providerOptions: options.providerOptions,
+    cache,
+    sessionId: options.sessionId,
+  });
 
   // Add a no-op sink for repaired-but-unrecognised tool calls
   const allTools: plugins.ToolSet = {
@@ -157,11 +202,11 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
   while (true) {
     try {
       const result = plugins.streamText({
-        model: options.model,
+        model,
         system: options.system,
         messages,
         tools: allTools,
-        providerOptions: options.providerOptions,
+        providerOptions,
         stopWhen: plugins.stepCountIs(options.maxSteps ?? 20),
         maxRetries: 0, // handled manually below
         abortSignal: options.abort,
@@ -218,8 +263,10 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
 
         onStepFinish: ({ usage, toolCalls: stepToolCalls, toolResults, content }) => {
           stepCount++;
-          totalInput += usage?.inputTokens ?? 0;
-          totalOutput += usage?.outputTokens ?? 0;
+          totalInput += tokenTotal((usage as any)?.inputTokens);
+          totalOutput += tokenTotal((usage as any)?.outputTokens);
+          totalCacheRead += tokenCacheRead((usage as any)?.inputTokens) || tokenTotal((usage as any)?.inputTokenDetails?.cacheReadTokens); // nested V3-style count first, else the flattened AI SDK `inputTokenDetails` field
+          totalCacheWrite += tokenCacheWrite((usage as any)?.inputTokens) || tokenTotal((usage as any)?.inputTokenDetails?.cacheWriteTokens); // same fallback for cache writes
           for (const toolCall of stepToolCalls) {
             recordToolCall(toolCalls, toolCallIndexes, toolCall);
           }
@@ -251,6 +298,8 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
           inputTokens: totalInput,
           outputTokens: totalOutput,
           totalTokens: totalInput + totalOutput,
+          cacheReadTokens: totalCacheRead,
+          cacheWriteTokens: totalCacheWrite,
         },
         toolCalls,
       };
diff --git a/ts/smartagent.interfaces.ts b/ts/smartagent.interfaces.ts
index bdd5b0a..0198435 100644
--- a/ts/smartagent.interfaces.ts
+++ b/ts/smartagent.interfaces.ts
@@ -1,6 +1,17 @@
-import type { ToolSet, ModelMessage, LanguageModelV3, ProviderOptions } from './plugins.js';
+import type {
+  ISmartAiCacheOptions,
+  ToolSet,
+  ModelMessage,
+  LanguageModelV3,
+  ProviderOptions,
+  TSmartAiCacheRetention,
+  TSmartAiCacheSetting,
+} from './plugins.js';
 
 export type { ProviderOptions };
+export type IAgentCacheOptions = ISmartAiCacheOptions;
+export type TAgentCacheRetention = TSmartAiCacheRetention;
+export type TAgentCacheSetting = TSmartAiCacheSetting;
 
 export interface IAgentToolCallRecord {
   toolName: string;
@@ -20,6 +31,10 @@ export interface IAgentRunOptions {
   tools?: ToolSet;
   /** Provider-specific AI SDK request options passed through to streamText() */
   providerOptions?: ProviderOptions;
+  /** Stable session id used as provider prompt-cache affinity key where supported. */
+  sessionId?: string;
+  /** Prompt-cache policy. Default: 'auto'. Set false to disable smartagent cache defaults. */
+  cache?: TAgentCacheSetting;
   /**
    * Maximum number of LLM↔tool round trips.
    * Each step may execute multiple tools in parallel.
@@ -61,7 +76,13 @@ export interface IAgentRunResult {
   /** Finish reason from the final step */
   finishReason: string;
   /** Accumulated token usage across all steps */
-  usage: { inputTokens: number; outputTokens: number; totalTokens: number };
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number;
+    cacheReadTokens: number;
+    cacheWriteTokens: number;
+  };
   /** Tool calls observed during the run, including inputs and outputs/errors when available */
   toolCalls: IAgentToolCallRecord[];
 }