v3.4.0

feat(agent): add streamed reasoning summary callbacks to runAgent
v3.3.0
2026-05-14 22:44:10 +00:00 · 2026-05-14 22:44:08 +00:00 · 2026-05-14 16:50:16 +00:00 · 2026-05-14 16:50:08 +00:00 · 2026-05-14 11:34:11 +00:00 · 2026-05-14 11:34:04 +00:00
11 changed files with 394 additions and 90 deletions
@@ -3,6 +3,37 @@
 ## Pending


+
+
+
+## 2026-05-14 - 3.4.0
+
+### Features
+
+- add streamed reasoning summary callbacks to runAgent (agent)
+  - Introduces onReasoningStart, onReasoningDelta, and onReasoningEnd callbacks in the agent options interface
+  - Handles reasoning-start, reasoning-delta, and reasoning-end stream chunks while accumulating reasoning text by id
+  - Ensures incomplete reasoning streams are finalized after the response completes
+  - Adds tests for reasoning summary streaming and updates the README API documentation
+
+## 2026-05-14 - 3.3.0
+
+### Features
+
+- upgrade @push.rocks/smartai to ^4.0.0 (deps)
+  - Updates the core smartai dependency from ^2.3.0 to ^4.0.0.
+  - Refreshes README hints to document the new smartai version.
+
+## 2026-05-14 - 3.2.0
+
+### Features
+
+- add prompt caching options and cache token usage reporting (agent)
+  - adds sessionId and cache run options to configure provider-specific prompt caching defaults
+  - applies OpenAI cache provider options and Anthropic cache breakpoints automatically, with support to disable defaults
+  - extends usage reporting to include cacheReadTokens and cacheWriteTokens
+  - exports cache-related types and helpers and updates tests and README to cover the new behavior
+
 ## 2026-05-11 - 3.1.1

 ### Fixes
@@ -1,6 +1,6 @@
 {
  "name": "@push.rocks/smartagent",
-  "version": "3.1.1",
+  "version": "3.4.0",
  "private": false,
  "description": "Agentic loop for ai-sdk (Vercel AI SDK). Wraps streamText with stopWhen for parallel multi-step tool execution. Built on @push.rocks/smartai.",
  "main": "dist_ts/index.js",
@@ -37,11 +37,11 @@
    "@types/node": "^25.6.0"
  },
  "dependencies": {
-    "@push.rocks/smartai": "^2.2.0",
+    "@push.rocks/smartai": "^4.0.0",
    "@push.rocks/smartfs": "^1.5.1",
    "@push.rocks/smartrequest": "^5.0.1",
    "@push.rocks/smartshell": "^3.3.8",
-    "ai": "^6.0.170",
+    "ai": "^6.0.182",
    "zod": "^4.4.1"
  },
  "packageManager": "pnpm@10.28.2",
@@ -9,8 +9,8 @@ importers:
  .:
    dependencies:
      '@push.rocks/smartai':
-        specifier: ^2.2.0
-        version: 2.2.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)
+        specifier: ^4.0.0
+        version: 4.0.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)
      '@push.rocks/smartfs':
        specifier: ^1.5.1
        version: 1.5.1
@@ -21,8 +21,8 @@ importers:
        specifier: ^3.3.8
        version: 3.3.8
      ai:
-        specifier: ^6.0.170
-        version: 6.0.175(zod@4.4.1)
+        specifier: ^6.0.182
+        version: 6.0.182(zod@4.4.1)
      zod:
        specifier: ^4.4.1
        version: 4.4.1
@@ -51,56 +51,56 @@ importers:

 packages:

-  '@ai-sdk/anthropic@3.0.75':
-    resolution: {integrity: sha512-5AV3CKwaOJFdGXhihVgvRLNrjwRn2Xmy71YygT8DYOA+5zTx93Seg2QSIS8b3tJxzZ7X4H84pEtrE8VZKBCZGA==}
+  '@ai-sdk/anthropic@3.0.77':
+    resolution: {integrity: sha512-ML8C2M1YvPA1ulEx4TiyF0k1xvC2ikEiPBIC1PPQ0a5xELUGrO2lAaEzsTEoJ+eCeDd8PSBuFJjs+r+9yIwQXA==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/gateway@3.0.110':
-    resolution: {integrity: sha512-sbv8+1L9/BRKydn8dMNwoMQKupA4iLJ9N+yvxgW6wMQ/94UepDf3FeYWMj/dLdzolAHZ6izRUP4s5WqQkmJ2Zg==}
+  '@ai-sdk/gateway@3.0.114':
+    resolution: {integrity: sha512-MqkZ5sd+qiq6RgIxELkoFQXg2/JwK+WCMaot7U+rtrZpWJl3fSyYvc28SC03b256o4F7OXjQtdjTqs81B2w+dA==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/google@3.0.68':
-    resolution: {integrity: sha512-bjQSuUmwStn7R0RDGl9I8kriY+xjmschzy5JN4eHPPEOdca2gS6zLc+oi8jhRiCqqROkk3U12Q9M8rmQw7gmbQ==}
+  '@ai-sdk/google@3.0.73':
+    resolution: {integrity: sha512-o2MuIeyvZrFIeIbnbA8Thrr63irdyUBh0uWBZ2lY6yFeXuE/tcwyXF74bDKS4KvTu84uFpQfpbS/LXHGKKXz+g==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/groq@3.0.38':
-    resolution: {integrity: sha512-mzn+KYeROVHFZnAr3qNX+eZ4Un4BFykOcs8XDH8LLzdfgrW6fxQkdiZyww0asYGjIYaa16dkyVtglp4GV6BeUQ==}
+  '@ai-sdk/groq@3.0.39':
+    resolution: {integrity: sha512-BZAr6DjCbzWQ0Qn1/TSsHo/bmCt4JaAMb4A7HCSUZBQCAcOjne/03D0sVjHnQhUC3TpwcmYiv7tHAviK7BluRw==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/mistral@3.0.35':
-    resolution: {integrity: sha512-8BCt8pOWjvfIFZOVFz+koFcbydVv7Q8WM24J0gVJWDw1eOEn3Muugw4py+TuaQc8KdjP7d1HR9E4gIMN55zBgQ==}
+  '@ai-sdk/mistral@3.0.36':
+    resolution: {integrity: sha512-FLIb2QdLraOgQP3puUybuFYWbtsB02YWQBTOJOk8heiEsdFW3YE0dfuzwtsvoF4FXlBnbYYMWu5jgOesthcmWg==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/openai-compatible@2.0.46':
-    resolution: {integrity: sha512-23ExGdy3p0Grfz3BAjCbIOc74TjQc5nHu72e0+kx3hshvScp32a4nnQlzzG4VT1bDZxa9yPNNUNyb5nN6vJHcQ==}
+  '@ai-sdk/openai-compatible@2.0.47':
+    resolution: {integrity: sha512-Enm5UlL0zUCrW3792opk5h7hRWxZOZzDe6eQYVFqX9LUOGGCe1h8MZWAGim765nwzgnjlpeYOsuzZmLtRsTPlg==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/openai@3.0.62':
-    resolution: {integrity: sha512-Oy74Bztik2X25wZD9HRd83BAXOKcRvrfgz9gvVGqKj68yegf447NiElPbB6TSVb8zyiY9wv1GSGywMCxnnoF9g==}
+  '@ai-sdk/openai@3.0.63':
+    resolution: {integrity: sha512-4yY/m8a57MNNVoJCsXuNblKf6BO4yuAuLKRX4tzSNffBEBSp1FlcWdPE0Z4FkqUeS0AJhYSSqp0GIiA/cIcDNA==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/perplexity@3.0.32':
-    resolution: {integrity: sha512-5kPyfDOHL72Mnz0unBiW3S/jHjNMo/frPW6dBTADX1SMbFB9Yvks4k1pjixIJc1m8YBulI5hV5yTvr7uUxpzxA==}
+  '@ai-sdk/perplexity@3.0.33':
+    resolution: {integrity: sha512-aNt6pTAzq+akadDXVdg2SjN2dODtaVlkKbw8/35c+sekr+Tx0sJwVqMR1udxrjLzhQvz8qtfsWRuz+hB9pmOnQ==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@ai-sdk/provider-utils@4.0.26':
-    resolution: {integrity: sha512-CsKNLKsOpvPujRlIYvoz+Ybw+kGn7J4/fIZa/58+R7iWLLfwn6ifE2G6Yq8K9XvH/I/3bzaDAJ3NhRwEMsLBKQ==}
+  '@ai-sdk/provider-utils@4.0.27':
+    resolution: {integrity: sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8
@@ -109,14 +109,14 @@ packages:
    resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==}
    engines: {node: '>=18'}

-  '@ai-sdk/xai@3.0.88':
-    resolution: {integrity: sha512-9jc7CT+6YxdPnkMF66jX/LmFoEcRI2Cjr5CTxZcrCA8f764NPF0mGtaNRv3M0OzYwBIppgZ3F0TNhADguHWLrg==}
+  '@ai-sdk/xai@3.0.89':
+    resolution: {integrity: sha512-ecFE4iQnWePrxPYuSUYCh8lpoKZ52J3jao5whDVC3+Z9Cu/XeyOe2oUGzsYSUPgbJNi/ZmD/KN69bHbUcAHFvw==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8

-  '@anthropic-ai/sdk@0.95.0':
-    resolution: {integrity: sha512-7It2B76OFJH9jC/a0TicXFMq0ZZM25ei+i/mK7JnsE1Ibmo0Yfkqm+DXOHeU/ZxxKwLLGPP6qaAvKmQmgV6XhA==}
+  '@anthropic-ai/sdk@0.95.2':
+    resolution: {integrity: sha512-Egddwo3sheo1PzUrMkZnH6VkQYwS0h/b/i8vSK8Ta9M45UQipAMeDFH57dYuDAfXMEUUGeKw6CMlremgMZgrSQ==}
    hasBin: true
    peerDependencies:
      zod: ^3.25.0 || ^4.0.0
@@ -960,8 +960,8 @@ packages:
  '@push.rocks/qenv@6.1.3':
    resolution: {integrity: sha512-+z2hsAU/7CIgpYLFqvda8cn9rUBMHqLdQLjsFfRn5jPoD7dJ5rFlpkbhfM4Ws8mHMniwWaxGKo+q/YBhtzRBLg==}

-  '@push.rocks/smartai@2.2.0':
-    resolution: {integrity: sha512-dB9DDguVbvKNfVjVPME2vZqAcZEZBehlv7VpFL+ECTf6jBf4tNUtCfw/a2YlF1uE7EL7rgHFttLbGXvUuaxk4A==}
+  '@push.rocks/smartai@4.0.0':
+    resolution: {integrity: sha512-wl6XZ6uQ/LQrz4fvKIan5zryKo/v+2uYwknXHsunyQ6rK0Nw8MPjQ4QFWLG+8wsaWvgh42WPKcCp6zWcVguvVw==}

  '@push.rocks/smartarchive@4.2.4':
    resolution: {integrity: sha512-uiqVAXPxmr8G5rv3uZvZFMOCt8l7cZC3nzvsy4YQqKf/VkPhKIEX+b7LkAeNlxPSYUiBQUkNRoawg9+5BaMcHg==}
@@ -1725,8 +1725,8 @@ packages:
    resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==}
    engines: {node: '>= 8.0.0'}

-  ai@6.0.175:
-    resolution: {integrity: sha512-6fFFHzbh6FIZnYc31V6osOxq25ABJYCShfG0O6ajHiA4FB/DgnPi1mP8cO5aAU3HNSbQHiMazdlh9bIsp97mVA==}
+  ai@6.0.182:
+    resolution: {integrity: sha512-ooJdziFjYrYRcsCx107roqA8gDTI3P82nUfroNWIhVvwrkYzEN3W1l50YK+XNqkUew8AiimaW0/SLBewRXMuHQ==}
    engines: {node: '>=18'}
    peerDependencies:
      zod: ^3.25.76 || ^4.1.8
@@ -2791,8 +2791,8 @@ packages:
  once@1.4.0:
    resolution: {integrity: sha1-WDsap3WWHUsROsF9nFC6753Xa9E=}

-  openai@6.36.0:
-    resolution: {integrity: sha512-Has2YbIusMq9wQEierFsgf9c783dy1y9arX459LmphNacEkkM5yxi2RIyXP0LmkOroQyW19iTwALHL8Yf26UKA==}
+  openai@6.37.0:
+    resolution: {integrity: sha512-0H5dEGFmmLv6KSd0W1w2nyL8WsLkX6yoLeQpU+dZAOuGcany5qkYQMmj35ZrKgb6yiyYqpUzFOpR8mZQkgqeEQ==}
    hasBin: true
    peerDependencies:
      ws: ^8.18.0
@@ -3446,56 +3446,56 @@ packages:

 snapshots:

-  '@ai-sdk/anthropic@3.0.75(zod@4.4.1)':
+  '@ai-sdk/anthropic@3.0.77(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/gateway@3.0.110(zod@4.4.1)':
+  '@ai-sdk/gateway@3.0.114(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      '@vercel/oidc': 3.2.0
      zod: 4.4.1

-  '@ai-sdk/google@3.0.68(zod@4.4.1)':
+  '@ai-sdk/google@3.0.73(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/groq@3.0.38(zod@4.4.1)':
+  '@ai-sdk/groq@3.0.39(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/mistral@3.0.35(zod@4.4.1)':
+  '@ai-sdk/mistral@3.0.36(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/openai-compatible@2.0.46(zod@4.4.1)':
+  '@ai-sdk/openai-compatible@2.0.47(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/openai@3.0.62(zod@4.4.1)':
+  '@ai-sdk/openai@3.0.63(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/perplexity@3.0.32(zod@4.4.1)':
+  '@ai-sdk/perplexity@3.0.33(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@ai-sdk/provider-utils@4.0.26(zod@4.4.1)':
+  '@ai-sdk/provider-utils@4.0.27(zod@4.4.1)':
    dependencies:
      '@ai-sdk/provider': 3.0.10
      '@standard-schema/spec': 1.1.0
@@ -3506,14 +3506,14 @@ snapshots:
    dependencies:
      json-schema: 0.4.0

-  '@ai-sdk/xai@3.0.88(zod@4.4.1)':
+  '@ai-sdk/xai@3.0.89(zod@4.4.1)':
    dependencies:
-      '@ai-sdk/openai-compatible': 2.0.46(zod@4.4.1)
+      '@ai-sdk/openai-compatible': 2.0.47(zod@4.4.1)
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      zod: 4.4.1

-  '@anthropic-ai/sdk@0.95.0(zod@4.4.1)':
+  '@anthropic-ai/sdk@0.95.2(zod@4.4.1)':
    dependencies:
      json-schema-to-ts: 3.1.1
      standardwebhooks: 1.0.0
@@ -4933,20 +4933,20 @@ snapshots:
      '@push.rocks/smartlog': 3.2.2
      '@push.rocks/smartpath': 6.0.0

-  '@push.rocks/smartai@2.2.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)':
+  '@push.rocks/smartai@4.0.0(typescript@6.0.3)(ws@8.20.0)(zod@4.4.1)':
    dependencies:
-      '@ai-sdk/anthropic': 3.0.75(zod@4.4.1)
-      '@ai-sdk/google': 3.0.68(zod@4.4.1)
-      '@ai-sdk/groq': 3.0.38(zod@4.4.1)
-      '@ai-sdk/mistral': 3.0.35(zod@4.4.1)
-      '@ai-sdk/openai': 3.0.62(zod@4.4.1)
-      '@ai-sdk/perplexity': 3.0.32(zod@4.4.1)
+      '@ai-sdk/anthropic': 3.0.77(zod@4.4.1)
+      '@ai-sdk/google': 3.0.73(zod@4.4.1)
+      '@ai-sdk/groq': 3.0.39(zod@4.4.1)
+      '@ai-sdk/mistral': 3.0.36(zod@4.4.1)
+      '@ai-sdk/openai': 3.0.63(zod@4.4.1)
+      '@ai-sdk/perplexity': 3.0.33(zod@4.4.1)
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/xai': 3.0.88(zod@4.4.1)
-      '@anthropic-ai/sdk': 0.95.0(zod@4.4.1)
+      '@ai-sdk/xai': 3.0.89(zod@4.4.1)
+      '@anthropic-ai/sdk': 0.95.2(zod@4.4.1)
      '@push.rocks/smartpdf': 4.2.2(typescript@6.0.3)
-      ai: 6.0.175(zod@4.4.1)
-      openai: 6.36.0(ws@8.20.0)(zod@4.4.1)
+      ai: 6.0.182(zod@4.4.1)
+      openai: 6.37.0(ws@8.20.0)(zod@4.4.1)
    transitivePeerDependencies:
      - aws-crt
      - bare-abort-controller
@@ -6196,11 +6196,11 @@ snapshots:
    dependencies:
      humanize-ms: 1.2.1

-  ai@6.0.175(zod@4.4.1):
+  ai@6.0.182(zod@4.4.1):
    dependencies:
-      '@ai-sdk/gateway': 3.0.110(zod@4.4.1)
+      '@ai-sdk/gateway': 3.0.114(zod@4.4.1)
      '@ai-sdk/provider': 3.0.10
-      '@ai-sdk/provider-utils': 4.0.26(zod@4.4.1)
+      '@ai-sdk/provider-utils': 4.0.27(zod@4.4.1)
      '@opentelemetry/api': 1.9.0
      zod: 4.4.1

@@ -7502,7 +7502,7 @@ snapshots:
    dependencies:
      wrappy: 1.0.2

-  openai@6.36.0(ws@8.20.0)(zod@4.4.1):
+  openai@6.37.0(ws@8.20.0)(zod@4.4.1):
    optionalDependencies:
      ws: 8.20.0
      zod: 4.4.1
@@ -24,7 +24,7 @@ Each exports a factory returning a flat `ToolSet` (Record<string, Tool>):
 4. **jsonTool()** → `json_validate`, `json_transform`

 ## Key Dependencies
- `@push.rocks/smartai` ^2.0.0 — provider registry, `getModel()`, re-exports `tool`, `jsonSchema`
+- `@push.rocks/smartai` ^4.0.0 — provider registry, `getModel()`, re-exports `tool`, `jsonSchema`
 - `ai` ^6.0.0 — Vercel AI SDK v6 (`streamText`, `stepCountIs`, `ModelMessage`, `ToolSet`)
 - `zod` ^4.4.1 — tool input schema definitions
 - `@push.rocks/smartfs`, `smartshell`, `smartrequest` — tool implementations
@@ -49,7 +49,7 @@ const result = await runAgent({

 console.log(result.text);    // "7 + 35 = 42"
 console.log(result.steps);   // number of agentic steps taken
-console.log(result.usage);   // { promptTokens, completionTokens, totalTokens }
+console.log(result.usage);   // { inputTokens, outputTokens, totalTokens, cacheReadTokens, cacheWriteTokens }
 ```

 ## Architecture
@@ -76,7 +76,7 @@ console.log(result.usage);   // { promptTokens, completionTokens, totalTokens }
 - ⚡ **Parallel tool execution** — multiple tool calls in a single step are executed concurrently
 - 🔧 **Auto-retry with backoff** — handles 429/529/503 errors with header-aware retry delays
 - 🩹 **Tool call repair** — case-insensitive name matching + invalid tool sink prevents crashes
- 📊 **Token streaming** — `onToken` and `onToolCall` callbacks for real-time progress
+- 📊 **Token and reasoning streaming** — `onToken`, `onReasoning*`, and `onToolCall` callbacks for real-time progress
 - 💥 **Context overflow handling** — detects overflow and invokes your `onContextOverflow` callback

 ## Core API
@@ -92,10 +92,15 @@ The single entry point. Options:
 | `system` | `string` | `undefined` | System prompt |
 | `tools` | `ToolSet` | `{}` | Tools the agent can call |
 | `providerOptions` | `ProviderOptions` | `undefined` | Provider-specific AI SDK request options passed through to `streamText()` |
+| `sessionId` | `string` | `undefined` | Stable session id used as provider prompt-cache affinity key where supported |
+| `cache` | `'auto' \| false \| IAgentCacheOptions` | `'auto'` | Prompt-cache policy. Set `false` to disable SmartAgent cache defaults |
 | `maxSteps` | `number` | `20` | Max agentic steps before stopping |
 | `messages` | `ModelMessage[]` | `[]` | Conversation history (for multi-turn) |
 | `maxRetries` | `number` | `5` | Max retries on rate-limit/server errors |
 | `onToken` | `(delta: string) => void` | — | Streaming token callback |
+| `onReasoningStart` | `(id: string, providerMetadata?: unknown) => void` | — | Called when a reasoning summary starts |
+| `onReasoningDelta` | `(id: string, delta: string, providerMetadata?: unknown) => void` | — | Called for each streamed chunk of reasoning summary text |
+| `onReasoningEnd` | `(id: string, text: string, providerMetadata?: unknown) => void` | — | Called when a reasoning summary completes; `text` is the full accumulated summary |
 | `onToolCall` | `(name: string) => void` | — | Called when a tool is invoked |
 | `onToolResult` | `(name: string, result: unknown) => void` | — | Called when a tool finishes |
 | `validateCompletion` | `(result) => string \| void` | — | Return a string to reject and reprompt an incomplete run |
@@ -114,6 +119,8 @@ interface IAgentRunResult {
    inputTokens: number;
    outputTokens: number;
    totalTokens: number;
+    cacheReadTokens: number;
+    cacheWriteTokens: number;
  };
  toolCalls: Array<{
    toolName: string;
@@ -126,7 +133,7 @@ interface IAgentRunResult {

 ### OpenAI Provider Options

-Use `providerOptions` for provider-specific request settings such as GPT reasoning effort. SmartAgent forwards the object unchanged to AI SDK `streamText()`.
+Use `providerOptions` for provider-specific request settings such as GPT reasoning effort. SmartAgent merges cache defaults first, then applies your `providerOptions` so explicit caller options win.

 ```typescript
 import { getModelSetup } from '@push.rocks/smartai';
@@ -157,6 +164,30 @@ const saved = result.toolCalls.some((call) =>
 );
 ```

+### Prompt Caching
+
+SmartAgent applies prompt-cache defaults automatically:
+
+- Anthropic-compatible models get cache breakpoints on the first two system messages and the two most recent non-system messages.
+- OpenAI models get `store: false` by default and, when `sessionId` is provided, `promptCacheKey: sessionId` with `promptCacheRetention: 'in_memory'`.
+- Longer retention is opt-in. Use `cache: { retention: '24h' }` for OpenAI or `cache: { retention: '1h' }` for Anthropic.
+- Set `cache: false` to disable these defaults for a run.
+
+```typescript
+const result = await runAgent({
+  model,
+  sessionId: 'stable-session-id',
+  prompt: 'Continue the task.',
+  tools,
+});
+
+const noCache = await runAgent({
+  model,
+  prompt: 'One-off request.',
+  cache: false,
+});
+```
+
 ### Completion Validation

 Use `validateCompletion` when a workflow must not finish unless a required side-effect happened. Return `void` to accept the run, or return a string to append that string as a new user message and continue. If retries are exhausted, `runAgent()` throws.
@@ -33,6 +33,25 @@ const createTextStreamResult = (text: string) => ({
  ] as any[]),
 });

+/**
+ * Builds a mock `doStream` result whose stream carries one reasoning part
+ * (delivered as two deltas, split after the first 7 characters), followed by
+ * a single text part and a finish chunk.
+ */
+const createReasoningStreamResult = (reasoning: string, text: string) => {
+  const reasoningHead = reasoning.slice(0, 7);
+  const reasoningTail = reasoning.slice(7);
+  const chunks = [
+    { type: 'stream-start', warnings: [] },
+    { type: 'response-metadata', id: 'response-1', timestamp: new Date(0), modelId: 'mock-model' },
+    { type: 'reasoning-start', id: 'reasoning-1' },
+    { type: 'reasoning-delta', id: 'reasoning-1', delta: reasoningHead },
+    { type: 'reasoning-delta', id: 'reasoning-1', delta: reasoningTail },
+    { type: 'reasoning-end', id: 'reasoning-1' },
+    { type: 'text-start', id: 'text-1' },
+    { type: 'text-delta', id: 'text-1', delta: text },
+    { type: 'text-end', id: 'text-1' },
+    {
+      type: 'finish',
+      finishReason: { unified: 'stop', raw: 'stop' },
+      usage: createUsage(2, 2),
+    },
+  ] as any[];
+
+  return { stream: convertArrayToReadableStream(chunks) };
+};
+
 const createToolCallStreamResult = (toolName: string, input: unknown) => ({
  stream: convertArrayToReadableStream([
    { type: 'stream-start', warnings: [] },
@@ -104,6 +123,97 @@ tap.test('runAgent should forward providerOptions to streamText', async () => {
  expect((model.doStreamCalls[0].providerOptions as any).openai.reasoningEffort).toEqual('xhigh');
 });

+// With a sessionId on an OpenAI model, the request should carry the cache
+// defaults while the caller's own provider options are still present.
+tap.test('runAgent should add OpenAI cache defaults when sessionId is provided', async () => {
+  const mockModel = new MockLanguageModelV3({
+    provider: 'openai',
+    modelId: 'gpt-5',
+    doStream: async () => createTextStreamResult('ok') as any,
+  });
+  const callerProviderOptions = { openai: { reasoningEffort: 'high' } } as any;
+
+  const runResult = await smartagent.runAgent({
+    model: mockModel,
+    prompt: 'hello',
+    sessionId: 'session-123',
+    providerOptions: callerProviderOptions,
+  });
+
+  const sentOptions = mockModel.doStreamCalls[0].providerOptions as any;
+  const sentOpenAi = sentOptions.openai;
+
+  expect(runResult.text).toEqual('ok');
+  expect(sentOpenAi.store).toEqual(false);
+  expect(sentOpenAi.promptCacheKey).toEqual('session-123');
+  expect(sentOpenAi.promptCacheRetention).toEqual('in_memory');
+  expect(sentOpenAi.reasoningEffort).toEqual('high');
+});
+
+// Reasoning chunks should surface through the onReasoning* callbacks in
+// stream order, with the end callback receiving the concatenated text.
+tap.test('runAgent should stream reasoning summary callbacks', async () => {
+  const seenReasoning: string[] = [];
+  const seenTokens: string[] = [];
+  const mockModel = new MockLanguageModelV3({
+    doStream: async () => createReasoningStreamResult('thinking through it', 'done') as any,
+  });
+
+  const runResult = await smartagent.runAgent({
+    model: mockModel,
+    prompt: 'hello',
+    onToken: (delta) => seenTokens.push(delta),
+    onReasoningStart: (id) => seenReasoning.push(`start:${id}`),
+    onReasoningDelta: (id, delta) => seenReasoning.push(`delta:${id}:${delta}`),
+    onReasoningEnd: (id, text) => seenReasoning.push(`end:${id}:${text}`),
+  });
+
+  const expectedReasoning = [
+    'start:reasoning-1',
+    'delta:reasoning-1:thinkin',
+    'delta:reasoning-1:g through it',
+    'end:reasoning-1:thinking through it',
+  ];
+
+  expect(runResult.text).toEqual('done');
+  expect(seenTokens.join('')).toEqual('done');
+  expect(seenReasoning).toEqual(expectedReasoning);
+});
+
+// For an Anthropic model with default cache settings, both the system and
+// the user message sent to the model should carry an ephemeral cacheControl.
+tap.test('runAgent should mark Anthropic prompt cache breakpoints by default', async () => {
+  const mockModel = new MockLanguageModelV3({
+    provider: 'anthropic',
+    modelId: 'claude-sonnet-4-5-20250929',
+    doStream: async () => createTextStreamResult('ok') as any,
+  });
+
+  const runResult = await smartagent.runAgent({
+    model: mockModel,
+    system: 'stable system prompt',
+    prompt: 'hello',
+  });
+
+  const sentPrompt = mockModel.doStreamCalls[0].prompt as any[];
+  const firstWithRole = (role: string) => sentPrompt.find((entry) => entry.role === role);
+  const sentSystem = firstWithRole('system');
+  const sentUser = firstWithRole('user');
+
+  expect(runResult.text).toEqual('ok');
+  expect(sentSystem.providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral');
+  expect(sentUser.providerOptions?.anthropic?.cacheControl?.type).toEqual('ephemeral');
+});
+
+// `cache: false` must suppress the cache defaults: even with a sessionId,
+// no provider options should reach the model when the caller supplied none.
+tap.test('runAgent should allow cache defaults to be disabled', async () => {
+  const mockModel = new MockLanguageModelV3({
+    provider: 'openai',
+    modelId: 'gpt-5',
+    doStream: async () => createTextStreamResult('ok') as any,
+  });
+  const runOptions = {
+    model: mockModel,
+    prompt: 'hello',
+    sessionId: 'session-123',
+    cache: false as const,
+  };
+
+  await smartagent.runAgent(runOptions);
+
+  const [firstCall] = mockModel.doStreamCalls;
+  expect(firstCall.providerOptions).toBeUndefined();
+});
+
 tap.test('runAgent should return final tool call records', async () => {
  let streamCallCount = 0;
  const callbackToolCalls: Array<{ name: string; input: unknown }> = [];
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@push.rocks/smartagent',
-  version: '3.1.1',
+  version: '3.4.0',
  description: 'Agentic loop for ai-sdk (Vercel AI SDK). Wraps streamText with stopWhen for parallel multi-step tool execution. Built on @push.rocks/smartai.'
 }
@@ -3,7 +3,15 @@ export { ToolRegistry } from './smartagent.classes.toolregistry.js';
 export { truncateOutput } from './smartagent.utils.truncation.js';
 export type { ITruncateResult } from './smartagent.utils.truncation.js';
 export { ContextOverflowError } from './smartagent.interfaces.js';
-export type { IAgentRunOptions, IAgentRunResult, IAgentToolCallRecord, ProviderOptions } from './smartagent.interfaces.js';
+export type {
+  IAgentCacheOptions,
+  IAgentRunOptions,
+  IAgentRunResult,
+  IAgentToolCallRecord,
+  ProviderOptions,
+  TAgentCacheRetention,
+  TAgentCacheSetting,
+} from './smartagent.interfaces.js';

 // Re-export tool() and z so consumers can define tools without extra imports
 export { tool, jsonSchema } from '@push.rocks/smartai';
@@ -4,9 +4,9 @@ import * as path from 'path';
 export { path };

 // ai-sdk core
-import { streamText, generateText, stepCountIs } from 'ai';
+import { streamText, generateText, stepCountIs, wrapLanguageModel } from 'ai';

-export { streamText, generateText, stepCountIs };
+export { streamText, generateText, stepCountIs, wrapLanguageModel };

 export type {
  ModelMessage,
@@ -15,11 +15,29 @@ export type {
 } from 'ai';

 // @push.rocks/smartai
-import { tool, jsonSchema } from '@push.rocks/smartai';
+import {
+  applySmartAiCacheProviderOptions,
+  createSmartAiCachingMiddleware,
+  jsonSchema,
+  resolveSmartAiCacheProvider,
+  tool,
+} from '@push.rocks/smartai';

-export { tool, jsonSchema };
+export {
+  applySmartAiCacheProviderOptions,
+  createSmartAiCachingMiddleware,
+  resolveSmartAiCacheProvider,
+  tool,
+  jsonSchema,
+};

-export type { LanguageModelV3, TSmartAiProviderOptions as ProviderOptions } from '@push.rocks/smartai';
+export type {
+  ISmartAiCacheOptions,
+  LanguageModelV3,
+  TSmartAiCacheRetention,
+  TSmartAiCacheSetting,
+  TSmartAiProviderOptions as ProviderOptions,
+} from '@push.rocks/smartai';

 // zod
 import { z } from 'zod';
@@ -90,6 +90,28 @@ function errorToString(error: unknown): string {
  return String(error);
 }

+/**
+ * Reduces a token-usage value to a plain count.
+ *
+ * @param tokenUsage - either a bare number, or an object carrying a numeric `total` field
+ * @returns the number itself, the object's `total`, or 0 for anything else
+ */
+function tokenTotal(tokenUsage: unknown): number {
+  switch (typeof tokenUsage) {
+    case 'number':
+      return tokenUsage;
+    case 'object': {
+      // `typeof null` is 'object', hence the optional access.
+      const total = (tokenUsage as any)?.total;
+      return typeof total === 'number' ? total : 0;
+    }
+    default:
+      return 0;
+  }
+}
+
+/**
+ * Reads the numeric `cacheRead` field from a token-usage object.
+ *
+ * @param tokenUsage - value to inspect; non-objects are ignored
+ * @returns the `cacheRead` number, or 0 when the field is absent or not a number
+ */
+function tokenCacheRead(tokenUsage: unknown): number {
+  if (!tokenUsage || typeof tokenUsage !== 'object') return 0;
+  const cacheRead = (tokenUsage as any).cacheRead;
+  return typeof cacheRead === 'number' ? cacheRead : 0;
+}
+
+/**
+ * Reads the numeric `cacheWrite` field from a token-usage object.
+ *
+ * @param tokenUsage - value to inspect; non-objects are ignored
+ * @returns the `cacheWrite` number, or 0 when the field is absent or not a number
+ */
+function tokenCacheWrite(tokenUsage: unknown): number {
+  if (!tokenUsage || typeof tokenUsage !== 'object') return 0;
+  const cacheWrite = (tokenUsage as any).cacheWrite;
+  return typeof cacheWrite === 'number' ? cacheWrite : 0;
+}
+
 function recordToolCall(
  toolCalls: IAgentToolCallRecord[],
  toolCallIndexes: Map<string, number>,
@@ -129,11 +151,35 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
  let attempt = 0;
  let totalInput = 0;
  let totalOutput = 0;
+  let totalCacheRead = 0;
+  let totalCacheWrite = 0;
  let validationRetries = 0;
  const toolCalls: IAgentToolCallRecord[] = [];
  const toolCallIndexes = new Map<string, number>();
+  const reasoningTextById = new Map<string, string>();

  const tools = options.tools ?? {};
+  const cache = options.cache ?? 'auto';
+  const configuredCacheProvider = typeof cache === 'object' ? cache.provider : undefined;
+  const messageCacheProvider = cache === false
+    ? undefined
+    : configuredCacheProvider ?? plugins.resolveSmartAiCacheProvider(options.model.provider, options.model.modelId);
+  const model = messageCacheProvider
+    ? plugins.wrapLanguageModel({
+        model: options.model,
+        middleware: plugins.createSmartAiCachingMiddleware({
+          ...(typeof cache === 'object' ? cache : {}),
+          provider: messageCacheProvider,
+        }),
+      }) as unknown as plugins.LanguageModelV3
+    : options.model;
+  const providerOptions = plugins.applySmartAiCacheProviderOptions({
+    provider: options.model.provider,
+    modelId: options.model.modelId,
+    providerOptions: options.providerOptions,
+    cache,
+    sessionId: options.sessionId,
+  });

  // Add a no-op sink for repaired-but-unrecognised tool calls
  const allTools: plugins.ToolSet = {
@@ -157,11 +203,11 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
  while (true) {
    try {
      const result = plugins.streamText({
-        model: options.model,
+        model,
        system: options.system,
        messages,
        tools: allTools,
-        providerOptions: options.providerOptions,
+        providerOptions,
        stopWhen: plugins.stepCountIs(options.maxSteps ?? 20),
        maxRetries: 0, // handled manually below
        abortSignal: options.abort,
@@ -182,8 +228,33 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
        },

        onChunk: ({ chunk }) => {
-          if (chunk.type === 'text-delta' && options.onToken) {
-            options.onToken((chunk as any).textDelta ?? (chunk as any).text ?? '');
+          const chunkType = String((chunk as any).type || '');
+          if (chunkType === 'text-delta' && options.onToken) {
+            options.onToken((chunk as any).delta ?? (chunk as any).textDelta ?? (chunk as any).text ?? '');
+            return;
+          }
+          if (chunkType === 'reasoning-start') {
+            const id = (chunk as any).id || 'reasoning';
+            reasoningTextById.set(id, '');
+            options.onReasoningStart?.(id, (chunk as any).providerMetadata);
+            return;
+          }
+          if (chunkType === 'reasoning-delta') {
+            const id = (chunk as any).id || 'reasoning';
+            const delta = (chunk as any).delta ?? (chunk as any).textDelta ?? (chunk as any).text ?? '';
+            if (!reasoningTextById.has(id)) {
+              reasoningTextById.set(id, '');
+              options.onReasoningStart?.(id, (chunk as any).providerMetadata);
+            }
+            reasoningTextById.set(id, (reasoningTextById.get(id) ?? '') + delta);
+            options.onReasoningDelta?.(id, delta, (chunk as any).providerMetadata);
+            return;
+          }
+          if (chunkType === 'reasoning-end') {
+            const id = (chunk as any).id || 'reasoning';
+            const text = reasoningTextById.get(id) ?? '';
+            reasoningTextById.delete(id);
+            options.onReasoningEnd?.(id, text, (chunk as any).providerMetadata);
          }
        },

@@ -218,8 +289,10 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu

        onStepFinish: ({ usage, toolCalls: stepToolCalls, toolResults, content }) => {
          stepCount++;
-          totalInput += usage?.inputTokens ?? 0;
-          totalOutput += usage?.outputTokens ?? 0;
+          totalInput += tokenTotal((usage as any)?.inputTokens);
+          totalOutput += tokenTotal((usage as any)?.outputTokens);
+          totalCacheRead += tokenCacheRead((usage as any)?.inputTokens);
+          totalCacheWrite += tokenCacheWrite((usage as any)?.inputTokens);
          for (const toolCall of stepToolCalls) {
            recordToolCall(toolCalls, toolCallIndexes, toolCall);
          }
@@ -239,6 +312,10 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
      const finishReason = await result.finishReason;
      const responseData = await result.response;
      const responseMessages = responseData.messages as plugins.ModelMessage[];
+      for (const [id, reasoningText] of reasoningTextById) {
+        options.onReasoningEnd?.(id, reasoningText);
+        reasoningTextById.delete(id);
+      }

      attempt = 0; // reset on success

@@ -251,6 +328,8 @@ export async function runAgent(options: IAgentRunOptions): Promise<IAgentRunResu
          inputTokens: totalInput,
          outputTokens: totalOutput,
          totalTokens: totalInput + totalOutput,
+          cacheReadTokens: totalCacheRead,
+          cacheWriteTokens: totalCacheWrite,
        },
        toolCalls,
      };
@@ -1,6 +1,17 @@
-import type { ToolSet, ModelMessage, LanguageModelV3, ProviderOptions } from './plugins.js';
+import type {
+  ISmartAiCacheOptions,
+  ToolSet,
+  ModelMessage,
+  LanguageModelV3,
+  ProviderOptions,
+  TSmartAiCacheRetention,
+  TSmartAiCacheSetting,
+} from './plugins.js';

 export type { ProviderOptions };
+export type IAgentCacheOptions = ISmartAiCacheOptions;
+export type TAgentCacheRetention = TSmartAiCacheRetention;
+export type TAgentCacheSetting = TSmartAiCacheSetting;

 export interface IAgentToolCallRecord {
  toolName: string;
@@ -20,6 +31,10 @@ export interface IAgentRunOptions {
  tools?: ToolSet;
  /** Provider-specific AI SDK request options; combined with smartagent cache defaults (unless `cache` is false) before being passed to streamText() */
  providerOptions?: ProviderOptions;
+  /** Stable session id used as provider prompt-cache affinity key where supported. */
+  sessionId?: string;
+  /** Prompt-cache policy. Default: 'auto'. Set false to disable smartagent cache defaults. */
+  cache?: TAgentCacheSetting;
  /**
   * Maximum number of LLM↔tool round trips.
   * Each step may execute multiple tools in parallel.
@@ -30,6 +45,12 @@ export interface IAgentRunOptions {
  messages?: ModelMessage[];
  /** Called for each streamed text delta */
  onToken?: (delta: string) => void;
+  /** Called when the model starts a streamed reasoning summary */
+  onReasoningStart?: (id: string, providerMetadata?: unknown) => void;
+  /** Called for each streamed reasoning summary delta */
+  onReasoningDelta?: (id: string, delta: string, providerMetadata?: unknown) => void;
+  /** Called when a streamed reasoning summary completes */
+  onReasoningEnd?: (id: string, text: string, providerMetadata?: unknown) => void;
  /** Called when a tool call starts */
  onToolCall?: (toolName: string, input: unknown) => void;
  /** Called when a tool call completes */
@@ -61,7 +82,13 @@ export interface IAgentRunResult {
  /** Finish reason from the final step */
  finishReason: string;
  /** Accumulated token usage across all steps */
-  usage: { inputTokens: number; outputTokens: number; totalTokens: number };
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number;
+    cacheReadTokens: number;
+    cacheWriteTokens: number;
+  };
  /** Tool calls observed during the run, including inputs and outputs/errors when available */
  toolCalls: IAgentToolCallRecord[];
 }
Author	SHA1	Message	Date
jkunz	f183bf19ac	v3.4.0 Default (tags) / security (push) Failing after 1s Details Default (tags) / test (push) Failing after 1s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-05-14 22:44:10 +00:00
jkunz	6fb2b3a61f	feat(agent): add streamed reasoning summary callbacks to runAgent	2026-05-14 22:44:08 +00:00
jkunz	ca56f4c4e8	v3.3.0 Default (tags) / security (push) Failing after 1s Details Default (tags) / test (push) Failing after 1s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-05-14 16:50:16 +00:00
jkunz	5ceeddd8bb	feat(deps): upgrade @push.rocks/smartai to ^4.0.0	2026-05-14 16:50:08 +00:00
jkunz	d7edb981e7	v3.2.0 Default (tags) / security (push) Failing after 1s Details Default (tags) / test (push) Failing after 1s Details Default (tags) / release (push) Has been skipped Details Default (tags) / metadata (push) Has been skipped Details	2026-05-14 11:34:11 +00:00
jkunz	e6346be884	feat(agent): add prompt caching options and cache token usage reporting	2026-05-14 11:34:04 +00:00