From 4310c8086b5d9259cdf7f118978d6e362b59870e Mon Sep 17 00:00:00 2001 From: Juergen Kunz Date: Tue, 20 Jan 2026 02:44:54 +0000 Subject: [PATCH] feat(native-tools): add native tool calling support for Ollama models - Add INativeToolCall interface for native tool call format - Add useNativeToolCalling option to IDualAgentOptions - Add getToolsAsJsonSchema() to convert tools to Ollama JSON Schema format - Add parseNativeToolCalls() to convert native tool calls to proposals - Add startTaskWithNativeTools() and continueWithNativeTools() to DriverAgent - Update DualAgentOrchestrator to support both XML parsing and native tool calling modes Native tool calling is more efficient for models like GPT-OSS that use Harmony format, as it activates Ollama's built-in tool parser instead of requiring XML generation. --- package.json | 2 +- pnpm-lock.yaml | 10 +- ts/smartagent.classes.driveragent.ts | 234 +++++++++++++++++++++++++++ ts/smartagent.classes.dualagent.ts | 100 ++++++++++-- ts/smartagent.interfaces.ts | 18 +++ 5 files changed, 341 insertions(+), 23 deletions(-) diff --git a/package.json b/package.json index 95d382f..1163262 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "@types/node": "^25.0.2" }, "dependencies": { - "@push.rocks/smartai": "^0.13.0", + "@push.rocks/smartai": "^0.13.1", "@push.rocks/smartbrowser": "^2.0.8", "@push.rocks/smartdeno": "^1.2.0", "@push.rocks/smartfs": "^1.2.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 75fe231..00c4c43 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@push.rocks/smartai': - specifier: ^0.13.0 - version: 0.13.0(typescript@5.9.3)(ws@8.18.3)(zod@3.25.76) + specifier: ^0.13.1 + version: 0.13.1(typescript@5.9.3)(ws@8.18.3)(zod@3.25.76) '@push.rocks/smartbrowser': specifier: ^2.0.8 version: 2.0.8(typescript@5.9.3) @@ -844,8 +844,8 @@ packages: '@push.rocks/qenv@6.1.3': resolution: {integrity: sha512-+z2hsAU/7CIgpYLFqvda8cn9rUBMHqLdQLjsFfRn5jPoD7dJ5rFlpkbhfM4Ws8mHMniwWaxGKo+q/YBhtzRBLg==} - '@push.rocks/smartai@0.13.0': - resolution: {integrity: sha512-Vy7GSLL3ejGsv4oxyUUovI1j0DgrRjxLDjM19WwMp6fehDHf8485WtwSLNm3kS4If+cVJbYXVJtjkaITVV3MLQ==} + '@push.rocks/smartai@0.13.1': + resolution: {integrity: sha512-V9J6a+rjBkFpdFnC6OBm8CbEtqCfJnEsUmNKfRUOiTa+VIVtD4OOceraZah6kGHWltUhZ1XV4eLWwFf4+YO3NA==} '@push.rocks/smartarchive@4.2.4': resolution: {integrity: sha512-uiqVAXPxmr8G5rv3uZvZFMOCt8l7cZC3nzvsy4YQqKf/VkPhKIEX+b7LkAeNlxPSYUiBQUkNRoawg9+5BaMcHg==} @@ -5172,7 +5172,7 @@ snapshots: '@push.rocks/smartlog': 3.1.10 '@push.rocks/smartpath': 6.0.0 - '@push.rocks/smartai@0.13.0(typescript@5.9.3)(ws@8.18.3)(zod@3.25.76)': + '@push.rocks/smartai@0.13.1(typescript@5.9.3)(ws@8.18.3)(zod@3.25.76)': dependencies: '@anthropic-ai/sdk': 0.71.2(zod@3.25.76) '@mistralai/mistralai': 1.12.0 diff --git a/ts/smartagent.classes.driveragent.ts b/ts/smartagent.classes.driveragent.ts index 5f044a7..d418c64 100644 --- a/ts/smartagent.classes.driveragent.ts +++ b/ts/smartagent.classes.driveragent.ts @@ -442,4 +442,238 @@ Your complete output here public reset(): void { this.messageHistory = []; } + + // ================================ + // Native Tool Calling Support + // ================================ + + /** + * Start a task with native tool calling support + * Uses Ollama's native tool calling API instead of XML parsing + * @param task The task description + * @param images Optional base64-encoded images for vision tasks + * @returns Response with content, reasoning, and any tool calls + */ + public async 
startTaskWithNativeTools( + task: string, + images?: string[] + ): Promise<{ message: interfaces.IAgentMessage; toolCalls?: interfaces.INativeToolCall[] }> { + // Reset message history + this.messageHistory = []; + + // Build simple user message (no XML instructions needed for native tool calling) + const userMessage = `TASK: ${task}\n\nComplete this task using the available tools. When done, provide your final output.`; + + // Add to history + this.messageHistory.push({ + role: 'user', + content: userMessage, + }); + + // Build system message for native tool calling + const fullSystemMessage = this.getNativeToolsSystemMessage(); + + // Get tools in JSON schema format + const tools = this.getToolsAsJsonSchema(); + + // Check if provider supports native tool calling (Ollama) + const provider = this.provider as any; + if (typeof provider.chatWithOptions !== 'function') { + throw new Error('Provider does not support native tool calling. Use startTask() instead.'); + } + + // Call with tools + const response = await provider.chatWithOptions({ + systemMessage: fullSystemMessage, + userMessage: userMessage, + messageHistory: [], + images: images, + tools: tools.length > 0 ? tools : undefined, + }); + + // Add assistant response to history + const historyMessage: plugins.smartai.ChatMessage = { + role: 'assistant', + content: response.message || '', + reasoning: response.thinking || response.reasoning, + }; + this.messageHistory.push(historyMessage); + + // Convert Ollama tool calls to our format + let toolCalls: interfaces.INativeToolCall[] | undefined; + if (response.toolCalls && response.toolCalls.length > 0) { + toolCalls = response.toolCalls.map((tc: any) => ({ + function: { + name: tc.function.name, + arguments: tc.function.arguments, + index: tc.function.index, + }, + })); + } + + return { + message: { + role: 'assistant', + content: response.message || '', + }, + toolCalls, + }; + } + + /** + * Continue conversation with native tool calling support + * @param message The message to continue with (e.g., tool result) + * @returns Response with content, reasoning, and any tool calls + */ + public async continueWithNativeTools( + message: string + ): Promise<{ message: interfaces.IAgentMessage; toolCalls?: interfaces.INativeToolCall[] }> { + // Add the new message to history + this.messageHistory.push({ + role: 'user', + content: message, + }); + + // Build system message + const fullSystemMessage = this.getNativeToolsSystemMessage(); + + // Get tools in JSON schema format + const tools = this.getToolsAsJsonSchema(); + + // Get response from provider with history windowing + let historyForChat: plugins.smartai.ChatMessage[]; + const fullHistory = this.messageHistory.slice(0, -1); + + if (this.maxHistoryMessages > 0 && fullHistory.length > this.maxHistoryMessages) { + historyForChat = [ + fullHistory[0], + ...fullHistory.slice(-(this.maxHistoryMessages - 1)), + ]; + } else { + historyForChat = fullHistory; + } + + // Check if provider supports native tool calling + const provider = this.provider as any; + if (typeof provider.chatWithOptions !== 'function') { + throw new Error('Provider does not support native tool calling. Use continueWithMessage() instead.'); + } + + // Call with tools + const response = await provider.chatWithOptions({ + systemMessage: fullSystemMessage, + userMessage: message, + messageHistory: historyForChat, + tools: tools.length > 0 ? 
tools : undefined, + }); + + // Add assistant response to history + this.messageHistory.push({ + role: 'assistant', + content: response.message || '', + reasoning: response.thinking || response.reasoning, + }); + + // Convert Ollama tool calls to our format + let toolCalls: interfaces.INativeToolCall[] | undefined; + if (response.toolCalls && response.toolCalls.length > 0) { + toolCalls = response.toolCalls.map((tc: any) => ({ + function: { + name: tc.function.name, + arguments: tc.function.arguments, + index: tc.function.index, + }, + })); + } + + return { + message: { + role: 'assistant', + content: response.message || '', + }, + toolCalls, + }; + } + + /** + * Get system message for native tool calling mode + * Simplified prompt that lets the model use tools naturally + */ + private getNativeToolsSystemMessage(): string { + return `You are an AI assistant that executes tasks by using available tools. + +## Your Role +You analyze tasks, break them down into steps, and use tools to accomplish goals. + +## Guidelines +1. Think step by step about what needs to be done +2. Use the available tools to complete the task +3. Process tool results and continue until the task is complete +4. When the task is complete, provide a final summary + +## Important +- Use tools when needed to gather information or perform actions +- If you need clarification, ask the user +- Always verify your work before marking the task complete`; + } + + /** + * Convert registered tools to Ollama JSON Schema format for native tool calling + * Each tool action becomes a separate function with name format: "toolName_actionName" + * @returns Array of IOllamaTool compatible tool definitions + */ + public getToolsAsJsonSchema(): plugins.smartai.IOllamaTool[] { + const tools: plugins.smartai.IOllamaTool[] = []; + + for (const tool of this.tools.values()) { + for (const action of tool.actions) { + // Build the tool definition in Ollama format + const toolDef: plugins.smartai.IOllamaTool = { + type: 'function', + function: { + name: `${tool.name}_${action.name}`, // e.g., "json_validate" + description: `[${tool.name}] ${action.description}`, + parameters: action.parameters as plugins.smartai.IOllamaTool['function']['parameters'], + }, + }; + tools.push(toolDef); + } + } + + return tools; + } + + /** + * Parse native tool calls from provider response into IToolCallProposal format + * @param toolCalls Array of native tool calls from the provider + * @returns Array of IToolCallProposal ready for execution + */ + public parseNativeToolCalls( + toolCalls: interfaces.INativeToolCall[] + ): interfaces.IToolCallProposal[] { + return toolCalls.map(tc => { + // Split "json_validate" -> toolName="json", action="validate" + const fullName = tc.function.name; + const underscoreIndex = fullName.indexOf('_'); + + let toolName: string; + let action: string; + + if (underscoreIndex > 0) { + toolName = fullName.substring(0, underscoreIndex); + action = fullName.substring(underscoreIndex + 1); + } else { + // Fallback: treat entire name as tool name with empty action + toolName = fullName; + action = ''; + } + + return { + proposalId: this.generateProposalId(), + toolName, + action, + params: tc.function.arguments, + }; + }); + } } diff --git a/ts/smartagent.classes.dualagent.ts b/ts/smartagent.classes.dualagent.ts index ec99ad9..7a19020 100644 --- a/ts/smartagent.classes.dualagent.ts +++ b/ts/smartagent.classes.dualagent.ts @@ -242,12 +242,18 @@ export class DualAgentOrchestrator { throw new Error('Orchestrator not started. 
Call start() first.'); } + // Use native tool calling if enabled + const useNativeTools = this.options.useNativeToolCalling === true; + this.conversationHistory = []; let iterations = 0; let consecutiveRejections = 0; let completed = false; let finalResult: string | null = null; + // Track pending native tool calls + let pendingNativeToolCalls: interfaces.INativeToolCall[] | undefined; + // Extract images from options const images = options?.images; @@ -258,7 +264,17 @@ export class DualAgentOrchestrator { }); // Start the driver with the task and optional images - let driverResponse = await this.driver.startTask(task, images); + let driverResponse: interfaces.IAgentMessage; + + if (useNativeTools) { + // Native tool calling mode + const result = await this.driver.startTaskWithNativeTools(task, images); + driverResponse = result.message; + pendingNativeToolCalls = result.toolCalls; + } else { + // XML parsing mode + driverResponse = await this.driver.startTask(task, images); + } this.conversationHistory.push(driverResponse); // Emit task started event @@ -281,10 +297,16 @@ export class DualAgentOrchestrator { maxIterations: this.options.maxIterations, }); - // Check if task is complete - if (this.driver.isTaskComplete(driverResponse.content)) { + // Check if task is complete (for native mode, no pending tool calls and has content) + const isComplete = useNativeTools + ? (!pendingNativeToolCalls || pendingNativeToolCalls.length === 0) && driverResponse.content.length > 0 + : this.driver.isTaskComplete(driverResponse.content); + + if (isComplete) { completed = true; - finalResult = this.driver.extractTaskResult(driverResponse.content) || driverResponse.content; + finalResult = useNativeTools + ? driverResponse.content + : (this.driver.extractTaskResult(driverResponse.content) || driverResponse.content); // Emit task completed event this.emitProgress({ @@ -315,13 +337,34 @@ export class DualAgentOrchestrator { }; } - // Parse tool call proposals - const proposals = this.driver.parseToolCallProposals(driverResponse.content); + // Parse tool call proposals - native mode uses pendingNativeToolCalls, XML mode parses content + let proposals: interfaces.IToolCallProposal[]; + + if (useNativeTools && pendingNativeToolCalls && pendingNativeToolCalls.length > 0) { + // Native tool calling mode - convert native tool calls to proposals + proposals = this.driver.parseNativeToolCalls(pendingNativeToolCalls); + pendingNativeToolCalls = undefined; // Clear after processing + } else if (!useNativeTools) { + // XML parsing mode + proposals = this.driver.parseToolCallProposals(driverResponse.content); + } else { + proposals = []; + } if (proposals.length === 0) { - // No tool calls found - remind the model of the exact XML format - driverResponse = await this.driver.continueWithMessage( - `No valid tool call was found in your response. To use a tool, you MUST output the exact XML format: + if (useNativeTools) { + // Native mode: no tool calls and no content means we should continue + const result = await this.driver.continueWithNativeTools( + 'Please continue with the task. Use the available tools or provide your final output.' + ); + driverResponse = result.message; + pendingNativeToolCalls = result.toolCalls; + this.conversationHistory.push(driverResponse); + continue; + } else { + // XML mode: remind the model of the exact XML format + driverResponse = await this.driver.continueWithMessage( + `No valid tool call was found in your response. 
To use a tool, you MUST output the exact XML format:

<tool_call>
<tool>tool_name</tool>
<action>action_name</action>
<params>
{"param": "value"}
</params>
</tool_call>

Or to complete the task:

<task_complete>
your final JSON output here
</task_complete>

Please output the exact XML format above.`
-      );
-      this.conversationHistory.push(driverResponse);
-      continue;
+        );
+        this.conversationHistory.push(driverResponse);
+        continue;
+      }
     }
 
     // Process the first proposal (one at a time)
@@ -449,13 +493,28 @@ Please output the exact XML format above.`
         toolResult: result,
       });
 
-      driverResponse = await this.driver.continueWithMessage(resultMessage);
+      // Continue with appropriate method based on mode
+      if (useNativeTools) {
+        const continueResult = await this.driver.continueWithNativeTools(resultMessage);
+        driverResponse = continueResult.message;
+        pendingNativeToolCalls = continueResult.toolCalls;
+      } else {
+        driverResponse = await this.driver.continueWithMessage(resultMessage);
+      }
       this.conversationHistory.push(driverResponse);
     } catch (error) {
       const errorMessage = `Tool execution failed: ${error instanceof Error ? error.message : String(error)}`;
-      driverResponse = await this.driver.continueWithMessage(
-        `TOOL ERROR: ${errorMessage}\n\nPlease try a different approach.`
-      );
+      if (useNativeTools) {
+        const continueResult = await this.driver.continueWithNativeTools(
+          `TOOL ERROR: ${errorMessage}\n\nPlease try a different approach.`
+        );
+        driverResponse = continueResult.message;
+        pendingNativeToolCalls = continueResult.toolCalls;
+      } else {
+        driverResponse = await this.driver.continueWithMessage(
+          `TOOL ERROR: ${errorMessage}\n\nPlease try a different approach.`
+        );
+      }
       this.conversationHistory.push(driverResponse);
     }
   } else {
@@ -492,7 +551,14 @@ Please output the exact XML format above.`
       guardianDecision: decision,
     });
 
-    driverResponse = await this.driver.continueWithMessage(feedback);
+    // Continue with appropriate method based on mode
+    if (useNativeTools) {
+      const continueResult = await this.driver.continueWithNativeTools(feedback);
+      driverResponse = continueResult.message;
+      pendingNativeToolCalls = continueResult.toolCalls;
+    } else {
+      driverResponse = await this.driver.continueWithMessage(feedback);
+    }
     this.conversationHistory.push(driverResponse);
   }
 }
diff --git a/ts/smartagent.interfaces.ts b/ts/smartagent.interfaces.ts
index 1e9af0e..3d3363e 100644
--- a/ts/smartagent.interfaces.ts
+++ b/ts/smartagent.interfaces.ts
@@ -48,6 +48,12 @@ export interface IDualAgentOptions extends plugins.smartai.ISmartAiOptions {
   logPrefix?: string;
   /** Callback fired for each token during LLM generation (streaming mode) */
   onToken?: (token: string, source: 'driver' | 'guardian') => void;
+  /**
+   * Enable native tool calling mode (default: false)
+   * When enabled, uses Ollama's native tool calling API instead of XML parsing
+   * This is more efficient for models that support it (e.g., GPT-OSS with Harmony format)
+   */
+  useNativeToolCalling?: boolean;
 }
 
 // ================================
@@ -83,6 +89,18 @@ export interface IToolAction {
   parameters: Record<string, any>;
 }
 
+/**
+ * Native tool call from provider (matches Ollama's tool calling format)
+ * Format: function name is "toolName_actionName" (e.g., "json_validate")
+ */
+export interface INativeToolCall {
+  function: {
+    name: string; // Format: "toolName_actionName"
+    arguments: Record<string, any>;
+    index?: number;
+  };
+}
+
 /**
  * Proposed tool call from the Driver
  */
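
A minimal TypeScript sketch of how the pieces above fit together: the flag that switches the orchestrator to native tool calling, and the "toolName_actionName" convention shared by getToolsAsJsonSchema() and parseNativeToolCalls(). The "json" tool, its "validate" action, the sample description/arguments, and the trimmed local types are hypothetical stand-ins; the real interfaces and registration live in the files patched above.

// Trimmed, assumed copies of the shapes introduced in this patch.
interface INativeToolCallSketch {
  function: {
    name: string; // "toolName_actionName", e.g. "json_validate"
    arguments: Record<string, any>;
    index?: number;
  };
}

interface IToolCallProposalSketch {
  toolName: string;
  action: string;
  params: Record<string, any>;
}

// Opting in to the new mode (other IDualAgentOptions fields omitted).
const dualAgentOptions = {
  useNativeToolCalling: true,
  maxIterations: 20,
};

// Outbound: each registered tool action becomes one JSON Schema function,
// named `${tool.name}_${action.name}` as in getToolsAsJsonSchema().
const exampleToolDefinition = {
  type: 'function' as const,
  function: {
    name: 'json_validate', // hypothetical "json" tool, "validate" action
    description: '[json] Validate a JSON document',
    parameters: {
      type: 'object',
      properties: { document: { type: 'string' } },
      required: ['document'],
    },
  },
};

// Inbound: a native tool call is split at the first underscore,
// mirroring parseNativeToolCalls().
function toProposalSketch(tc: INativeToolCallSketch): IToolCallProposalSketch {
  const fullName = tc.function.name;
  const i = fullName.indexOf('_');
  return i > 0
    ? { toolName: fullName.slice(0, i), action: fullName.slice(i + 1), params: tc.function.arguments }
    : { toolName: fullName, action: '', params: tc.function.arguments };
}

// toProposalSketch({ function: { name: 'json_validate', arguments: { document: '{}' } } })
// -> { toolName: 'json', action: 'validate', params: { document: '{}' } }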