diff --git a/x/henry/mp-sandbox-agent/.env.example b/x/henry/mp-sandbox-agent/.env.example new file mode 100644 index 000000000000..d010e018d551 --- /dev/null +++ b/x/henry/mp-sandbox-agent/.env.example @@ -0,0 +1,16 @@ +# Required API keys +OPENAI_API_KEY=sk-xxx +ANTHROPIC_API_KEY=sk-ant-xxx +SERPAPI_API_KEY=xxx +FIRECRAWL_API_KEY=xxx + +# LLM Configuration +AI_PROVIDER=openai # openai or anthropic +AI_MODEL=gpt-4o # For OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo + # For Anthropic: claude-3-7-sonnet-20250219, claude-3-5-sonnet-20241022, claude-3-5-haiku-20241022, + # claude-3-opus-20240229, claude-3-sonnet-20240229, claude-3-haiku-20240307 +AI_TEMPERATURE=0.0 # 0.0 to 1.0 +AI_MAX_TOKENS=4096 # Maximum tokens to generate + +# Logging +LOG_LEVEL=INFO # ERROR, WARN, INFO, DEBUG, TRACE \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/DOCUMENTATION.md b/x/henry/mp-sandbox-agent/DOCUMENTATION.md new file mode 100644 index 000000000000..3b7633b25eeb --- /dev/null +++ b/x/henry/mp-sandbox-agent/DOCUMENTATION.md @@ -0,0 +1,512 @@ +# MicroPython Sandbox Agent + +## Overview + +MicroPython Sandbox Agent is a secure, code-first AI agent framework that uses executable Python code instead of traditional JSON-based tool calls. The agent is inspired by the CodeAct approach (Wang et al., 2024) and runs generated Python code in a MicroPython WebAssembly sandbox, providing strong security guarantees while maintaining expressiveness and flexibility. + +## Key Features + +### 1. Code Generation Over Tool Calls +- **Code-First Approach**: Instead of using JSON-formatted tool calls, the agent generates executable Python code. +- **Enhanced Expressiveness**: Python code provides greater flexibility and expressiveness for handling complex tasks. +- **Iterative Reasoning**: The agent can analyze outputs, plan next steps, and generate new code in an iterative process. + +### 2. Secure Execution Environment +- **MicroPython WebAssembly Sandbox**: All generated Python code runs in a MicroPython environment compiled to WebAssembly. +- **Strong Sandboxing**: Provides isolation and protection from potentially harmful code execution. +- **Controlled Access**: Only explicitly exposed functions are available to the executed code. + +### 3. Type-Safe Tool Definitions +- **Zod Schema Validation**: Input and output validation using Zod schemas ensures type safety. +- **Clear Tool Documentation**: Automatically generates documentation for available tools. +- **Error Handling**: Robust error handling for tool execution with clear error messages. + +### 4. Multi-Model Support +- **Model Flexibility**: Support for both OpenAI (GPT-4o, GPT-4, etc.) and Anthropic (Claude) models. +- **Configurable Parameters**: Customizable model settings like temperature and token limits. +- **Easy Switching**: Simple environment variable configuration to change between providers. + +## Core Components + +### 1. Agent Class + +The `Agent` class is the primary interface for creating and managing AI agents: + +```typescript +// Creating an agent +const agent = await Agent.create("Research the weather in Paris"); + +// Run agent steps with provided tools +const answer = await agent.step({ + fetch_weather: fetchWeather, + search_web: searchWeb, + // other tools... +}); +``` + +Key methods: +- `create(goal: string)`: Creates a new agent with a specified goal +- `step(tools: Record)`: Runs one step of the agent with provided tools +- `getSteps()`: Returns the agent's step history + +### 2. 
PythonSandbox Class + +The `PythonSandbox` class provides a secure execution environment for Python code: + +```typescript +// Create a sandbox instance +const sandbox = await PythonSandbox.create(); + +// Expose a function to the sandbox +sandbox.expose("fetch_data", myToolDefinition); + +// Run Python code in the sandbox +const result = await sandbox.runCode("data = await fetch_data({'url': 'example.com'})"); +``` + +Key methods: +- `create()`: Creates a new sandbox instance +- `expose(name: string, func: ExposedFunction)`: Exposes a function to the sandbox +- `runCode(code: string)`: Executes Python code in the sandbox + +### 3. Tool Definition System + +Tools are defined using a type-safe API: + +```typescript +const fetchWeather = defineTool( + "Fetch weather data for a location", + z.object({ location: z.string() }), + z.object({ temperature: z.number(), conditions: z.string() }), + async (input, { log }) => { + // Implementation... + return ok({ temperature: 22, conditions: "Sunny" }); + } +); +``` + +Each tool includes: +- Description: Clear documentation of the tool's purpose +- Input schema: Zod schema defining expected input parameters +- Output schema: Zod schema defining the return value structure +- Implementation function: Async function that performs the actual work + + +## Built-in Tools + +### 1. Web Search +The `search_web` tool provides web search capabilities: + +```python +results = await search_web({"query": "latest news about AI"}) +``` + +### 2. Web Scraping +The `scrape_pages` tool extracts content from web pages: + +```python +content = await scrape_pages({"urls": ["https://example.com"]}) +``` + +### 3. Weather Information +The `fetch_weather` tool retrieves weather data: + +```python +weather = await fetch_weather({"location": "New York"}) +``` + +## Utility Components + +### Logging System + +The `Logger` class in `utils/logger.ts` provides a configurable logging system: + +```typescript +import { logger, LogLevel } from "./utils/logger"; + +// Set log level (ERROR, WARN, INFO, DEBUG, TRACE) +logger.setLevel(LogLevel.DEBUG); + +// Basic logging +logger.info("This is an informational message"); +logger.error("An error occurred: %s", errorMessage); +logger.debug("Debug data: %o", debugObject); + +// Configure logger options +logger.setTimestamps(false); // Disable timestamps in log output +logger.setShowLevel(false); // Hide log level in output + +// Create a custom logger instance +const customLogger = new Logger({ + level: LogLevel.WARN, + timestamps: true, + showLevel: true, + outputFn: (message, level) => { + // Custom output function + myLoggingService.log(message, level); + } +}); +``` + +This logging system replaces direct `console.log` calls and provides: + +- Multiple severity levels (ERROR, WARN, INFO, DEBUG, TRACE) +- Configurable formatting (timestamps, level indicators) +- Support for string interpolation with %s, %d, %o, etc. 
+- Customizable output destinations through outputFn +- Runtime configuration + +## Usage Examples + +### Basic Usage +```typescript +import { Agent } from "./agent"; +import { fetchWeather, searchWeb, scrapePages } from "./tools"; + +async function main() { + // Get the query from command line arguments + const request = process.argv[2]; + if (!request) { + console.error("Please provide a request as a command line argument"); + process.exit(1); + } + + // Create an agent with the query + const agent = await Agent.create(request); + + // Define available tools + const tools = { + fetch_weather: fetchWeather, + search_web: searchWeb, + scrape_pages: scrapePages, + }; + + // Run the agent until it has an answer + let answer = null; + while (answer === null) { + answer = await agent.step(tools); + } + + // Display the final answer + console.log("\nFinal answer:"); + console.log(answer); +} + +main().catch(error => { + console.error("Error:", error); + process.exit(1); +}); +``` + +### Using Custom Tools +```typescript +import { Agent } from "./agent"; +import { defineTool } from "./tools/helpers"; +import { z } from "zod"; +import { ok } from "./tools/types"; + +// Define a custom tool +const calculator = defineTool( + "Performs basic arithmetic operations", + z.object({ + operation: z.enum(["add", "subtract", "multiply", "divide"]), + a: z.number(), + b: z.number() + }), + z.object({ result: z.number() }), + async (input, { log }) => { + const { operation, a, b } = input; + let result; + + switch (operation) { + case "add": result = a + b; break; + case "subtract": result = a - b; break; + case "multiply": result = a * b; break; + case "divide": + if (b === 0) return { type: "error", error: "Division by zero" }; + result = a / b; + break; + } + + log(`Calculated ${operation}: ${a} ${operation} ${b} = ${result}`); + return ok({ result }); + } +); + +async function runCalculatorAgent() { + const agent = await Agent.create("Calculate the result of complex math expressions"); + + const tools = { calculator }; + + let answer = null; + while (answer === null) { + answer = await agent.step(tools); + } + + console.log("Result:", answer); +} +``` + +## Future Vision + +Based on the TODO section in the README, the project aims to enhance the agent's capabilities: + +1. **Enhanced Agent Capabilities**: + - Support for "artifacts" (named documents that can be passed to the agent) + - Ability to control when the agent can stop execution + - Improved reasoning and token management + +2. **Research and Content Generation**: + - Web search and information extraction + - Content processing tools for large documents + - Advanced scraping capabilities + +3. **Technical Enhancements**: + - Persistent state storage (Redis/PostgreSQL/filesystem) + - Improved error handling and recovery + +This vision positions the package as a powerful tool for generating, executing, and reasoning with Python code to solve complex tasks while maintaining a secure execution environment. + +## Improvement Suggestions + +The following improvements would enhance the codebase's architecture, security, and maintainability: + +1. **Architectural Improvements**: + - Reduce coupling between the Agent and PythonSandbox classes + - Split the Agent class into smaller components with single responsibilities + - Define clear interfaces for key components to improve testability + - ✅ Make model selection configurable rather than hardcoded + +2. 
**Code Quality**: + - ✅ Replace `any` types with proper TypeScript definitions + - ✅ Implement consistent error handling with proper context information + - ✅ Replace direct console.log statements with a configurable logging system + - ✅ Add proper validation and defaults for environment variables + +3. **Security Enhancements**: + - Implement input validation for all external inputs (URLs, API parameters) + - Add resource limits to the sandbox (memory, execution time) + - Improve handling of API keys and sensitive information + - Implement proper security boundaries for the sandbox + +4. **Testing and Documentation**: + - Expand test coverage, especially for integration scenarios + - Add end-to-end tests for complete system behavior + - Improve JSDoc comments for all public APIs + - Add architectural documentation with component diagrams + +These improvements would significantly enhance the codebase's maintainability, security, and extensibility without changing its core concepts. + +## Implementation Progress + +### ✅ Configurable Logging System (Completed) + +A configurable logging system has been implemented in `utils/logger.ts` to replace direct console.log statements. This system provides: + +- Different log levels (ERROR, WARN, INFO, DEBUG, TRACE) +- Environment variable configuration (LOG_LEVEL) +- Formatted output with timestamps and level indicators +- String interpolation for cleaner log messages +- Customizable output functions + +Usage example: + +```typescript +import { logger, LogLevel } from "./utils/logger"; + +// Set log level +logger.setLevel(LogLevel.DEBUG); + +// Log messages at different levels +logger.error("Critical error: %s", errorMessage); +logger.warn("Warning: The operation may be slow"); +logger.info("Processing file: %s", filename); +logger.debug("Request payload: %o", payload); + +// Configure output format +logger.setTimestamps(false); // Disable timestamps +logger.setShowLevel(false); // Hide log level +``` + +The Agent and main.ts files have been updated to use this logging system, providing better control over verbosity and output format. + +### ✅ Consistent Error Handling (Completed) + +A comprehensive error handling system has been implemented in `utils/errors.ts` to provide consistent error handling with proper context information. The system includes: + +- **Custom Error Classes**: A hierarchy of error classes for different types of errors, each with specific context fields. +- **Context Information**: All errors can have context information attached to provide details for debugging. +- **Error Wrapping**: Utility functions to wrap unknown errors in structured format. +- **Integration with Logger**: Special error logging methods that display context information. 
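+
+As a rough illustration of this pattern (a hedged sketch, not the exact `utils/errors.ts` implementation), the base class and wrapping helper could look roughly like this:
+
+```typescript
+// Hedged sketch only: names follow the documentation above, but the real
+// utils/errors.ts implementation may differ in details.
+export class AppError extends Error {
+  /** Structured debugging information attached via addContext(). */
+  context: Record<string, unknown> = {};
+
+  constructor(message: string) {
+    super(message);
+    this.name = this.constructor.name;
+  }
+
+  /** Attach context for debugging; returns `this` so calls can be chained. */
+  addContext(extra: Record<string, unknown>): this {
+    this.context = { ...this.context, ...extra };
+    return this;
+  }
+}
+
+export class ValidationError extends AppError {}
+
+/** Wrap an unknown thrown value so callers always get an AppError with context support. */
+export function wrapError(error: unknown, message: string): AppError {
+  if (error instanceof AppError) {
+    return error;
+  }
+  const detail = error instanceof Error ? error.message : String(error);
+  return new AppError(`${message}: ${detail}`);
+}
+```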
+ +Key error classes: +- `AppError`: Base error class with context support +- `ValidationError`: For input validation failures +- `ConfigurationError`: For configuration and environment issues +- `APIError`: For issues with external API calls +- `SandboxError`: For Python code execution failures +- `ToolError`: For errors in tool execution + +Usage example: + +```typescript +import { ValidationError, APIError, wrapError } from "./utils/errors"; +import { logger } from "./utils/logger"; + +// Create an error with context +const validationError = new ValidationError("Invalid city name") + .addContext({ + providedValue: city, + expectedFormat: "non-empty string" + }); + +// Log the error with full context +logger.logError(validationError); + +// Wrap an unknown error +try { + await someOperation(); +} catch (error) { + const wrappedError = wrapError(error, "Operation failed"); + wrappedError.addContext({ + operation: "someOperation", + input: JSON.stringify(input) + }); + logger.logError(wrappedError); +} +``` + +This error handling system is now integrated throughout the codebase, including: +- Sandbox code execution +- Tool implementations (especially API calls) +- Agent steps and API interactions +- Configuration validation + +### ✅ Type-Safe Code with Proper TypeScript Definitions (Completed) + +The codebase has been updated to use proper TypeScript definitions, eliminating `any` types and providing better type safety. The improvements include: + +- **Generic Type Parameters**: Tools and functions now use generic type parameters for better type checking. +- **Defined Interfaces**: Well-defined interfaces for key data structures and APIs. +- **JSON Value Type**: A proper type for JSON values that can be passed between JavaScript and Python. +- **Type Guards**: Added type guards to ensure type safety when dealing with unknown data. +- **Type-Safe API Design**: Redesigned APIs to use proper TypeScript features. 
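+
+One of the improvements listed above is runtime type guards for unknown data. As a minimal, hedged sketch (the codebase may implement this differently), a guard for the `JsonValue` type shown just below could look like:
+
+```typescript
+// Illustrative only; mirrors the JsonValue type defined in the next snippet.
+type JsonValue =
+  | string
+  | number
+  | boolean
+  | null
+  | JsonValue[]
+  | { [key: string]: JsonValue };
+
+// Returns true if a value can be safely treated as JSON for Python/JS interop.
+function isJsonValue(value: unknown): value is JsonValue {
+  if (value === null) return true;
+  const t = typeof value;
+  if (t === "string" || t === "number" || t === "boolean") return true;
+  if (Array.isArray(value)) return value.every(isJsonValue);
+  if (t === "object") {
+    return Object.values(value as Record<string, unknown>).every(isJsonValue);
+  }
+  return false;
+}
+```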
+ +Key type definitions: + +```typescript +// JSON value type for Python/JavaScript interop +export type JsonValue = + | string + | number + | boolean + | null + | JsonValue[] + | { [key: string]: JsonValue }; + +// Type-safe tool definition +export interface Tool { + fn: (input: TInput, context: ToolContext) => Promise>; + input: z.ZodType; + output: z.ZodType; + description: string; +} + +// Type-safe sandbox function exposure +export interface ExposedFunction { + fn: (input: TInput) => Promise; + input: z.ZodType; + output: z.ZodType; + description: string; +} +``` + +These changes improve: +- Compile-time type checking +- Code editor autocompletion and IntelliSense +- Refactoring safety +- Documentation through types +- Developer experience + +### ✅ Configurable Model Selection (Completed) + +A flexible model configuration system has been implemented to support both OpenAI and Anthropic models: + +- **Model Configuration**: Centralized configuration in `utils/config.ts` for all model settings +- **Environment Variables**: Support for environment variables to control model selection and parameters +- **Provider Abstraction**: An LLM service layer in `services/llm.ts` that abstracts provider-specific implementation details +- **Default Values**: Sensible defaults for all model parameters + +The system supports multiple configuration options: +- `AI_PROVIDER`: Choose between 'openai' and 'anthropic' +- `AI_MODEL`: Specify the exact model version to use +- `AI_TEMPERATURE`: Control the temperature parameter (0.0 to 1.0) +- `AI_MAX_TOKENS`: Set the maximum tokens to generate + +Latest models supported: +- OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo +- Anthropic: claude-3-7-sonnet-20250219, claude-3-5-sonnet-20241022, claude-3-5-haiku-20241022, etc. + +Usage example: + +```bash +# Use OpenAI GPT-4o +AI_PROVIDER=openai AI_MODEL=gpt-4o bun run main.ts "What's the weather in Paris?" + +# Use Anthropic Claude 3.7 Sonnet +AI_PROVIDER=anthropic AI_MODEL=claude-3-7-sonnet-20250219 bun run main.ts "What's the weather in Paris?" +``` + +### ✅ Improved Prompt Management (Completed) + +The prompt management system has been improved for better organization and maintainability: + +- **Centralized Prompts**: All prompts are now defined in `agent/prompts.ts` with clear structure +- **Documented Prompts**: Each prompt has JSDoc comments explaining its purpose and usage +- **Modular Structure**: Separate prompt constants for different stages of the agent lifecycle +- **Functional Approach**: Function-based templates for prompts that need dynamic content + +Key prompts: +- `systemPrompt`: Defines the agent's role and response format +- `firstStepPrompt`: Instructions for the first step of execution +- `continuePrompt`: Template for continuing after code execution +- `toolDocsPrompt`: Template for tool documentation +- `finalAnswerPrompt`: Prompt for generating the final answer + +This structure improves: +- Code organization and readability +- Prompt maintenance and versioning +- Documentation of prompt purposes and structures + +## Installation + +1. Clone the repository +2. Install dependencies: +```bash +bun install +``` +3. 
Create a `.env` file with your API keys and configuration: +``` +# Required API keys +OPENAI_API_KEY=your_openai_api_key_here +ANTHROPIC_API_KEY=your_anthropic_api_key_here +SERPAPI_API_KEY=your_serpapi_key_here +FIRECRAWL_API_KEY=your_firecrawl_key_here + +# LLM Configuration +AI_PROVIDER=openai # openai or anthropic +AI_MODEL=gpt-4o # For OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo + # For Anthropic: claude-3-7-sonnet-20250219, claude-3-5-sonnet-20241022, etc. +AI_TEMPERATURE=0.0 # 0.0 to 1.0 +AI_MAX_TOKENS=4096 # Maximum tokens to generate + +# Logging +LOG_LEVEL=INFO # ERROR, WARN, INFO, DEBUG, TRACE +``` + +## Development + +To run tests: +```bash +bun test +``` \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/README.md b/x/henry/mp-sandbox-agent/README.md index 9c80d75b80a8..f2e610164b4f 100644 --- a/x/henry/mp-sandbox-agent/README.md +++ b/x/henry/mp-sandbox-agent/README.md @@ -17,10 +17,68 @@ An AI agent that generates and executes Python code, inspired by [CodeAct (Wang bun install ``` -3. Create a `.env` file in the root directory with your OpenAI API key: +3. Create a `.env` file in the root directory with your API keys and configuration: ``` -OPENAI_API_KEY=your_api_key_here +# Required API keys +OPENAI_API_KEY=your_openai_api_key_here +ANTHROPIC_API_KEY=your_anthropic_api_key_here +SERPAPI_API_KEY=your_serpapi_key_here +FIRECRAWL_API_KEY=your_firecrawl_key_here + +# LLM Configuration +AI_PROVIDER=openai # openai or anthropic +AI_MODEL=gpt-4o # See .env.example for available models +AI_TEMPERATURE=0.0 # 0.0 to 1.0 +AI_MAX_TOKENS=4096 # Maximum tokens to generate + +# Logging +LOG_LEVEL=INFO # ERROR, WARN, INFO, DEBUG, TRACE +``` + +See `.env.example` for a full list of configuration options. + +## Usage + +Run the agent with a query: + +```bash +bun start "What's the weather in Paris?" +``` + +You can set the log level using the LOG_LEVEL environment variable: + +```bash +LOG_LEVEL=DEBUG bun start "What's the weather in Paris?" 
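+# Any of ERROR, WARN, INFO, DEBUG, TRACE works here; ERROR keeps output minimal
+LOG_LEVEL=ERROR bun start "What's the weather in Paris?"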
+``` + +Available log levels: ERROR, WARN, INFO, DEBUG, TRACE + +Or programmatically: + +```typescript +import { Agent } from "./agent"; +import { fetchWeather, searchWeb } from "./tools"; +import { logger, LogLevel } from "./utils/logger"; + +async function main() { + // Configure the logger + logger.setLevel(LogLevel.INFO); + + const agent = await Agent.create("What's the weather in Paris?"); + + const tools = { + fetch_weather: fetchWeather, + search_web: searchWeb, + }; + + let answer = null; + while (answer === null) { + answer = await agent.step(tools); + } + + logger.info("Final answer: %s", answer); +} ``` ## Development @@ -30,3 +88,42 @@ To run tests: ```bash bun test ``` + +For more detailed documentation, see [DOCUMENTATION.md](./DOCUMENTATION.md) + +## TODO + +Future enhancements to consider: + +- [agent] Final summary config: disable or configure prompt +- [agent] "Artifacts" support (list of named docs/strings that can be passed to agent.step() that are shown to the agent) +- [agent] Ability to pass "canStopExecution" to agent.step() - if true, the agent will have access to the stop execution tool during that step +- [helpers] Add a tokenizer for better token management +- Create an "Extract from page" tool that scrapes a page, processes it by 32k token chunks, and extracts relevant information +- Implement a more robust web search and content processing system +- Add ability to persist agent state to redis/postgres/filesystem for better recovery and continuation of long-running tasks + +## Improvement Status + +This project is currently undergoing improvements based on code review feedback: + +✅ **Completed**: +- Configurable logging system (replacing direct console.log statements) +- Consistent error handling with proper context information +- Replace `any` types with proper TypeScript definitions +- Configurable model selection with support for OpenAI and Anthropic +- Improved environment variable validation +- Better prompt organization and management + +🔄 **In Progress**: +- None currently + +⏳ **Pending**: +- Reduce coupling between Agent and PythonSandbox classes +- Implement input validation for external inputs +- Add resource limits to the sandbox +- Improve handling of API keys +- Expand test coverage +- Improve JSDoc comments + +For more details, see the [Implementation Progress](./DOCUMENTATION.md#implementation-progress) section in the documentation. 
diff --git a/x/henry/mp-sandbox-agent/agent.ts b/x/henry/mp-sandbox-agent/agent.ts deleted file mode 100644 index 77aa04245a11..000000000000 --- a/x/henry/mp-sandbox-agent/agent.ts +++ /dev/null @@ -1,116 +0,0 @@ -import OpenAI from "openai"; -import { PythonSandbox } from "./sandbox"; -import type { Tool } from "./tools/types"; -import { z } from "zod"; - -function generateFunctionDocs(functions: Record): string { - let docs = "Available functions:\n"; - - for (const [fnName, { description, input, output }] of Object.entries( - functions - )) { - // Function signature with description - const inputObject = input as z.ZodObject; - const outputObject = output as z.ZodObject; - - docs += `- ${fnName}(${Object.keys(inputObject.shape).join( - ", " - )}): async function that ${description}\n`; - - // Input parameters - docs += " Parameters:\n"; - for (const [paramName, paramSchema] of Object.entries(inputObject.shape)) { - const zodSchema = paramSchema as z.ZodType; - docs += ` * ${paramName} (${zodSchema.description || "any"})\n`; - } - - // Output fields - docs += " Returns an object with fields:\n"; - for (const [fieldName, fieldSchema] of Object.entries(outputObject.shape)) { - const zodSchema = fieldSchema as z.ZodType; - docs += ` * ${fieldName} (${zodSchema.description || "any"})\n`; - } - } - - return docs; -} - -export class Agent { - private sandbox!: PythonSandbox; - private openai: OpenAI; - private exposedTools: Set = new Set(); - private goal: string; - - private constructor(goal: string, apiKey: string) { - this.goal = goal; - this.openai = new OpenAI({ apiKey }); - } - - static async create(goal: string, apiKey: string): Promise { - const agent = new Agent(goal, apiKey); - agent.sandbox = await PythonSandbox.create(); - return agent; - } - - private generateSystemPrompt(tools: Record): string { - return ( - "You are a Python code generator working towards the following goal:\n" + - this.goal + - "\n\n" + - "Your response should follow this format:\n\n" + - "1. (Optional) A brief explanation of what the code will do and why, in plain text\n" + - "2. 
A Python code block that:\n" + - " - Contains no imports\n" + - " - Contains only top-level statements (no function definitions)\n" + - " - Can use await expressions directly (top-level await is supported)\n" + - " - Contains no comments\n" + - " - Is simple and self-contained\n\n" + - generateFunctionDocs(tools) + - "\n" + - "Example response format:\n" + - "This code will fetch and display the current weather in London.\n\n" + - "```python\n" + - "weather = await fetch_weather('London')\n" + - 'print(f\'Weather in {weather["city"]}: {weather["temperature"]}°C\')\n' + - "```" - ); - } - - async step( - tools: Record, - input: string - ): Promise<{ stdout: string; stderr: string }> { - // Expose or update tools - for (const [name, tool] of Object.entries(tools)) { - this.sandbox.expose(name, tool); - this.exposedTools.add(name); - } - - const response = await this.openai.chat.completions.create({ - model: "o3-mini", - messages: [ - { - role: "system", - content: this.generateSystemPrompt(tools), - }, - { - role: "user", - content: input, - }, - ], - }); - - if (!response.choices[0].message.content) { - throw new Error("No code generated from OpenAI"); - } - - // Extract code from the response - const content = response.choices[0].message.content; - const codeMatch = content.match(/```python\n([\s\S]*?)```/) || - content.match(/```\n([\s\S]*?)```/) || [null, content]; - const code = codeMatch[1].trim(); - - // Execute the code - return await this.sandbox.runCode(code); - } -} diff --git a/x/henry/mp-sandbox-agent/agent/helpers.test.ts b/x/henry/mp-sandbox-agent/agent/helpers.test.ts new file mode 100644 index 000000000000..5934129bc26c --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/helpers.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from "bun:test"; +import { z } from "zod"; +import { generateToolDocs } from "./helpers"; +import { defineTool } from "../tools/helpers"; +import { ToolOutput } from "../tools/types"; +import type { AnyTool, Tool } from "../tools/types"; + +// Helper function to convert any Tool to AnyTool for testing +function asAnyTool>(tool: T): AnyTool { + return tool as unknown as AnyTool; +} + +describe("generateToolDocs", () => { + it("should generate docs for a simple tool", () => { + const simpleTool = defineTool( + "A simple test function", + z.object({ + name: z.string().describe("The name parameter"), + }), + z.string().describe("The return value"), + async () => ({ type: "success", result: "test" }) + ); + + const docs = generateToolDocs({ simpleTool: asAnyTool(simpleTool) }); + expect(docs).toContain( + "All functions listed may return None if they fail (check for None before accessing the result)" + ); + expect(docs).toContain("simpleTool(name): A simple test function"); + expect(docs).toContain("* name: The name parameter"); + expect(docs).toContain("Returns:"); + expect(docs).toContain("The return value"); + }); + + it("should generate docs for a tool with complex types", () => { + const complexTool = defineTool( + "A complex test function", + z.object({ + user: z + .object({ + name: z.string().describe("User's name"), + age: z.number().describe("User's age"), + }) + .describe("User object"), + options: z.array(z.string()).describe("List of options"), + }), + z.object({ + id: z.number().describe("User ID"), + settings: z + .array( + z.object({ + key: z.string().describe("Setting key"), + value: z.string().describe("Setting value"), + }) + ) + .describe("User settings"), + }), + async () => ({ type: "success", result: { id: 1, settings: [] } }) 
+ ); + + const docs = generateToolDocs({ complexTool: asAnyTool(complexTool) }); + expect(docs).toContain( + "complexTool(user, options): A complex test function" + ); + expect(docs).toContain("* user: dictionary with keys:"); + expect(docs).toContain(" * name: User's name"); + expect(docs).toContain(" * age: User's age"); + expect(docs).toContain("* options: list of string"); + expect(docs).toContain("Returns:"); + expect(docs).toContain("dictionary with keys:"); + expect(docs).toContain("* id: User ID"); + expect(docs).toContain("* settings: list of dictionary with keys:"); + expect(docs).toContain(" * key: Setting key"); + expect(docs).toContain(" * value: Setting value"); + }); + + it("should handle multiple tools", () => { + const tool1 = defineTool( + "First tool", + z.object({ a: z.string() }), + z.number(), + async () => ({ type: "success", result: 1 }) + ); + + const tool2 = defineTool( + "Second tool", + z.object({ b: z.boolean() }), + z.string(), + async () => ({ type: "success", result: "test" }) + ); + + const docs = generateToolDocs({ + tool1: asAnyTool(tool1), + tool2: asAnyTool(tool2) + }); + expect(docs).toContain("tool1(a): First tool"); + expect(docs).toContain("tool2(b): Second tool"); + }); + + it("should handle tools with nested output types", () => { + const outputTool = defineTool( + "Output test function", + z.object({ input: z.string() }), + z.string(), + async () => ({ type: "success", result: "test" }) + ); + + const docs = generateToolDocs({ outputTool: asAnyTool(outputTool) }); + // Should only show the success case type + expect(docs).toContain("Returns:"); + expect(docs).toContain("string"); + // Should not show the discriminated union structure + expect(docs).not.toContain("type:"); + expect(docs).not.toContain("result:"); + }); +}); diff --git a/x/henry/mp-sandbox-agent/agent/helpers.ts b/x/henry/mp-sandbox-agent/agent/helpers.ts new file mode 100644 index 000000000000..8060993c4152 --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/helpers.ts @@ -0,0 +1,83 @@ +import type { AnyTool } from "../tools/types"; +import { z } from "zod"; + +/** + * Converts a Zod schema to a readable description string + * @param schema The Zod schema to describe + * @param indent Indentation level for nested schemas + * @returns A human-readable description of the schema + */ +function describeZodType(schema: z.ZodTypeAny, indent = ""): string { + if (schema instanceof z.ZodArray) { + return `list of ${describeZodType(schema.element, indent + " ")}`; + } else if (schema instanceof z.ZodObject) { + let desc = "dictionary with keys:\n"; + for (const [fieldName, fieldSchema] of Object.entries(schema.shape)) { + desc += `${indent} * ${fieldName}: ${describeZodType( + fieldSchema as z.ZodType, + indent + " " + ) + .split("\n") + .join("\n" + indent)}\n`; + } + return desc; + } else if (schema instanceof z.ZodUnion && schema.options.length === 2) { + // Check if this is a ToolOutput schema + const successCase = schema.options.find( + (opt: z.ZodTypeAny) => + opt instanceof z.ZodObject && + 'type' in opt.shape && + opt.shape.type instanceof z.ZodLiteral && + opt.shape.type.value === "success" + ) as z.ZodObject<{ type: z.ZodLiteral<"success">; result: z.ZodTypeAny }> | undefined; + + if (successCase?.shape.result) { + return describeZodType(successCase.shape.result, indent); + } + // If we can't handle this union type, just describe it as a union + return `union of ${schema.options + .map((opt: z.ZodType) => describeZodType(opt, indent + " ")) + .join(" | ")}`; + } else { + return ( 
+ schema.description || + schema.constructor.name.replace("Zod", "").toLowerCase() || + "any" + ); + } +} + +/** + * Generates documentation for tools that can be used in Python code + * @param tools Dictionary of tools to document + * @returns A string containing documentation for all tools + */ +export function generateToolDocs(tools: Record): string { + let docs = + "Note: \n" + + "- All functions listed may return None if they fail (check for None before accessing the result)\n" + + "- All functions listed here are asynchronous and must be always be awaited, even if they don't return anything or you don't care about the result.\n"; + + for (const [fnName, { description, input, output }] of Object.entries(tools)) { + // Check that input is an object schema + if (!(input instanceof z.ZodObject)) { + continue; + } + + // Function signature with description + const paramNames = Object.keys(input.shape); + docs += `- ${fnName}(${paramNames.join(", ")}): ${description}\n`; + + // Input parameters + docs += " Parameters:\n"; + for (const [paramName, paramSchema] of Object.entries(input.shape)) { + docs += ` * ${paramName}: ${describeZodType(paramSchema as z.ZodTypeAny, " ")}\n`; + } + + // Output fields + docs += " Returns:\n"; + docs += describeZodType(output, " "); + } + + return docs; +} diff --git a/x/henry/mp-sandbox-agent/agent/index.ts b/x/henry/mp-sandbox-agent/agent/index.ts new file mode 100644 index 000000000000..9a5c2ad615d5 --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/index.ts @@ -0,0 +1,302 @@ +import { PythonSandbox } from "../sandbox"; +import type { Tool, AnyTool } from "../tools/types"; +import { generateToolDocs } from "./helpers"; +import { z } from "zod"; +import { defineTool } from "../tools/helpers"; +import { systemPrompt, firstStepPrompt, continuePrompt, toolDocsPrompt, finalAnswerPrompt } from "./prompts"; +import { logger } from "../utils/logger"; +import { + ValidationError, + APIError, + SandboxError, + wrapError +} from "../utils/errors"; +import { LLMService } from "../services/llm"; +import type { Message } from "../services/llm"; +import { loadModelConfig } from "../utils/config"; +import type { ModelConfig } from "../utils/config"; + +/** + * Represents a single step in the agent's execution + */ +interface StepResult { + /** The text generated by the agent */ + generation: string; + /** The output from executing the generated code */ + codeOutput: string; +} + +/** + * An AI agent that generates and executes Python code to solve tasks + */ +export class Agent { + /** The sandbox for executing Python code */ + private sandbox!: PythonSandbox; + /** The LLM service for generating code */ + private llmService: LLMService; + /** The model configuration */ + private modelConfig: ModelConfig; + /** Set of tool names that have been exposed to the sandbox */ + private exposedTools: Set = new Set(); + /** The goal the agent is trying to achieve */ + private goal: string; + /** History of all steps taken by the agent */ + private steps: StepResult[] = []; + /** Whether the agent should continue or return a final answer */ + private shouldContinue = true; + + private constructor(goal: string, modelConfig: ModelConfig) { + this.goal = goal; + this.modelConfig = modelConfig; + this.llmService = new LLMService(modelConfig); + } + + /** + * Creates a special tool that allows the agent to stop execution and provide a final answer + * @returns A Tool that signals the end of execution + */ + private getFinalExecutionTool(): Tool, null> { + return defineTool( + "Must be 
used when the execution logs contain enough information to provide a final answer to the user." + + "After using this function, the user will ask you to write a final answer based on your execution logs. " + + "This function must be awaited like any other function.", + z.object({}), + z.null(), + async () => { + this.shouldContinue = false; + return { type: "success", result: null }; + } + ); + } + + static async create(goal: string): Promise { + logger.separator(); + logger.info(`Creating agent with goal: ${goal}`); + logger.separator(); + + // Load model configuration from environment variables + const modelConfig = loadModelConfig(); + logger.info(`Using ${modelConfig.provider} with model: ${modelConfig.model}`); + + const agent = new Agent(goal, modelConfig); + agent.sandbox = await PythonSandbox.create(); + return agent; + } + + /** + * Executes one step of the agent, generating and running Python code + * @param _tools Dictionary of tools to make available to the agent + * @returns The final answer if the agent decides to stop, or null if it needs to continue + */ + async step(_tools: Record): Promise { + const tools = { ..._tools }; + if (Object.keys(tools).some((name) => name === "stop_execution")) { + throw new ValidationError("Reserved tool name cannot be used") + .addContext({ + reservedToolName: "stop_execution", + providedTools: Object.keys(tools) + }); + } + + // Add stop_execution tool, converting to AnyTool for compatibility + const stopTool = this.getFinalExecutionTool(); + tools["stop_execution"] = stopTool as unknown as AnyTool; + + // Expose or update tools + const errors: Array<{ tool: string; error: string }> = []; + const logs: Array = []; + + for (const [name, tool] of Object.entries(tools)) { + this.sandbox.expose(name, { + ...tool, + fn: async (input: unknown) => { + // Call the tool with the input and a logging context + const result = await tool.fn(input, { + log: (message: string) => { + logs.push(message + "\n"); + }, + }); + + // If successful, return the result + if (result.type === "success") { + return result.result; + } + + // If there was an error, record it and return null + errors.push({ tool: name, error: result.error }); + return null; + }, + }); + + // Keep track of which tools have been exposed + this.exposedTools.add(name); + } + + // Initialize messages with system and user prompts + const messages: Message[] = [ + { + role: "system", + content: systemPrompt, + }, + { + role: "user", + content: this.goal, + }, + ]; + + // Add messages for each previous step + for (const step of this.steps) { + // Add the assistant's response from the previous step + messages.push({ + role: "assistant", + content: step.generation, + }); + + // Add the user's response with the code output + messages.push({ + role: "user", + content: continuePrompt(step.codeOutput), + }); + } + + // For the first step, add instructions to begin with analysis and code block + if (!this.steps.length) { + messages[messages.length - 1].content += firstStepPrompt; + } + + // Add tool documentation to the last message + messages[messages.length - 1].content += toolDocsPrompt(generateToolDocs(tools)); + + logger.separator(); + logger.debug("Messages:"); + logger.debug(JSON.stringify(messages, null, 2)); + logger.separator(); + + let llmResponse; + try { + llmResponse = await this.llmService.generateCompletion(messages); + } catch (error) { + // The LLMService already wraps the error, so we just rethrow it + throw error; + } + + // Extract code from the response + const content = 
llmResponse.content; + logger.separator(); + logger.info("Code generation response:"); + logger.info(content); + logger.separator(); + + const codeMatch = content.match(/```python\n([\s\S]*?)```/) || + content.match(/```\n([\s\S]*?)```/) || [null, content]; + const code = codeMatch[1].trim(); + + // Execute the code + // Execute the code with improved error handling + const codeOutput = await (async () => { + try { + // Run the code in sandbox + const codeOutput = await this.sandbox.runCode(code); + + // Format the outputs + let output = ""; + if (codeOutput.stdout) { + output += `STDOUT:\n${codeOutput.stdout}\n\n`; + } + if (logs.length > 0) { + output += `EXECUTION LOGS:\n${logs.join("\n")}\n\n`; + } + if (codeOutput.stderr) { + output += `STDERR:\n${codeOutput.stderr}\n\n`; + } + if (errors.length > 0) { + output += `ERRORS:\n${errors + .map((e) => `* ${e.tool}: ${e.error}`) + .join("\n")}\n\n`; + } + + if (!output) { + return "No output returned from the code."; + } + + return output; + } catch (error) { + // Log detailed error for debugging + if (error instanceof SandboxError) { + logger.debug("Sandbox execution failed with error:"); + logger.debug(`Message: ${error.message}`); + logger.debug(`Context: ${JSON.stringify(error.context, null, 2)}`); + logger.debug(`Stdout: ${error.stdout}`); + logger.debug(`Stderr: ${error.stderr}`); + + // Return formatted error for the model + return `STDERR:\n${error.stderr || error.message}`; + } else { + // For other errors, wrap them for consistent handling + const wrappedError = wrapError(error, "Code execution failed"); + logger.debug(`Code execution error: ${wrappedError.message}`); + + // Return a user-friendly error message for the model + return `STDERR:\n${wrappedError.message}`; + } + } + })(); + + logger.separator(); + logger.info("Code output:"); + logger.info(codeOutput); + logger.separator(); + + messages.push({ + role: "assistant", + content: content, + }); + + if (!this.shouldContinue) { + // Add the final answer prompt + messages.push({ + role: "user", + content: finalAnswerPrompt, + }); + + try { + const finalResponse = await this.llmService.generateCompletion(messages); + return finalResponse.content; + } catch (error) { + // Handle API errors in final response generation + if (error instanceof APIError) { + logger.logError(error.addContext({ + isFinalAnswer: true + })); + } else { + logger.logError(wrapError(error, "Failed to generate final response")); + } + + // Return a fallback response since this is the final step + return "I was unable to generate a final response due to an API error. 
Please check the execution logs for the information gathered so far."; + } + } + + const stepResult: StepResult = { + generation: content, + codeOutput: codeOutput, + }; + + logger.separator(); + logger.debug("Step result:"); + logger.debug(JSON.stringify(stepResult, null, 2)); + logger.separator(); + + this.steps.push(stepResult); + + return null; + } + + /** + * Gets the history of steps taken by the agent + * @returns Array of step results containing generation and execution output + */ + getSteps(): readonly StepResult[] { + return [...this.steps]; + } +} diff --git a/x/henry/mp-sandbox-agent/agent/prompts.ts b/x/henry/mp-sandbox-agent/agent/prompts.ts new file mode 100644 index 000000000000..3305b26b2592 --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/prompts.ts @@ -0,0 +1,63 @@ +/** + * This file contains all prompts used by the agent + * Each prompt is a separate exported constant with descriptive name + */ + +/** + * System prompt that establishes the agent's role, response format, + * and explains the Python environment and conversation structure + */ +export const systemPrompt = ` + +You are a research AI agent that uses Python code to work towards a goal that has been provided by the user. +You use as many steps as you need to achieve the goal. You run a new piece of code at each step. + + + +Your response must always follow this format: +1. An analysis of the situation, in plain text. Explain what you need to do next to achieve the goal. +2. A Python code block that: + - Contains no imports + - Contains only top-level statements (no function definitions) + - Can use await expressions directly (top-level await is supported) + - Contains no comments + - Is simple and self-contained + + + +Your Python code is executed in a MicroPython sandbox which has access to a set of functions in addition to the builtins. +You do not need to import them, and you must never use the \`import\` keyword, which isn't available in the environment. +This environment supports top-level awaits. Your code does not need to be wrapped in an async function. +You don't need to import or initialize asyncio, it's already available and your code will be executed. + + + +After each step, the user will provide you your Execution Logs: the standard output and error logs generated by your code. +You will then be asked to continue working towards the goal by providing a new analysis of the situation, and a new Python code block. +Once you believe you have enough information to provide a final answer to the user, you must output a code block that uses the \`stop_execution\` function. 
+ +`; + +/** + * Prompt for the first step, instructing the agent to begin with analysis and code + */ +export const firstStepPrompt = "\nPlease begin by an analysis and a python code block to achieve the goal.\n"; + +/** + * Prompt template for continuing after seeing code output + * Use codeOutput as a placeholder for the actual output + */ +export const continuePrompt = (codeOutput: string) => + `Here is the output of the code you generated:\n\n${codeOutput}\n\nPlease continue generating code.`; + +/** + * Prompt for tool documentation + * Use toolDocs as a placeholder for the actual tool documentation + */ +export const toolDocsPrompt = (toolDocs: string) => + `\n\nYou currently have access to the following function:\n${toolDocs}`; + +/** + * Prompt for requesting a final answer + */ +export const finalAnswerPrompt = "Please provide a comprehensive final answer to the goal based on the execution logs you have."; diff --git a/x/henry/mp-sandbox-agent/bun.lock b/x/henry/mp-sandbox-agent/bun.lock index 65a8c8040311..a60b231c8257 100644 --- a/x/henry/mp-sandbox-agent/bun.lock +++ b/x/henry/mp-sandbox-agent/bun.lock @@ -4,9 +4,12 @@ "": { "name": "mp-sandbox", "dependencies": { + "@anthropic-ai/sdk": "^0.39.0", "@micropython/micropython-webassembly-pyscript": "^1.24.1", "dotenv": "^16.4.7", + "google-search-results-nodejs": "^2.1.0", "openai": "^4.85.1", + "tiktoken": "^1.0.20", "zod": "^3.24.2", }, "devDependencies": { @@ -18,6 +21,8 @@ }, }, "packages": { + "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.39.0", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-eMyDIPRZbt1CCLErRCi3exlAvNkBtRe+kW5vvJyef93PmNr/clstYgHhtvmkxN82nlKgzyGPCyGxrm0JQ1ZIdg=="], + "@micropython/micropython-webassembly-pyscript": ["@micropython/micropython-webassembly-pyscript@1.24.1", "", {}, "sha512-FC+Lv9TcwyBczC3FcyKYV/loxieXF7B7I5mwxA8wlmw8zm7RFHTL83kOSb06m0bOwgIkiSBwVlxd+2wTBM5NjA=="], "@types/bun": ["@types/bun@1.2.2", "", { "dependencies": { "bun-types": "1.2.2" } }, "sha512-tr74gdku+AEDN5ergNiBnplr7hpDp3V1h7fqI2GcR/rsUaM39jpSeKH0TFibRvU0KwniRx5POgaYnaXbk0hU+w=="], @@ -68,6 +73,8 @@ "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + "google-search-results-nodejs": ["google-search-results-nodejs@2.1.0", "", {}, "sha512-7jEAoAV/Ful7Q2BxrYrkOst14aqwUvkEUwlRBrYATUylpp/bf3uE9lDgyk9brqbDt61hA2xkAm2sog9orHImVw=="], + "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], @@ -92,6 +99,8 @@ "openai": ["openai@4.85.1", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" }, "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-jkX2fntHljUvSH3MkWh4jShl10oNkb+SsCj4auKlbu2oF4KWAnmHLNR5EpnUHK1ZNW05Rp0fjbJzYwQzMsH8ZA=="], + "tiktoken": ["tiktoken@1.0.20", "", {}, 
"sha512-zVIpXp84kth/Ni2me1uYlJgl2RZ2EjxwDaWLeDY/s6fZiyO9n1QoTOM5P7ZSYfToPvAvwYNMbg5LETVYVKyzfQ=="], + "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], "typescript": ["typescript@5.7.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw=="], diff --git a/x/henry/mp-sandbox-agent/main.ts b/x/henry/mp-sandbox-agent/main.ts index 7ec53f237f59..4a1d73b58e66 100644 --- a/x/henry/mp-sandbox-agent/main.ts +++ b/x/henry/mp-sandbox-agent/main.ts @@ -1,47 +1,66 @@ -// hello.ts +// main.ts import * as dotenv from "dotenv"; import { fetchWeather } from "./tools/fetch_weather"; import { Agent } from "./agent"; +import { scrapePages } from "./tools/scrape"; +import { searchWeb } from "./tools/serp"; +import { logger, LogLevel } from "./utils/logger"; +import { ConfigurationError, wrapError } from "./utils/errors"; +import type { AnyTool } from "./tools/types"; +import { loadModelConfig } from "./utils/config"; // Load environment variables from .env file dotenv.config(); -const apiKey = process.env.OPENAI_API_KEY; -if (!apiKey) { - throw new Error( - "Please set the OPENAI_API_KEY environment variable in your .env file" - ); -} - -// Generate system prompt for the model - async function main() { - const request = process.argv[2]; - if (!request) { - console.error("Please provide a request as a command line argument"); + try { + // Set log level from environment variable or default to INFO + const logLevelStr = process.env.LOG_LEVEL?.toUpperCase() || 'INFO'; + const logLevel = LogLevel[logLevelStr as keyof typeof LogLevel] ?? LogLevel.INFO; + logger.setLevel(logLevel); + + // Load and validate model configuration (this will throw if required env vars are missing) + loadModelConfig(); + + const request = process.argv[2]; + if (!request) { + logger.error("Please provide a request as a command line argument"); + process.exit(1); + } + + const agent = await Agent.create(request); + // Convert typed tools to AnyTool for compatibility + const asAnyTool = (tool: T): AnyTool => tool as unknown as AnyTool; + + const tools: Record = { + fetch_weather: asAnyTool(fetchWeather), + scrape_pages: asAnyTool(scrapePages), + search_web: asAnyTool(searchWeb), + }; + + let answer: string | null = null; + while (answer === null) { + answer = await agent.step(tools); + } + + // Always show the final answer, even at ERROR level + const currentLevel = logger.getLevel(); + logger.setLevel(LogLevel.INFO); + logger.setTimestamps(false); + logger.setShowLevel(false); + logger.info("\nFinal answer:"); + logger.info(answer); + + // Reset logger settings + logger.setTimestamps(true); + logger.setShowLevel(true); + logger.setLevel(currentLevel); + } catch (error) { + // Wrap and log the error with full context + const wrappedError = wrapError(error, "Failed to execute agent"); + logger.logError(wrappedError, "Application terminated with error"); process.exit(1); } - - // Initialize agent with a goal - const agent = await Agent.create( - "Help users get weather information for cities around the world", - apiKey as string - ); - - // Define available tools - const tools = { - fetch_weather: fetchWeather, - }; - - // Run a step with the user's request - const { stdout, stderr } = await agent.step(tools, request); - - // Output results - if (stdout) console.log("\nOutput:", stdout); - if (stderr) console.log("\nErrors:", stderr); } 
-main().catch((error) => { - console.error("Error:", error); - process.exit(1); -}); +main(); diff --git a/x/henry/mp-sandbox-agent/package.json b/x/henry/mp-sandbox-agent/package.json index 5d6da8adf8bd..cc4fa5e61a97 100644 --- a/x/henry/mp-sandbox-agent/package.json +++ b/x/henry/mp-sandbox-agent/package.json @@ -2,6 +2,10 @@ "name": "mp-sandbox", "module": "index.ts", "type": "module", + "scripts": { + "test": "bun test sandbox.test.ts agent/helpers.test.ts", + "start": "bun main.ts" + }, "devDependencies": { "@types/bun": "latest" }, @@ -9,9 +13,12 @@ "typescript": "^5.0.0" }, "dependencies": { + "@anthropic-ai/sdk": "^0.39.0", "@micropython/micropython-webassembly-pyscript": "^1.24.1", "dotenv": "^16.4.7", + "google-search-results-nodejs": "^2.1.0", "openai": "^4.85.1", + "tiktoken": "^1.0.20", "zod": "^3.24.2" } } \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/sandbox.test.ts b/x/henry/mp-sandbox-agent/sandbox.test.ts index 2e88f2d06041..277725659a9f 100644 --- a/x/henry/mp-sandbox-agent/sandbox.test.ts +++ b/x/henry/mp-sandbox-agent/sandbox.test.ts @@ -11,15 +11,15 @@ describe("PythonSandbox", () => { }); test("should support importing and calling exposed functions", async () => { - const sandbox = await PythonSandbox.create("test"); + const sandbox = await PythonSandbox.create(); sandbox.expose("fake_function", { - fn: () => "Hello, World!", + fn: async () => "Hello, World!", input: z.object({}), output: z.string(), description: "A fake function that returns a string", }); const { stdout, stderr } = await sandbox.runCode( - "from test import fake_function\nprint(fake_function())" + "\nprint(await fake_function())" ); expect(stdout).toBe("Hello, World!\n"); expect(stderr).toBe(""); @@ -28,13 +28,13 @@ describe("PythonSandbox", () => { test("should support importing and calling exposed functions with arguments", async () => { const sandbox = await PythonSandbox.create("test"); sandbox.expose("add", { - fn: ({ a, b }: { a: number; b: number }) => a + b, + fn: async ({ a, b }: { a: number; b: number }) => a + b, input: z.object({ a: z.number(), b: z.number() }), output: z.number(), description: "Adds two numbers", }); const { stdout, stderr } = await sandbox.runCode( - "from test import add\nprint(add(1, 2))" + "\nprint(await add(1, 2))" ); expect(stdout).toBe("3\n"); expect(stderr).toBe(""); @@ -43,13 +43,13 @@ describe("PythonSandbox", () => { test("should support importing and calling exposed functions with positional arguments", async () => { const sandbox = await PythonSandbox.create("test"); sandbox.expose("sub", { - fn: ({ b, a }: { a: number; b: number }) => b - a, + fn: async ({ b, a }: { a: number; b: number }) => b - a, input: z.object({ b: z.number(), a: z.number() }), output: z.number(), description: "Subtracts two numbers", }); const { stdout, stderr } = await sandbox.runCode( - "from test import sub\nprint(sub(1, 2))" + "\nprint(await sub(1, 2))" ); expect(stdout).toBe("-1\n"); expect(stderr).toBe(""); @@ -58,13 +58,13 @@ describe("PythonSandbox", () => { test("should support importing and calling exposed functions with keyword arguments", async () => { const sandbox = await PythonSandbox.create("test"); sandbox.expose("multiply", { - fn: ({ a, b }: { a: number; b: number }) => a * b, + fn: async ({ a, b }: { a: number; b: number }) => a * b, input: z.object({ a: z.number(), b: z.number() }), output: z.number(), description: "Multiplies two numbers", }); const { stdout, stderr } = await sandbox.runCode( - "from test import multiply\nprint(multiply(a=1, 
b=2))" + "\nprint(await multiply(a=1, b=2))" ); expect(stdout).toBe("2\n"); expect(stderr).toBe(""); @@ -79,7 +79,7 @@ describe("PythonSandbox", () => { description: "Returns a string after a delay", }); const { stdout, stderr } = await sandbox.runCode( - "from test import async_function\nprint(await async_function())" + "\nprint(await async_function())" ); expect(stdout).toBe("Hello, World!\n"); expect(stderr).toBe(""); @@ -100,9 +100,37 @@ describe("PythonSandbox", () => { await sandbox.runCode("raise Exception('This is a test error')"); } catch (error) { expect(error).toBeInstanceOf(Error); - expect((error as Error).message).toBe( - 'Traceback (most recent call last):\n File "", line 2, in \nException: This is a test error\n' + expect((error as Error).message).toContain( + 'This is a test error' ); + // Make sure it contains the Python traceback + expect((error as Error).message).toContain('Traceback (most recent call last)'); } }); + test("should support list kw parameters", async () => { + const sandbox = await PythonSandbox.create("test"); + sandbox.expose("list_function", { + fn: async ({ l }) => l, + input: z.object({ l: z.array(z.string()) }), + output: z.array(z.string()), + description: "Returns a list of strings", + }); + const { stdout, stderr } = await sandbox.runCode( + "\nprint(await list_function(l=['a', 'b', 'c']))" + ); + expect(stdout).toBe("['a', 'b', 'c']\n"); + }); + test("should support list parameters", async () => { + const sandbox = await PythonSandbox.create("test"); + sandbox.expose("list_function", { + fn: async ({ l }) => l, + input: z.object({ l: z.array(z.string()) }), + output: z.array(z.string()), + description: "Returns a list of strings", + }); + const { stdout, stderr } = await sandbox.runCode( + "\nprint(await list_function(['a', 'b', 'c']))" + ); + expect(stdout).toBe("['a', 'b', 'c']\n"); + }); }); diff --git a/x/henry/mp-sandbox-agent/sandbox.ts b/x/henry/mp-sandbox-agent/sandbox.ts index acebd352c5aa..7493d9d90eb5 100644 --- a/x/henry/mp-sandbox-agent/sandbox.ts +++ b/x/henry/mp-sandbox-agent/sandbox.ts @@ -3,16 +3,51 @@ import { type MicroPythonInstance, } from "@micropython/micropython-webassembly-pyscript/micropython.mjs"; import * as z from "zod"; -import type { Tool } from "./tools/types"; +import { logger } from "./utils/logger"; +import { SandboxError, wrapError } from "./utils/errors"; export interface CodeExecutionResult { result: unknown; stdout: string; } +/** + * Represents a parsed JSON value (string, number, boolean, null, array, or object) + */ +export type JsonValue = + | string + | number + | boolean + | null + | JsonValue[] + | { [key: string]: JsonValue }; + +/** + * Represents a function that can be exposed to the sandbox + */ +export interface ExposedFunction< + TInput = unknown, + TOutput = unknown +> { + /** Function to execute when called from Python */ + fn: (input: TInput) => Promise; + /** Schema to validate and parse the input */ + input: z.ZodType; + /** Schema to validate and parse the output */ + output: z.ZodType; + /** Description of the function */ + description: string; +} + +/** + * Untyped version of ExposedFunction for internal use + */ +type AnyExposedFunction = ExposedFunction; + export class PythonSandbox { private mp!: MicroPythonInstance; - private exposedFunctions: { [key: string]: Tool } = {}; + private exposedFunctions: { [key: string]: AnyExposedFunction } = {}; + private module: Record = {}; private moduleId: string; private stdoutBuffer: string[] = []; private stderrBuffer: string[] = []; @@ -54,69 
+89,135 @@ export class PythonSandbox { return { stdout, stderr }; } - expose(name: string, func: Tool) { - this.exposedFunctions[name] = func; - - const wrapper = (...args: unknown[]) => { - // Parse input according to schema - const inputObject = func.input as z.ZodObject; - const params = func.input.parse( - args.length === 1 && typeof args[0] === "object" - ? args[0] - : { - [Object.keys(inputObject.shape)[0]]: args[0], - [Object.keys(inputObject.shape)[1]]: args[1], - } - ); - return func.fn(params); + /** + * Expose a function to the Python environment + * @param name The name of the function in Python + * @param func The function to expose + */ + expose(name: string, func: ExposedFunction): void { + this.exposedFunctions[name] = func as AnyExposedFunction; + + // Create a wrapper function that handles JSON serialization/deserialization + const wrapper = (_args: string, _kwargs: string): Promise<string | number | boolean> => { + // Parse input JSON strings + const args: JsonValue[] = JSON.parse(_args); + const kwargs: Record<string, JsonValue> = JSON.parse(_kwargs); + + // Convert positional and keyword arguments to an object that can be validated + // against the input schema + const paramsObj: Record<string, JsonValue> = {}; + + // Handle object schemas differently than other schemas + if (func.input instanceof z.ZodObject) { + // Map parameters from positional args and keyword args + for (const [i, key] of Object.keys(func.input.shape).entries()) { + if (key in kwargs) { + paramsObj[key] = kwargs[key]; + } else if (i < args.length) { + paramsObj[key] = args[i]; + } + } + } else { + // For non-object schemas, just use the first argument + if (args.length > 0) { + return Promise.resolve(JSON.stringify(args[0])); + } + } + + // Parse with the input schema to validate and transform + const params = func.input.parse(paramsObj); + + // Call the function + const result = func.fn(params); + + // Function to convert result to a JSON-compatible value + const serializeValue = (value: unknown): string | number | boolean => { + if ( + typeof value === "string" || + typeof value === "number" || + typeof value === "boolean" + ) { + return value; + } + return JSON.stringify(value); + }; + + // Handle async results + if (result instanceof Promise) { + return result.then(serializeValue); + } + + return Promise.resolve(serializeValue(result)); }; // Create an object to hold our exposed functions - const module = { [name]: wrapper }; - this.mp.registerJsModule(this.moduleId, module); + this.module[name] = wrapper; + this.mp.registerJsModule(this.moduleId, this.module); + } + + private generateWrapperFunction(name: string): string { + return ` +async def ${name}(*args, **kwargs): + args = json.dumps(args) + kwargs = json.dumps(kwargs) + + r = await _${name}(args, kwargs) + try: + return json.loads(r) + except: + return r`; + } + + private generateImports(): string { + const imports = ["import json"]; + + for (const name of Object.keys(this.exposedFunctions)) { + imports.push(`from ${this.moduleId} import ${name} as _${name}`); + imports.push(this.generateWrapperFunction(name)); + } + + return imports.join("\n"); } async runCode(code: string): Promise<{ stdout: string; stderr: string }> { - // Clear stdout and stderr buffers before running new code this.clearBuffers(); - - // Import exposed functions if any - const importCode = Object.keys(this.exposedFunctions) - .map((name) => `from ${this.moduleId} import ${name}`) - .join("\n"); + + const codeLength = code.length; + const codeSummary = code.length > 50 + ? 
`${code.substring(0, 47)}...` + : code; + + logger.debug(`Running code (${codeLength} chars): ${codeSummary}`); + const importCode = this.generateImports(); try { - // Run the actual code - await this.mp.runPythonAsync(`${importCode}\n${code.trim()}`); - - return this.getOutput(); + const fullCode = `${importCode}\n\n${code.trim()}`; + await this.mp.runPythonAsync(fullCode); + const output = this.getOutput(); + logger.debug(`Code execution successful with ${output.stdout.length} bytes of stdout`); + return output; } catch (error) { - // Get stdout before throwing + // Get stdout and stderr before creating error object const { stdout, stderr } = this.getOutput(); - - // Create a proper error object - let errorObj: Error; - if (error instanceof Error) { - errorObj = error; - } else if (typeof error === "string") { - errorObj = new Error(error); - } else { - errorObj = new Error(String(error)); - } - - // Add stdout and stderr to the error - Object.defineProperty(errorObj, "stdout", { - value: stdout, - enumerable: true, - writable: false, + + logger.debug(`Code execution failed with ${stderr.length} bytes of stderr`); + + // Create a SandboxError with context + const errorMessage = error instanceof Error ? error.message : String(error); + const sandboxError = new SandboxError( + `Python code execution failed: ${errorMessage}`, + stdout, + stderr, + { cause: error instanceof Error ? error : undefined } + ).addContext({ + codeLength, + codeSummary, + moduleId: this.moduleId, + hasStdout: stdout.length > 0, + hasStderr: stderr.length > 0 }); - Object.defineProperty(errorObj, "stderr", { - value: stderr, - enumerable: true, - writable: false, - }); - - throw errorObj; + + throw sandboxError; } } } diff --git a/x/henry/mp-sandbox-agent/services/llm.ts b/x/henry/mp-sandbox-agent/services/llm.ts new file mode 100644 index 000000000000..c301d53d5d89 --- /dev/null +++ b/x/henry/mp-sandbox-agent/services/llm.ts @@ -0,0 +1,167 @@ +import OpenAI from "openai"; +import Anthropic from "@anthropic-ai/sdk"; +import { type ModelConfig, type Provider } from "../utils/config"; +import { type ChatCompletionMessageParam } from "openai/resources/chat/completions"; +import { APIError } from "../utils/errors"; +import { logger } from "../utils/logger"; + +/** + * Message format common across different providers + */ +export interface Message { + role: "system" | "user" | "assistant"; + content: string; +} + +/** + * Response format from LLM providers + */ +export interface LLMResponse { + content: string; + totalTokens?: number; +} + +/** + * Service for interacting with various LLM providers + */ +export class LLMService { + private config: ModelConfig; + private openaiClient?: OpenAI; + private anthropicClient?: Anthropic; + + constructor(config: ModelConfig) { + this.config = config; + + // Initialize the appropriate client based on provider + if (config.provider === "openai") { + this.openaiClient = new OpenAI({ apiKey: config.apiKey }); + } else if (config.provider === "anthropic") { + this.anthropicClient = new Anthropic({ apiKey: config.apiKey }); + } + } + + /** + * Converts our internal message format to OpenAI's format + */ + private toOpenAIMessages(messages: Message[]): ChatCompletionMessageParam[] { + return messages.map(message => ({ + role: message.role, + content: message.content, + })); + } + + /** + * Converts our internal message format to Anthropic's format + */ + private toAnthropicMessages(messages: Message[]): Anthropic.Messages.MessageParam[] { + // Anthropic requires system message to be 
separate from the conversation + const systemMessage = messages.find(m => m.role === "system"); + const nonSystemMessages = messages.filter(m => m.role !== "system"); + + const anthropicMessages: Anthropic.Messages.MessageParam[] = nonSystemMessages.map(message => ({ + role: message.role === "user" ? "user" : "assistant", + content: message.content, + })); + + return anthropicMessages; + } + + /** + * Generate a completion from the selected LLM provider + */ + async generateCompletion(messages: Message[]): Promise<LLMResponse> { + try { + if (this.config.provider === "openai") { + return await this.generateOpenAICompletion(messages); + } else if (this.config.provider === "anthropic") { + return await this.generateAnthropicCompletion(messages); + } else { + throw new APIError(`Unsupported provider: ${this.config.provider}`) + .addContext({ supportedProviders: ["openai", "anthropic"] }); + } + } catch (error) { + // Enhance error reporting with provider-specific details + throw new APIError( + `Failed to generate response from ${this.config.provider}`, + error instanceof Error && 'status' in error ? (error as any).status : undefined, + { cause: error instanceof Error ? error : undefined } + ).addContext({ + provider: this.config.provider, + model: this.config.model, + messageCount: messages.length, + }); + } + } + + /** + * Generate a completion using OpenAI + */ + private async generateOpenAICompletion(messages: Message[]): Promise<LLMResponse> { + if (!this.openaiClient) { + throw new APIError("OpenAI client not initialized") + .addContext({ provider: "openai" }); + } + + const openaiMessages = this.toOpenAIMessages(messages); + + logger.debug(`Sending request to OpenAI with model: ${this.config.model}`); + const response = await this.openaiClient.chat.completions.create({ + model: this.config.model, + messages: openaiMessages, + temperature: this.config.temperature, + max_tokens: this.config.maxTokens, + }); + + if (!response.choices[0].message.content) { + throw new APIError("OpenAI returned empty response content") + .addContext({ + responseId: response.id, + model: this.config.model + }); + } + + return { + content: response.choices[0].message.content, + totalTokens: response.usage?.total_tokens, + }; + } + + /** + * Generate a completion using Anthropic + */ + private async generateAnthropicCompletion(messages: Message[]): Promise<LLMResponse> { + if (!this.anthropicClient) { + throw new APIError("Anthropic client not initialized") + .addContext({ provider: "anthropic" }); + } + + // Find system message + const systemMessage = messages.find(m => m.role === "system")?.content || ""; + const anthropicMessages = this.toAnthropicMessages(messages); + + logger.debug(`Sending request to Anthropic with model: ${this.config.model}`); + const response = await this.anthropicClient.messages.create({ + model: this.config.model, + messages: anthropicMessages, + system: systemMessage, + temperature: this.config.temperature, + max_tokens: this.config.maxTokens || 4096, + }); + + // Check for text content in the first content block + const content = response.content[0]; + if (!content || !('text' in content)) { + throw new APIError("Anthropic returned empty or invalid response content") + .addContext({ + responseId: response.id, + model: this.config.model, + contentType: content ? 
typeof content : 'undefined' + }); + } + + return { + content: content.text, + totalTokens: response.usage?.input_tokens + response.usage?.output_tokens, + }; + } +} \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/test-scrape.ts b/x/henry/mp-sandbox-agent/test-scrape.ts new file mode 100644 index 000000000000..7a7de03909ee --- /dev/null +++ b/x/henry/mp-sandbox-agent/test-scrape.ts @@ -0,0 +1,29 @@ +import * as dotenv from "dotenv"; +import { scrapePages } from "./tools/scrape"; +import { logger, LogLevel } from "./utils/logger"; + +// Load environment variables from .env file +dotenv.config(); + +// Set up logger +logger.setLevel(LogLevel.INFO); + +async function testScrape() { + console.log("Testing scrape tool with cleaner logging..."); + + const result = await scrapePages.fn( + { urls: ["https://example.com", "https://news.ycombinator.com"] }, + { log: (...args) => console.log(...args) } + ); + + console.log("\nTest completed. Tool returned:", + result.type === "success" + ? `Success with ${result.result.length} results` + : `Error: ${result.error}` + ); +} + +testScrape().catch(err => { + console.error("Test failed with error:", err); + process.exit(1); +}); \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/tools/fetch_weather.ts b/x/henry/mp-sandbox-agent/tools/fetch_weather.ts index 0ef926486b35..3f53f2dfa0a6 100644 --- a/x/henry/mp-sandbox-agent/tools/fetch_weather.ts +++ b/x/henry/mp-sandbox-agent/tools/fetch_weather.ts @@ -1,35 +1,72 @@ import { defineTool } from "./helpers"; import { z } from "zod"; +import { ok, err } from "./types"; +import { logger } from "../utils/logger"; +import { APIError, ValidationError } from "../utils/errors"; + +const WeatherSchema = z.object({ + city: z.string().describe("Full city name with country"), + temperature: z.number().describe("Temperature in °C"), + precipitation: z.number().describe("Precipitation in mm"), + weathercode: z.number().describe("WMO weather code"), + units: z + .object({ + temperature: z.string().describe("Temperature unit (e.g., °C)"), + precipitation: z.string().describe("Precipitation unit (e.g., mm)"), + }) + .describe("Measurement units for temperature and precipitation"), +}); export const fetchWeather = defineTool( - "Fetches current weather data for the specified city", + "Fetches current weather data for the specified city. 
" + + "Automatically logs results in the execution logs (no need to do it manually).", z.object({ city: z.string().describe("Name of the city to get weather for"), }), - z.object({ - city: z.string().describe("Full city name with country"), - temperature: z.number().describe("Temperature in °C"), - precipitation: z.number().describe("Precipitation in mm"), - weathercode: z.number().describe("WMO weather code"), - units: z - .object({ - temperature: z.string().describe("Temperature unit (e.g., °C)"), - precipitation: z.string().describe("Precipitation unit (e.g., mm)"), - }) - .describe("Measurement units for temperature and precipitation"), - }), - async ({ city }) => { + WeatherSchema, + async ({ city }, { log }) => { + logger.debug(`Fetching weather for city: "${city}"`); + + // Validate city parameter + if (!city.trim()) { + const validationError = new ValidationError("City name cannot be empty") + .addContext({ providedCity: city }); + logger.debug(`Weather validation error: ${validationError.message}`); + return err(`Invalid city name: ${validationError.message}`); + } + try { // First get coordinates for the city - const geocodeResponse = await fetch( - `https://geocoding-api.open-meteo.com/v1/search?name=${encodeURIComponent( - city - )}&count=1&language=en&format=json` - ); + const geocodeUrl = `https://geocoding-api.open-meteo.com/v1/search?name=${encodeURIComponent( + city + )}&count=1&language=en&format=json`; + + logger.debug(`Geocoding API request: ${geocodeUrl}`); + const geocodeResponse = await fetch(geocodeUrl); + + if (!geocodeResponse.ok) { + const apiError = new APIError( + "Geocoding API request failed", + geocodeResponse.status + ).addContext({ + city, + url: geocodeUrl, + statusText: geocodeResponse.statusText + }); + logger.debug(`Geocoding API error: ${apiError.message} (${apiError.statusCode})`); + return err(`Failed to geocode city: ${apiError.message}`); + } + const geocodeData = await geocodeResponse.json(); if (!geocodeData.results?.[0]) { - throw new Error(`City "${city}" not found`); + const cityNotFoundError = new ValidationError(`City "${city}" not found`) + .addContext({ + searchedCity: city, + responseData: JSON.stringify(geocodeData) + }); + logger.debug(`City not found: ${city}`); + return err(`City "${city}" not found in geocoding database`); } const { @@ -38,14 +75,52 @@ export const fetchWeather = defineTool( name: foundCity, country, } = geocodeData.results[0]; + + logger.debug(`Found city coordinates: ${foundCity}, ${country} (${latitude}, ${longitude})`); // Then get weather for those coordinates - const response = await fetch( - `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}¤t=temperature_2m,precipitation,weathercode` - ); + const weatherUrl = `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}¤t=temperature_2m,precipitation,weathercode`; + logger.debug(`Weather API request: ${weatherUrl}`); + + const response = await fetch(weatherUrl); + + if (!response.ok) { + const apiError = new APIError( + "Weather API request failed", + response.status + ).addContext({ + city: `${foundCity}, ${country}`, + coordinates: { latitude, longitude }, + url: weatherUrl, + statusText: response.statusText + }); + logger.debug(`Weather API error: ${apiError.message} (${apiError.statusCode})`); + return err(`Failed to fetch weather data: ${apiError.message}`); + } + const data = await response.json(); + + // Validate weather data + if (!data.current || + typeof data.current.temperature_2m !== 'number' 
|| + typeof data.current.precipitation !== 'number' || + typeof data.current.weathercode !== 'number') { + const dataError = new APIError("Weather API returned invalid data format") + .addContext({ + response: JSON.stringify(data), + city: `${foundCity}, ${country}` + }); + logger.debug(`Weather data validation error: ${dataError.message}`); + return err(`Weather data for ${foundCity}, ${country} has invalid format`); + } + + log( + `Weather data for ${foundCity}, ${country}:\n${JSON.stringify(data)}` + ); + + logger.debug(`Successfully fetched weather for ${foundCity}, ${country}`); - return { + return ok({ city: `${foundCity}, ${country}`, temperature: data.current.temperature_2m, precipitation: data.current.precipitation, @@ -54,10 +129,20 @@ export const fetchWeather = defineTool( temperature: data.current_units.temperature_2m, precipitation: data.current_units.precipitation, }, - }; + }); } catch (error) { - console.error("Error fetching weather:", error); - return null; + const wrappedError = error instanceof Error + ? error + : new Error(String(error)); + + logger.debug(`Error in fetch_weather tool: ${wrappedError.message}`); + if (wrappedError.stack) { + logger.debug(`Stack trace: ${wrappedError.stack}`); + } + + return err( + `Error fetching weather: ${wrappedError.message}` + ); } } ); diff --git a/x/henry/mp-sandbox-agent/tools/helpers.ts b/x/henry/mp-sandbox-agent/tools/helpers.ts index 50ceecbe2be5..7c5db7acbdcf 100644 --- a/x/henry/mp-sandbox-agent/tools/helpers.ts +++ b/x/henry/mp-sandbox-agent/tools/helpers.ts @@ -1,16 +1,31 @@ -import type { Tool } from "./types"; +import type { Tool, ToolContext, ToolOutput } from "./types"; import { z } from "zod"; -export function defineTool<I extends z.ZodType<object>, O extends z.ZodType>( +/** + * Helper function to define a new tool with type safety + * + * @param description Description of what the tool does + * @param input Zod schema for validating the input + * @param output Zod schema for validating the success result + * @param fn Implementation function + * @returns A Tool object that can be used by the agent + */ +export function defineTool< + TInputSchema extends z.ZodType, + TOutputSchema extends z.ZodType +>( description: string, - input: I, - output: O, - implementation: (args: z.infer<I>) => Promise<z.infer<O> | null> -): Tool { + input: TInputSchema, + output: TOutputSchema, + fn: ( + args: z.infer<TInputSchema>, + context: ToolContext + ) => Promise<ToolOutput<z.infer<TOutputSchema>>> +): Tool<z.infer<TInputSchema>, z.infer<TOutputSchema>> { return { description, input, output, - fn: implementation, + fn, }; } diff --git a/x/henry/mp-sandbox-agent/tools/scrape.ts b/x/henry/mp-sandbox-agent/tools/scrape.ts new file mode 100644 index 000000000000..1e8a194b1159 --- /dev/null +++ b/x/henry/mp-sandbox-agent/tools/scrape.ts @@ -0,0 +1,127 @@ +import { z } from "zod"; +import { defineTool } from "./helpers"; +import { encoding_for_model } from "tiktoken"; + +if (!process.env.FIRECRAWL_API_KEY) { + throw new Error( + "Please set the FIRECRAWL_API_KEY environment variable in your .env file" + ); +} + +const metadataSchema = z.object({ + title: z.string().optional(), + description: z.string().optional(), + language: z.string().optional(), + sourceURL: z.string(), + pageStatusCode: z.number().optional(), + pageError: z.string().optional(), +}); + +const scrapeResponseSchema = z.object({ + success: z.boolean(), + data: z.object({ + metadata: metadataSchema, + markdown: z.string(), + }), +}); + +export const scrapePages = defineTool( + "Scrapes multiple webpages and returns their content in markdown format.", + z.object({ + urls: 
z.array(z.string()).describe("The URLs to scrape"), + }), + z.array(scrapeResponseSchema), + async (input, { log }) => { + try { + const results = await Promise.all( + input.urls.map((url) => + fetch("https://api.firecrawl.dev/v1/scrape", { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + url, + formats: ["markdown"], + }), + }).then((r) => r.json()) + ) + ); + + // Use OpenAI's tokenizer (cl100k_base is used by GPT-4 and newer models) + const tokenizer = encoding_for_model("gpt-4"); + + // Function to count tokens using tiktoken + function countTokens(text: string): number { + try { + const tokens = tokenizer.encode(text); + return tokens.length; + } catch (error) { + console.error("Error counting tokens:", error); + // Fallback to a simple approximation if tiktoken fails + return Math.ceil(text.length / 4); + } + } + + // Function to truncate text to token limit using tiktoken + function truncateToTokenLimit(text: string, tokenLimit: number): string { + try { + const tokens = tokenizer.encode(text); + + if (tokens.length <= tokenLimit) { + return text; + } + + // For safety, use a character-based approach for truncation + // Calculate roughly how many characters to include to stay under token limit + const charLimit = Math.floor((tokenLimit / tokens.length) * text.length); + + // Truncate text directly (more reliable than using tiktoken's decode) + const truncatedText = text.substring(0, charLimit); + + return truncatedText + "\n... [content truncated, full content available in result]"; + } catch (error) { + console.error("Error truncating text:", error); + // Fallback to a simple approximation if tiktoken fails + return text.substring(0, tokenLimit * 4) + "\n... [content truncated, full content available in result]"; + } + } + + // Log a human-friendly summary + console.log( + "Scrape results:", + input.urls + .map((url, i) => + results[i].success + ? `${url}: Scraped successfully (${results[i].data.markdown.length} chars, ~${countTokens(results[i].data.markdown)} tokens)` + : `${url}: Failed to scrape` + ) + .join("\n") + ); + + // Log content for the agent with proper token limiting + const TOKEN_LIMIT = 2000; + log( + input.urls + .map((url, i) => + results[i].success + ? `Content from ${url}:\n${truncateToTokenLimit(results[i].data.markdown, TOKEN_LIMIT)}` + : `${url}: Failed to scrape` + ) + .join("\n\n---\n\n") + ); + + return { + type: "success", + result: results, + }; + } catch (error) { + return { + type: "error", + error: + error instanceof Error ? 
error.message : "Unknown error occurred", + }; + } + } +); diff --git a/x/henry/mp-sandbox-agent/tools/serp.ts b/x/henry/mp-sandbox-agent/tools/serp.ts new file mode 100644 index 000000000000..2c0b56ed027a --- /dev/null +++ b/x/henry/mp-sandbox-agent/tools/serp.ts @@ -0,0 +1,90 @@ +import { z } from "zod"; +import { defineTool } from "./helpers"; + +if (!process.env.SERPAPI_API_KEY) { + throw new Error( + "Please set the SERPAPI_API_KEY environment variable in your .env file" + ); +} + +const searchResultSchema = z.object({ + position: z.number().optional(), + title: z.string().optional(), + link: z.string().optional(), + snippet: z.string().optional(), + displayed_link: z.string().optional(), +}); + +const searchResponseSchema = z.object({ + organic_results: z.array(searchResultSchema).optional(), + search_metadata: z + .object({ + status: z.string(), + id: z.string(), + }) + .optional(), + error: z.string().optional(), +}); + +export const searchWeb = defineTool( + "Search the web using Google Search. Returns organic search results for the given query. " + + "The page parameter is optional and defaults to 1. It can be used to get results beyond the first page. " + + "Logs the output of the search in the execution logs (no need to do it manually).", + z.object({ + query: z.string().describe("The search query to execute"), + page: z + .number() + .default(1) + .describe("The page number of results to fetch (1-based)"), + }), + searchResponseSchema, + async (input, { log }) => { + try { + // Calculate start parameter for pagination (Google uses 0-based indexing with 10 results per page) + const start = (input.page - 1) * 10; + + const params = { + engine: "google", + q: input.query, + start: start.toString(), + num: "10", + api_key: process.env.SERPAPI_API_KEY!, + }; + + const response = await fetch( + `https://serpapi.com/search.json?${new URLSearchParams(params)}` + ); + const data = await response.json(); + + if (data.error) { + return { type: "error", error: data.error }; + } + + log( + `Retrieved ${data.organic_results?.length || 0} results for query "${ + input.query + }" (page ${input.page}):\n${JSON.stringify( + data.organic_results?.map((r: any) => ({ + title: r.title, + link: r.link, + snippet: r.snippet, + })) + )}` + ); + + return { + type: "success", + result: { + organic_results: data.organic_results || [], + search_metadata: data.search_metadata, + }, + }; + } catch (error) { + return { + type: "error", + error: + error instanceof Error ? 
error.message : "Unknown error occurred", + }; + } + } +); diff --git a/x/henry/mp-sandbox-agent/tools/types.ts b/x/henry/mp-sandbox-agent/tools/types.ts index 6134b4373322..bb906e470c47 100644 --- a/x/henry/mp-sandbox-agent/tools/types.ts +++ b/x/henry/mp-sandbox-agent/tools/types.ts @@ -1,8 +1,64 @@ import { z } from "zod"; -export type Tool = { - fn: Function; - input: z.ZodType; - output: z.ZodType; +/** + * Utility to provide logging and other functionality to tool executions + */ +export interface ToolContext { + /** Function to log information during tool execution */ + log: (message: string) => void; +} + +/** + * Represents a tool that can be used by the agent + */ +export interface Tool<TInput, TOutput> { + /** The function that implements the tool's behavior */ + fn: ( + input: TInput, + context: ToolContext + ) => Promise<ToolOutput<TOutput>>; + /** Schema for validating and parsing the input */ + input: z.ZodType<TInput>; + /** Schema for validating and parsing the output */ + output: z.ZodType<TOutput>; + /** Description of what the tool does */ description: string; -}; +} + +/** + * Type-erased Tool for internal use when specific type information is not needed + */ +export type AnyTool = Tool<unknown, unknown>; + +/** + * Creates a Zod schema for validating tool outputs + * @param valueSchema Schema for the success result + * @returns A union schema that can validate either success or error results + */ +export const ToolOutput = <T extends z.ZodType>(valueSchema: T) => + z.union([ + z.object({ type: z.literal("success"), result: valueSchema }), + z.object({ type: z.literal("error"), error: z.string() }), + ]); + +/** + * Represents the output of a tool execution + * Either a success with a result of type T, or an error with a string message + */ +export type ToolOutput<T> = + | { type: "success"; result: T } + | { type: "error"; error: string }; + +export function isOk<T>( + output: ToolOutput<T> +): output is { type: "success"; result: T } { + return output.type === "success"; +} + +export function ok<T>(result: T): ToolOutput<T> { + return { type: "success", result }; +} + +export function err(error: string): ToolOutput<never> { + return { type: "error", error }; +} diff --git a/x/henry/mp-sandbox-agent/utils/config.ts b/x/henry/mp-sandbox-agent/utils/config.ts new file mode 100644 index 000000000000..b98fe27bbe5c --- /dev/null +++ b/x/henry/mp-sandbox-agent/utils/config.ts @@ -0,0 +1,160 @@ +import { z } from "zod"; +import { ValidationError } from "./errors"; +import { logger } from "./logger"; + +/** + * Supported AI provider types + */ +export type Provider = "openai" | "anthropic"; + +/** + * Interface for AI model configuration + */ +export interface ModelConfig { + /** The AI provider (openai, anthropic) */ + provider: Provider; + /** The model name to use */ + model: string; + /** API key for the provider */ + apiKey: string; + /** Maximum tokens to generate (defaults based on provider) */ + maxTokens?: number; + /** Temperature setting for generation (defaults based on provider) */ + temperature?: number; +} + +/** + * Supported OpenAI models + */ +export const OPENAI_MODELS = [ + "gpt-4o", + "gpt-4-turbo", + "gpt-4", + "gpt-3.5-turbo", +] as const; + +/** + * Supported Anthropic models + */ +export const ANTHROPIC_MODELS = [ + // Latest models + "claude-3-7-sonnet-20250219", + "claude-3-5-sonnet-20241022", + "claude-3-5-haiku-20241022", + // Previous models + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", +] as const; + +/** + * Zod schema for validating OpenAI models + */ +const openAIModelSchema = z.enum(OPENAI_MODELS); + +/** + * Zod 
schema for validating Anthropic models + */ +const anthropicModelSchema = z.enum(ANTHROPIC_MODELS); + +/** + * Default configuration values for each provider + */ +export const DEFAULT_CONFIGS: Record> = { + openai: { + provider: "openai", + model: "gpt-4o", + temperature: 0.0, + maxTokens: 4096, + }, + anthropic: { + provider: "anthropic", + model: "claude-3-7-sonnet-20250219", // Updated to latest model + temperature: 0.0, + maxTokens: 4096, + }, +}; + +/** + * Loads model configuration from environment variables with sensible defaults + * @returns A validated ModelConfig object + */ +export function loadModelConfig(): ModelConfig { + // Determine provider from environment variable or default to OpenAI + const provider = (process.env.AI_PROVIDER?.toLowerCase() || "openai") as Provider; + + if (provider !== "openai" && provider !== "anthropic") { + throw new ValidationError(`Invalid AI provider: ${provider}. Must be one of: openai, anthropic`) + .addContext({ + supportedProviders: ["openai", "anthropic"], + providedValue: provider + }); + } + + // Get API key based on provider + const apiKey = provider === "openai" + ? process.env.OPENAI_API_KEY + : process.env.ANTHROPIC_API_KEY; + + if (!apiKey) { + throw new ValidationError(`Missing API key for ${provider}`) + .addContext({ + provider, + requiredEnvVar: provider === "openai" ? "OPENAI_API_KEY" : "ANTHROPIC_API_KEY" + }); + } + + // Get model from environment variable or use default + const modelFromEnv = process.env.AI_MODEL; + let model: string; + + // Validate model based on provider + if (modelFromEnv) { + if (provider === "openai") { + const result = openAIModelSchema.safeParse(modelFromEnv); + if (!result.success) { + logger.warn(`Invalid OpenAI model: ${modelFromEnv}. Using default: ${DEFAULT_CONFIGS.openai.model}`); + model = DEFAULT_CONFIGS.openai.model; + } else { + model = result.data; + } + } else { + const result = anthropicModelSchema.safeParse(modelFromEnv); + if (!result.success) { + logger.warn(`Invalid Anthropic model: ${modelFromEnv}. Using default: ${DEFAULT_CONFIGS.anthropic.model}`); + model = DEFAULT_CONFIGS.anthropic.model; + } else { + model = result.data; + } + } + } else { + // Use default model for the provider + model = DEFAULT_CONFIGS[provider].model; + } + + // Parse temperature if provided + const temperatureFromEnv = process.env.AI_TEMPERATURE + ? parseFloat(process.env.AI_TEMPERATURE) + : undefined; + + const temperature = temperatureFromEnv !== undefined + ? Math.max(0, Math.min(1, temperatureFromEnv)) // Clamp between 0 and 1 + : DEFAULT_CONFIGS[provider].temperature; + + // Parse max tokens if provided + const maxTokensFromEnv = process.env.AI_MAX_TOKENS + ? parseInt(process.env.AI_MAX_TOKENS, 10) + : undefined; + + const maxTokens = maxTokensFromEnv !== undefined && !isNaN(maxTokensFromEnv) + ? maxTokensFromEnv + : DEFAULT_CONFIGS[provider].maxTokens; + + return { + provider, + model, + apiKey, + temperature, + maxTokens, + }; +} \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/utils/errors.ts b/x/henry/mp-sandbox-agent/utils/errors.ts new file mode 100644 index 000000000000..323d84733607 --- /dev/null +++ b/x/henry/mp-sandbox-agent/utils/errors.ts @@ -0,0 +1,214 @@ +/** + * Custom error classes and error handling utilities for the MicroPython Sandbox Agent. + * Provides consistent error handling with proper context information. 
+ */ + +/** + * Base error class for the application + * Contains context information to help with debugging + */ +export class AppError extends Error { + /** Error code for categorizing errors */ + public code: string; + + /** Additional context about the error */ + public context: Record<string, unknown>; + + /** Original cause of the error, if it was wrapped */ + public cause?: Error; + + /** Whether the error has been handled */ + public handled: boolean = false; + + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.name = this.constructor.name; + this.code = 'APP_ERROR'; + this.context = {}; + + // Capture the stack trace + if (Error.captureStackTrace) { + Error.captureStackTrace(this, this.constructor); + } + + // Extract cause if provided + if (options?.cause instanceof Error) { + this.cause = options.cause; + } + } + + /** + * Add additional context to the error + */ + addContext(context: Record<string, unknown>): this { + this.context = { ...this.context, ...context }; + return this; + } + + /** + * Mark the error as handled to prevent duplicate logging + */ + markHandled(): this { + this.handled = true; + return this; + } + + /** + * Get a structured representation of the error for logging + */ + toJSON(): Record<string, unknown> { + return { + name: this.name, + message: this.message, + code: this.code, + context: this.context, + stack: this.stack, + cause: this.cause ? ( + this.cause instanceof AppError + ? this.cause.toJSON() + : { + name: this.cause.name, + message: this.cause.message, + stack: this.cause.stack + } + ) : undefined + }; + } +} + +/** + * Error class for validation errors + */ +export class ValidationError extends AppError { + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.code = 'VALIDATION_ERROR'; + } +} + +/** + * Error class for configuration errors + */ +export class ConfigurationError extends AppError { + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.code = 'CONFIG_ERROR'; + } +} + +/** + * Error class for external API errors + */ +export class APIError extends AppError { + public statusCode?: number; + + constructor(message: string, statusCode?: number, options?: ErrorOptions) { + super(message, options); + this.code = 'API_ERROR'; + this.statusCode = statusCode; + } + + toJSON(): Record<string, unknown> { + return { + ...super.toJSON(), + statusCode: this.statusCode + }; + } +} + +/** + * Error class for sandbox execution errors + */ +export class SandboxError extends AppError { + public stdout: string; + public stderr: string; + + constructor(message: string, stdout: string = '', stderr: string = '', options?: ErrorOptions) { + super(message, options); + this.code = 'SANDBOX_ERROR'; + this.stdout = stdout; + this.stderr = stderr; + } + + toJSON(): Record<string, unknown> { + return { + ...super.toJSON(), + stdout: this.stdout, + stderr: this.stderr + }; + } +} + +/** + * Error class for tool execution errors + */ +export class ToolError extends AppError { + public toolName: string; + + constructor(toolName: string, message: string, options?: ErrorOptions) { + super(`Error in tool '${toolName}': ${message}`, options); + this.code = 'TOOL_ERROR'; + this.toolName = toolName; + } + + toJSON(): Record<string, unknown> { + return { + ...super.toJSON(), + toolName: this.toolName + }; + } +} + +/** + * Wraps an unknown error in an AppError for consistent handling + */ +export function wrapError(error: unknown, defaultMessage = 'An unexpected error occurred'): AppError { + if (error instanceof AppError) { + return error; + } + + if (error 
instanceof Error) { + return new AppError(error.message, { cause: error }); + } + + if (typeof error === 'string') { + return new AppError(error); + } + + return new AppError(defaultMessage).addContext({ originalError: error }); +} + +/** + * Creates a validation error with the provided field information + */ +export function createValidationError( + message: string, + fieldName: string, + value?: unknown +): ValidationError { + return new ValidationError(message).addContext({ + field: fieldName, + invalidValue: value + }); +} + +/** + * Creates a configuration error with the provided config information + */ +export function createConfigError( + message: string, + configKey: string, + expectedValue?: string +): ConfigurationError { + return new ConfigurationError(message).addContext({ + configKey, + expectedValue + }); +} + +/** + * Type guard to check if an error is an instance of AppError + */ +export function isAppError(error: unknown): error is AppError { + return error instanceof AppError; +} \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/utils/logger.ts b/x/henry/mp-sandbox-agent/utils/logger.ts new file mode 100644 index 000000000000..78ced5156b7c --- /dev/null +++ b/x/henry/mp-sandbox-agent/utils/logger.ts @@ -0,0 +1,259 @@ +/** + * A configurable logging system for the MicroPython Sandbox Agent. + * Supports different log levels and output formats. + */ +import { AppError, isAppError } from "./errors"; + +export enum LogLevel { + ERROR = 0, + WARN = 1, + INFO = 2, + DEBUG = 3, + TRACE = 4, +} + +export interface LoggerOptions { + /** Minimum level to log */ + level: LogLevel; + /** Whether to include timestamps in logs */ + timestamps?: boolean; + /** Whether to include log level in logs */ + showLevel?: boolean; + /** Custom output function (defaults to console) */ + outputFn: (message: string, level: LogLevel) => void; +} + +export class Logger { + private options: LoggerOptions; + + constructor(options: Partial = {}) { + this.options = { + level: options.level ?? LogLevel.INFO, + timestamps: options.timestamps ?? true, + showLevel: options.showLevel ?? true, + outputFn: options.outputFn ?? this.defaultOutputFn, + }; + } + + /** + * Default output function that logs to the console + */ + private defaultOutputFn(message: string, level: LogLevel): void { + switch (level) { + case LogLevel.ERROR: + console.error(message); + break; + case LogLevel.WARN: + console.warn(message); + break; + case LogLevel.INFO: + case LogLevel.DEBUG: + case LogLevel.TRACE: + default: + console.log(message); + break; + } + } + + /** + * Format a log message based on configuration + */ + private formatMessage(level: LogLevel, message: string): string { + const parts: string[] = []; + + if (this.options.timestamps) { + parts.push(`[${new Date().toISOString()}]`); + } + + if (this.options.showLevel) { + parts.push(`[${LogLevel[level]}]`); + } + + parts.push(message); + return parts.join(' '); + } + + /** + * Log a message if the level is enabled + * @param level The log level + * @param message The message to log + * @param args Values to substitute into the message + */ + private log(level: LogLevel, message: string, ...args: unknown[]): void { + if (level > this.options.level) return; + + let formattedMessage = this.formatMessage(level, message); + + // Handle additional args by replacing %s, %d, etc. 
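+    // For example (illustrative): logger.info("Fetched %d results for %s", 3, "Paris")
+    // would print something like "[2025-01-01T00:00:00.000Z] [INFO] Fetched 3 results for Paris".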
+ if (args.length > 0) { + formattedMessage = formattedMessage.replace(/%[sdjifoO%]/g, (match): string => { + if (match === '%%') return '%'; + + const value = args.shift(); + if (value === undefined) return ''; + + // Format based on specifier + switch (match) { + case '%j': + case '%o': + case '%O': + try { + return JSON.stringify(value, null, 2); + } catch (err) { + return String(value); + } + case '%d': + case '%i': + return Number(value).toString(); + case '%f': + return Number(value).toFixed(6); + case '%s': + default: + return String(value); + } + }); + } + + this.options.outputFn(formattedMessage, level); + } + + /** + * Log an error message + * @param message The message to log + * @param args Values to substitute into the message + */ + error(message: string, ...args: unknown[]): void { + this.log(LogLevel.ERROR, message, ...args); + } + + /** + * Log an error object with full context + * @param error The error object to log + * @param message Optional message to display before the error + */ + logError(error: unknown, message?: string): void { + if (this.options.level < LogLevel.ERROR) return; + + if (isAppError(error)) { + // Already an AppError with context + if (message) { + this.error(message); + } + + // Log the structured error + const errorObj = error.toJSON(); + this.options.outputFn( + this.formatMessage(LogLevel.ERROR, `[${errorObj.code}] ${errorObj.message}`), + LogLevel.ERROR + ); + + // Log context if present + if (errorObj.context && typeof errorObj.context === 'object' && Object.keys(errorObj.context).length > 0) { + this.options.outputFn( + this.formatMessage(LogLevel.ERROR, `Context: ${JSON.stringify(errorObj.context, null, 2)}`), + LogLevel.ERROR + ); + } + + // Log stack trace at debug level + if (errorObj.stack && this.options.level >= LogLevel.DEBUG) { + this.options.outputFn( + this.formatMessage(LogLevel.DEBUG, `Stack: ${errorObj.stack}`), + LogLevel.DEBUG + ); + } + + // Log cause if present + if (errorObj.cause) { + this.options.outputFn( + this.formatMessage(LogLevel.ERROR, `Caused by: ${JSON.stringify(errorObj.cause, null, 2)}`), + LogLevel.ERROR + ); + } + } else if (error instanceof Error) { + // Standard Error object + this.error(message || error.message); + if (this.options.level >= LogLevel.DEBUG && error.stack) { + this.debug(`Stack: ${error.stack}`); + } + } else { + // Unknown error type + this.error(message || 'Unknown error occurred'); + this.debug(`Error details: ${JSON.stringify(error, null, 2)}`); + } + } + + /** + * Log a warning message + * @param message The message to log + * @param args Values to substitute into the message + */ + warn(message: string, ...args: unknown[]): void { + this.log(LogLevel.WARN, message, ...args); + } + + /** + * Log an info message + * @param message The message to log + * @param args Values to substitute into the message + */ + info(message: string, ...args: unknown[]): void { + this.log(LogLevel.INFO, message, ...args); + } + + /** + * Log a debug message + * @param message The message to log + * @param args Values to substitute into the message + */ + debug(message: string, ...args: unknown[]): void { + this.log(LogLevel.DEBUG, message, ...args); + } + + /** + * Log a trace message (most verbose) + * @param message The message to log + * @param args Values to substitute into the message + */ + trace(message: string, ...args: unknown[]): void { + this.log(LogLevel.TRACE, message, ...args); + } + + /** + * Create a separator line for visual grouping in logs + */ + separator(): void { + 
this.info("--------------------------------"); + } + + /** + * Change the log level + */ + setLevel(level: LogLevel): void { + this.options.level = level; + } + + /** + * Get the current log level + */ + getLevel(): LogLevel { + return this.options.level; + } + + /** + * Enable or disable timestamps + */ + setTimestamps(enabled: boolean): void { + this.options.timestamps = enabled; + } + + /** + * Enable or disable showing log level + */ + setShowLevel(enabled: boolean): void { + this.options.showLevel = enabled; + } +} + +// Create a default logger instance for easy imports +export const logger = new Logger(); \ No newline at end of file
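
Putting the pieces together: the sketch below is illustrative only and is not part of the diff. It wires up the new utilities using only the exports shown above (`loadModelConfig`, `LLMService`, `PythonSandbox`, `fetchWeather`, `isOk`, `logger`, `isAppError`) and follows the same calling conventions as the test suite and `test-scrape.ts`; the file name, import paths, and the `"example"` module id passed to `PythonSandbox.create` are assumptions.

```typescript
// example.ts (hypothetical): composing the new utilities end to end.
import * as dotenv from "dotenv";
import { z } from "zod";
import { PythonSandbox } from "./sandbox";
import { LLMService } from "./services/llm";
import { loadModelConfig } from "./utils/config";
import { logger, LogLevel } from "./utils/logger";
import { isAppError } from "./utils/errors";
import { fetchWeather } from "./tools/fetch_weather";
import { isOk } from "./tools/types";

dotenv.config();
logger.setLevel(LogLevel.DEBUG);

async function main() {
  // 1. Provider-agnostic LLM call driven by environment configuration.
  const llm = new LLMService(loadModelConfig());
  const { content } = await llm.generateCompletion([
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Say hello in one sentence." },
  ]);
  logger.info("LLM replied: %s", content);

  // 2. Call a tool directly and branch on its ToolOutput result.
  const weather = await fetchWeather.fn(
    { city: "Paris" },
    { log: (message) => logger.debug(message) }
  );
  if (isOk(weather)) {
    logger.info(
      "Temperature in %s: %d%s",
      weather.result.city,
      weather.result.temperature,
      weather.result.units.temperature
    );
  } else {
    logger.warn("Weather lookup failed: %s", weather.error);
  }

  // 3. Expose a function to the MicroPython sandbox and call it from Python.
  const sandbox = await PythonSandbox.create("example");
  sandbox.expose("shout", {
    fn: async ({ text }) => text.toUpperCase(),
    input: z.object({ text: z.string() }),
    output: z.string(),
    description: "Upper-cases a string",
  });
  try {
    const { stdout } = await sandbox.runCode("print(await shout(text='hello'))");
    logger.info("Sandbox said: %s", stdout.trim());
  } catch (error) {
    // SandboxError carries stdout/stderr plus the context added in runCode.
    if (isAppError(error)) logger.logError(error, "Sandbox execution failed");
    else throw error;
  }
}

main().catch((error) => {
  logger.logError(error, "Example failed");
  process.exit(1);
});
```

The sketch deliberately calls the tool's `fn` directly with a `ToolContext`, mirroring `test-scrape.ts`, rather than routing through the `Agent` class, which is not part of this diff.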