diff --git a/x/henry/mp-sandbox-agent/.env.example b/x/henry/mp-sandbox-agent/.env.example new file mode 100644 index 000000000000..d010e018d551 --- /dev/null +++ b/x/henry/mp-sandbox-agent/.env.example @@ -0,0 +1,16 @@ +# Required API keys +OPENAI_API_KEY=sk-xxx +ANTHROPIC_API_KEY=sk-ant-xxx +SERPAPI_API_KEY=xxx +FIRECRAWL_API_KEY=xxx + +# LLM Configuration +AI_PROVIDER=openai # openai or anthropic +AI_MODEL=gpt-4o # For OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo + # For Anthropic: claude-3-7-sonnet-20250219, claude-3-5-sonnet-20241022, claude-3-5-haiku-20241022, + # claude-3-opus-20240229, claude-3-sonnet-20240229, claude-3-haiku-20240307 +AI_TEMPERATURE=0.0 # 0.0 to 1.0 +AI_MAX_TOKENS=4096 # Maximum tokens to generate + +# Logging +LOG_LEVEL=INFO # ERROR, WARN, INFO, DEBUG, TRACE \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/DOCUMENTATION.md b/x/henry/mp-sandbox-agent/DOCUMENTATION.md new file mode 100644 index 000000000000..3b7633b25eeb --- /dev/null +++ b/x/henry/mp-sandbox-agent/DOCUMENTATION.md @@ -0,0 +1,512 @@ +# MicroPython Sandbox Agent + +## Overview + +MicroPython Sandbox Agent is a secure, code-first AI agent framework that uses executable Python code instead of traditional JSON-based tool calls. The agent is inspired by the CodeAct approach (Wang et al., 2024) and runs generated Python code in a MicroPython WebAssembly sandbox, providing strong security guarantees while maintaining expressiveness and flexibility. + +## Key Features + +### 1. Code Generation Over Tool Calls +- **Code-First Approach**: Instead of using JSON-formatted tool calls, the agent generates executable Python code. +- **Enhanced Expressiveness**: Python code provides greater flexibility and expressiveness for handling complex tasks. +- **Iterative Reasoning**: The agent can analyze outputs, plan next steps, and generate new code in an iterative process. + +### 2. Secure Execution Environment +- **MicroPython WebAssembly Sandbox**: All generated Python code runs in a MicroPython environment compiled to WebAssembly. +- **Strong Sandboxing**: Provides isolation and protection from potentially harmful code execution. +- **Controlled Access**: Only explicitly exposed functions are available to the executed code. + +### 3. Type-Safe Tool Definitions +- **Zod Schema Validation**: Input and output validation using Zod schemas ensures type safety. +- **Clear Tool Documentation**: Automatically generates documentation for available tools. +- **Error Handling**: Robust error handling for tool execution with clear error messages. + +### 4. Multi-Model Support +- **Model Flexibility**: Support for both OpenAI (GPT-4o, GPT-4, etc.) and Anthropic (Claude) models. +- **Configurable Parameters**: Customizable model settings like temperature and token limits. +- **Easy Switching**: Simple environment variable configuration to change between providers. + +## Core Components + +### 1. Agent Class + +The `Agent` class is the primary interface for creating and managing AI agents: + +```typescript +// Creating an agent +const agent = await Agent.create("Research the weather in Paris"); + +// Run agent steps with provided tools +const answer = await agent.step({ + fetch_weather: fetchWeather, + search_web: searchWeb, + // other tools... +}); +``` + +Key methods: +- `create(goal: string)`: Creates a new agent with a specified goal +- `step(tools: Record)`: Runs one step of the agent with provided tools +- `getSteps()`: Returns the agent's step history + +### 2. 
PythonSandbox Class + +The `PythonSandbox` class provides a secure execution environment for Python code: + +```typescript +// Create a sandbox instance +const sandbox = await PythonSandbox.create(); + +// Expose a function to the sandbox +sandbox.expose("fetch_data", myToolDefinition); + +// Run Python code in the sandbox +const result = await sandbox.runCode("data = await fetch_data({'url': 'example.com'})"); +``` + +Key methods: +- `create()`: Creates a new sandbox instance +- `expose(name: string, func: ExposedFunction)`: Exposes a function to the sandbox +- `runCode(code: string)`: Executes Python code in the sandbox + +### 3. Tool Definition System + +Tools are defined using a type-safe API: + +```typescript +const fetchWeather = defineTool( + "Fetch weather data for a location", + z.object({ location: z.string() }), + z.object({ temperature: z.number(), conditions: z.string() }), + async (input, { log }) => { + // Implementation... + return ok({ temperature: 22, conditions: "Sunny" }); + } +); +``` + +Each tool includes: +- Description: Clear documentation of the tool's purpose +- Input schema: Zod schema defining expected input parameters +- Output schema: Zod schema defining the return value structure +- Implementation function: Async function that performs the actual work + + +## Built-in Tools + +### 1. Web Search +The `search_web` tool provides web search capabilities: + +```python +results = await search_web({"query": "latest news about AI"}) +``` + +### 2. Web Scraping +The `scrape_pages` tool extracts content from web pages: + +```python +content = await scrape_pages({"urls": ["https://example.com"]}) +``` + +### 3. Weather Information +The `fetch_weather` tool retrieves weather data: + +```python +weather = await fetch_weather({"location": "New York"}) +``` + +## Utility Components + +### Logging System + +The `Logger` class in `utils/logger.ts` provides a configurable logging system: + +```typescript +import { logger, LogLevel } from "./utils/logger"; + +// Set log level (ERROR, WARN, INFO, DEBUG, TRACE) +logger.setLevel(LogLevel.DEBUG); + +// Basic logging +logger.info("This is an informational message"); +logger.error("An error occurred: %s", errorMessage); +logger.debug("Debug data: %o", debugObject); + +// Configure logger options +logger.setTimestamps(false); // Disable timestamps in log output +logger.setShowLevel(false); // Hide log level in output + +// Create a custom logger instance +const customLogger = new Logger({ + level: LogLevel.WARN, + timestamps: true, + showLevel: true, + outputFn: (message, level) => { + // Custom output function + myLoggingService.log(message, level); + } +}); +``` + +This logging system replaces direct `console.log` calls and provides: + +- Multiple severity levels (ERROR, WARN, INFO, DEBUG, TRACE) +- Configurable formatting (timestamps, level indicators) +- Support for string interpolation with %s, %d, %o, etc. 
+- Customizable output destinations through outputFn +- Runtime configuration + +## Usage Examples + +### Basic Usage +```typescript +import { Agent } from "./agent"; +import { fetchWeather, searchWeb, scrapePages } from "./tools"; + +async function main() { + // Get the query from command line arguments + const request = process.argv[2]; + if (!request) { + console.error("Please provide a request as a command line argument"); + process.exit(1); + } + + // Create an agent with the query + const agent = await Agent.create(request); + + // Define available tools + const tools = { + fetch_weather: fetchWeather, + search_web: searchWeb, + scrape_pages: scrapePages, + }; + + // Run the agent until it has an answer + let answer = null; + while (answer === null) { + answer = await agent.step(tools); + } + + // Display the final answer + console.log("\nFinal answer:"); + console.log(answer); +} + +main().catch(error => { + console.error("Error:", error); + process.exit(1); +}); +``` + +### Using Custom Tools +```typescript +import { Agent } from "./agent"; +import { defineTool } from "./tools/helpers"; +import { z } from "zod"; +import { ok } from "./tools/types"; + +// Define a custom tool +const calculator = defineTool( + "Performs basic arithmetic operations", + z.object({ + operation: z.enum(["add", "subtract", "multiply", "divide"]), + a: z.number(), + b: z.number() + }), + z.object({ result: z.number() }), + async (input, { log }) => { + const { operation, a, b } = input; + let result; + + switch (operation) { + case "add": result = a + b; break; + case "subtract": result = a - b; break; + case "multiply": result = a * b; break; + case "divide": + if (b === 0) return { type: "error", error: "Division by zero" }; + result = a / b; + break; + } + + log(`Calculated ${operation}: ${a} ${operation} ${b} = ${result}`); + return ok({ result }); + } +); + +async function runCalculatorAgent() { + const agent = await Agent.create("Calculate the result of complex math expressions"); + + const tools = { calculator }; + + let answer = null; + while (answer === null) { + answer = await agent.step(tools); + } + + console.log("Result:", answer); +} +``` + +## Future Vision + +Based on the TODO section in the README, the project aims to enhance the agent's capabilities: + +1. **Enhanced Agent Capabilities**: + - Support for "artifacts" (named documents that can be passed to the agent) + - Ability to control when the agent can stop execution + - Improved reasoning and token management + +2. **Research and Content Generation**: + - Web search and information extraction + - Content processing tools for large documents + - Advanced scraping capabilities + +3. **Technical Enhancements**: + - Persistent state storage (Redis/PostgreSQL/filesystem) + - Improved error handling and recovery + +This vision positions the package as a powerful tool for generating, executing, and reasoning with Python code to solve complex tasks while maintaining a secure execution environment. + +## Improvement Suggestions + +The following improvements would enhance the codebase's architecture, security, and maintainability: + +1. **Architectural Improvements**: + - Reduce coupling between the Agent and PythonSandbox classes + - Split the Agent class into smaller components with single responsibilities + - Define clear interfaces for key components to improve testability + - ✅ Make model selection configurable rather than hardcoded + +2. 
**Code Quality**: + - ✅ Replace `any` types with proper TypeScript definitions + - ✅ Implement consistent error handling with proper context information + - ✅ Replace direct console.log statements with a configurable logging system + - ✅ Add proper validation and defaults for environment variables + +3. **Security Enhancements**: + - Implement input validation for all external inputs (URLs, API parameters) + - Add resource limits to the sandbox (memory, execution time) + - Improve handling of API keys and sensitive information + - Implement proper security boundaries for the sandbox + +4. **Testing and Documentation**: + - Expand test coverage, especially for integration scenarios + - Add end-to-end tests for complete system behavior + - Improve JSDoc comments for all public APIs + - Add architectural documentation with component diagrams + +These improvements would significantly enhance the codebase's maintainability, security, and extensibility without changing its core concepts. + +## Implementation Progress + +### ✅ Configurable Logging System (Completed) + +A configurable logging system has been implemented in `utils/logger.ts` to replace direct console.log statements. This system provides: + +- Different log levels (ERROR, WARN, INFO, DEBUG, TRACE) +- Environment variable configuration (LOG_LEVEL) +- Formatted output with timestamps and level indicators +- String interpolation for cleaner log messages +- Customizable output functions + +Usage example: + +```typescript +import { logger, LogLevel } from "./utils/logger"; + +// Set log level +logger.setLevel(LogLevel.DEBUG); + +// Log messages at different levels +logger.error("Critical error: %s", errorMessage); +logger.warn("Warning: The operation may be slow"); +logger.info("Processing file: %s", filename); +logger.debug("Request payload: %o", payload); + +// Configure output format +logger.setTimestamps(false); // Disable timestamps +logger.setShowLevel(false); // Hide log level +``` + +The Agent and main.ts files have been updated to use this logging system, providing better control over verbosity and output format. + +### ✅ Consistent Error Handling (Completed) + +A comprehensive error handling system has been implemented in `utils/errors.ts` to provide consistent error handling with proper context information. The system includes: + +- **Custom Error Classes**: A hierarchy of error classes for different types of errors, each with specific context fields. +- **Context Information**: All errors can have context information attached to provide details for debugging. +- **Error Wrapping**: Utility functions to wrap unknown errors in structured format. +- **Integration with Logger**: Special error logging methods that display context information. 
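+
+As a rough illustration of this pattern (a hedged sketch, not the exact `utils/errors.ts` implementation), the base class and wrapping helper could look roughly like this:
+
+```typescript
+// Hedged sketch only: names follow the documentation above, but the real
+// utils/errors.ts implementation may differ in details.
+export class AppError extends Error {
+  /** Structured debugging information attached via addContext(). */
+  context: Record<string, unknown> = {};
+
+  constructor(message: string) {
+    super(message);
+    this.name = this.constructor.name;
+  }
+
+  /** Attach context for debugging; returns `this` so calls can be chained. */
+  addContext(extra: Record<string, unknown>): this {
+    this.context = { ...this.context, ...extra };
+    return this;
+  }
+}
+
+export class ValidationError extends AppError {}
+
+/** Wrap an unknown thrown value so callers always get an AppError with context support. */
+export function wrapError(error: unknown, message: string): AppError {
+  if (error instanceof AppError) {
+    return error;
+  }
+  const detail = error instanceof Error ? error.message : String(error);
+  return new AppError(`${message}: ${detail}`);
+}
+```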
+ +Key error classes: +- `AppError`: Base error class with context support +- `ValidationError`: For input validation failures +- `ConfigurationError`: For configuration and environment issues +- `APIError`: For issues with external API calls +- `SandboxError`: For Python code execution failures +- `ToolError`: For errors in tool execution + +Usage example: + +```typescript +import { ValidationError, APIError, wrapError } from "./utils/errors"; +import { logger } from "./utils/logger"; + +// Create an error with context +const validationError = new ValidationError("Invalid city name") + .addContext({ + providedValue: city, + expectedFormat: "non-empty string" + }); + +// Log the error with full context +logger.logError(validationError); + +// Wrap an unknown error +try { + await someOperation(); +} catch (error) { + const wrappedError = wrapError(error, "Operation failed"); + wrappedError.addContext({ + operation: "someOperation", + input: JSON.stringify(input) + }); + logger.logError(wrappedError); +} +``` + +This error handling system is now integrated throughout the codebase, including: +- Sandbox code execution +- Tool implementations (especially API calls) +- Agent steps and API interactions +- Configuration validation + +### ✅ Type-Safe Code with Proper TypeScript Definitions (Completed) + +The codebase has been updated to use proper TypeScript definitions, eliminating `any` types and providing better type safety. The improvements include: + +- **Generic Type Parameters**: Tools and functions now use generic type parameters for better type checking. +- **Defined Interfaces**: Well-defined interfaces for key data structures and APIs. +- **JSON Value Type**: A proper type for JSON values that can be passed between JavaScript and Python. +- **Type Guards**: Added type guards to ensure type safety when dealing with unknown data. +- **Type-Safe API Design**: Redesigned APIs to use proper TypeScript features. 
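+
+One of the improvements listed above is runtime type guards for unknown data. As a minimal, hedged sketch (the codebase may implement this differently), a guard for the `JsonValue` type shown just below could look like:
+
+```typescript
+// Illustrative only; mirrors the JsonValue type defined in the next snippet.
+type JsonValue =
+  | string
+  | number
+  | boolean
+  | null
+  | JsonValue[]
+  | { [key: string]: JsonValue };
+
+// Returns true if a value can be safely treated as JSON for Python/JS interop.
+function isJsonValue(value: unknown): value is JsonValue {
+  if (value === null) return true;
+  const t = typeof value;
+  if (t === "string" || t === "number" || t === "boolean") return true;
+  if (Array.isArray(value)) return value.every(isJsonValue);
+  if (t === "object") {
+    return Object.values(value as Record<string, unknown>).every(isJsonValue);
+  }
+  return false;
+}
+```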
+ +Key type definitions: + +```typescript +// JSON value type for Python/JavaScript interop +export type JsonValue = + | string + | number + | boolean + | null + | JsonValue[] + | { [key: string]: JsonValue }; + +// Type-safe tool definition +export interface Tool { + fn: (input: TInput, context: ToolContext) => Promise>; + input: z.ZodType; + output: z.ZodType; + description: string; +} + +// Type-safe sandbox function exposure +export interface ExposedFunction { + fn: (input: TInput) => Promise; + input: z.ZodType; + output: z.ZodType; + description: string; +} +``` + +These changes improve: +- Compile-time type checking +- Code editor autocompletion and IntelliSense +- Refactoring safety +- Documentation through types +- Developer experience + +### ✅ Configurable Model Selection (Completed) + +A flexible model configuration system has been implemented to support both OpenAI and Anthropic models: + +- **Model Configuration**: Centralized configuration in `utils/config.ts` for all model settings +- **Environment Variables**: Support for environment variables to control model selection and parameters +- **Provider Abstraction**: An LLM service layer in `services/llm.ts` that abstracts provider-specific implementation details +- **Default Values**: Sensible defaults for all model parameters + +The system supports multiple configuration options: +- `AI_PROVIDER`: Choose between 'openai' and 'anthropic' +- `AI_MODEL`: Specify the exact model version to use +- `AI_TEMPERATURE`: Control the temperature parameter (0.0 to 1.0) +- `AI_MAX_TOKENS`: Set the maximum tokens to generate + +Latest models supported: +- OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo +- Anthropic: claude-3-7-sonnet-20250219, claude-3-5-sonnet-20241022, claude-3-5-haiku-20241022, etc. + +Usage example: + +```bash +# Use OpenAI GPT-4o +AI_PROVIDER=openai AI_MODEL=gpt-4o bun run main.ts "What's the weather in Paris?" + +# Use Anthropic Claude 3.7 Sonnet +AI_PROVIDER=anthropic AI_MODEL=claude-3-7-sonnet-20250219 bun run main.ts "What's the weather in Paris?" +``` + +### ✅ Improved Prompt Management (Completed) + +The prompt management system has been improved for better organization and maintainability: + +- **Centralized Prompts**: All prompts are now defined in `agent/prompts.ts` with clear structure +- **Documented Prompts**: Each prompt has JSDoc comments explaining its purpose and usage +- **Modular Structure**: Separate prompt constants for different stages of the agent lifecycle +- **Functional Approach**: Function-based templates for prompts that need dynamic content + +Key prompts: +- `systemPrompt`: Defines the agent's role and response format +- `firstStepPrompt`: Instructions for the first step of execution +- `continuePrompt`: Template for continuing after code execution +- `toolDocsPrompt`: Template for tool documentation +- `finalAnswerPrompt`: Prompt for generating the final answer + +This structure improves: +- Code organization and readability +- Prompt maintenance and versioning +- Documentation of prompt purposes and structures + +## Installation + +1. Clone the repository +2. Install dependencies: +```bash +bun install +``` +3. 
Create a `.env` file with your API keys and configuration: +``` +# Required API keys +OPENAI_API_KEY=your_openai_api_key_here +ANTHROPIC_API_KEY=your_anthropic_api_key_here +SERPAPI_API_KEY=your_serpapi_key_here +FIRECRAWL_API_KEY=your_firecrawl_key_here + +# LLM Configuration +AI_PROVIDER=openai # openai or anthropic +AI_MODEL=gpt-4o # For OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo + # For Anthropic: claude-3-7-sonnet-20250219, claude-3-5-sonnet-20241022, etc. +AI_TEMPERATURE=0.0 # 0.0 to 1.0 +AI_MAX_TOKENS=4096 # Maximum tokens to generate + +# Logging +LOG_LEVEL=INFO # ERROR, WARN, INFO, DEBUG, TRACE +``` + +## Development + +To run tests: +```bash +bun test +``` \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/README.md b/x/henry/mp-sandbox-agent/README.md index 9c80d75b80a8..f2e610164b4f 100644 --- a/x/henry/mp-sandbox-agent/README.md +++ b/x/henry/mp-sandbox-agent/README.md @@ -17,10 +17,68 @@ An AI agent that generates and executes Python code, inspired by [CodeAct (Wang bun install ``` -3. Create a `.env` file in the root directory with your OpenAI API key: +3. Create a `.env` file in the root directory with your API keys and configuration: ``` -OPENAI_API_KEY=your_api_key_here +# Required API keys +OPENAI_API_KEY=your_openai_api_key_here +ANTHROPIC_API_KEY=your_anthropic_api_key_here +SERPAPI_API_KEY=your_serpapi_key_here +FIRECRAWL_API_KEY=your_firecrawl_key_here + +# LLM Configuration +AI_PROVIDER=openai # openai or anthropic +AI_MODEL=gpt-4o # See .env.example for available models +AI_TEMPERATURE=0.0 # 0.0 to 1.0 +AI_MAX_TOKENS=4096 # Maximum tokens to generate + +# Logging +LOG_LEVEL=INFO # ERROR, WARN, INFO, DEBUG, TRACE +``` + +See `.env.example` for a full list of configuration options. + +## Usage + +Run the agent with a query: + +```bash +bun start "What's the weather in Paris?" +``` + +You can set the log level using the LOG_LEVEL environment variable: + +```bash +LOG_LEVEL=DEBUG bun start "What's the weather in Paris?" 
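+# Any of ERROR, WARN, INFO, DEBUG, TRACE works here; ERROR keeps output minimal
+LOG_LEVEL=ERROR bun start "What's the weather in Paris?"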
+``` + +Available log levels: ERROR, WARN, INFO, DEBUG, TRACE + +Or programmatically: + +```typescript +import { Agent } from "./agent"; +import { fetchWeather, searchWeb } from "./tools"; +import { logger, LogLevel } from "./utils/logger"; + +async function main() { + // Configure the logger + logger.setLevel(LogLevel.INFO); + + const agent = await Agent.create("What's the weather in Paris?"); + + const tools = { + fetch_weather: fetchWeather, + search_web: searchWeb, + }; + + let answer = null; + while (answer === null) { + answer = await agent.step(tools); + } + + logger.info("Final answer: %s", answer); +} ``` ## Development @@ -30,3 +88,42 @@ To run tests: ```bash bun test ``` + +For more detailed documentation, see [DOCUMENTATION.md](./DOCUMENTATION.md) + +## TODO + +Future enhancements to consider: + +- [agent] Final summary config: disable or configure prompt +- [agent] "Artifacts" support (list of named docs/strings that can be passed to agent.step() that are shown to the agent) +- [agent] Ability to pass "canStopExecution" to agent.step() - if true, the agent will have access to the stop execution tool during that step +- [helpers] Add a tokenizer for better token management +- Create an "Extract from page" tool that scrapes a page, processes it by 32k token chunks, and extracts relevant information +- Implement a more robust web search and content processing system +- Add ability to persist agent state to redis/postgres/filesystem for better recovery and continuation of long-running tasks + +## Improvement Status + +This project is currently undergoing improvements based on code review feedback: + +✅ **Completed**: +- Configurable logging system (replacing direct console.log statements) +- Consistent error handling with proper context information +- Replace `any` types with proper TypeScript definitions +- Configurable model selection with support for OpenAI and Anthropic +- Improved environment variable validation +- Better prompt organization and management + +🔄 **In Progress**: +- None currently + +⏳ **Pending**: +- Reduce coupling between Agent and PythonSandbox classes +- Implement input validation for external inputs +- Add resource limits to the sandbox +- Improve handling of API keys +- Expand test coverage +- Improve JSDoc comments + +For more details, see the [Implementation Progress](./DOCUMENTATION.md#implementation-progress) section in the documentation. 
diff --git a/x/henry/mp-sandbox-agent/agent.ts b/x/henry/mp-sandbox-agent/agent.ts deleted file mode 100644 index 77aa04245a11..000000000000 --- a/x/henry/mp-sandbox-agent/agent.ts +++ /dev/null @@ -1,116 +0,0 @@ -import OpenAI from "openai"; -import { PythonSandbox } from "./sandbox"; -import type { Tool } from "./tools/types"; -import { z } from "zod"; - -function generateFunctionDocs(functions: Record): string { - let docs = "Available functions:\n"; - - for (const [fnName, { description, input, output }] of Object.entries( - functions - )) { - // Function signature with description - const inputObject = input as z.ZodObject; - const outputObject = output as z.ZodObject; - - docs += `- ${fnName}(${Object.keys(inputObject.shape).join( - ", " - )}): async function that ${description}\n`; - - // Input parameters - docs += " Parameters:\n"; - for (const [paramName, paramSchema] of Object.entries(inputObject.shape)) { - const zodSchema = paramSchema as z.ZodType; - docs += ` * ${paramName} (${zodSchema.description || "any"})\n`; - } - - // Output fields - docs += " Returns an object with fields:\n"; - for (const [fieldName, fieldSchema] of Object.entries(outputObject.shape)) { - const zodSchema = fieldSchema as z.ZodType; - docs += ` * ${fieldName} (${zodSchema.description || "any"})\n`; - } - } - - return docs; -} - -export class Agent { - private sandbox!: PythonSandbox; - private openai: OpenAI; - private exposedTools: Set = new Set(); - private goal: string; - - private constructor(goal: string, apiKey: string) { - this.goal = goal; - this.openai = new OpenAI({ apiKey }); - } - - static async create(goal: string, apiKey: string): Promise { - const agent = new Agent(goal, apiKey); - agent.sandbox = await PythonSandbox.create(); - return agent; - } - - private generateSystemPrompt(tools: Record): string { - return ( - "You are a Python code generator working towards the following goal:\n" + - this.goal + - "\n\n" + - "Your response should follow this format:\n\n" + - "1. (Optional) A brief explanation of what the code will do and why, in plain text\n" + - "2. 
A Python code block that:\n" + - " - Contains no imports\n" + - " - Contains only top-level statements (no function definitions)\n" + - " - Can use await expressions directly (top-level await is supported)\n" + - " - Contains no comments\n" + - " - Is simple and self-contained\n\n" + - generateFunctionDocs(tools) + - "\n" + - "Example response format:\n" + - "This code will fetch and display the current weather in London.\n\n" + - "```python\n" + - "weather = await fetch_weather('London')\n" + - 'print(f\'Weather in {weather["city"]}: {weather["temperature"]}°C\')\n' + - "```" - ); - } - - async step( - tools: Record, - input: string - ): Promise<{ stdout: string; stderr: string }> { - // Expose or update tools - for (const [name, tool] of Object.entries(tools)) { - this.sandbox.expose(name, tool); - this.exposedTools.add(name); - } - - const response = await this.openai.chat.completions.create({ - model: "o3-mini", - messages: [ - { - role: "system", - content: this.generateSystemPrompt(tools), - }, - { - role: "user", - content: input, - }, - ], - }); - - if (!response.choices[0].message.content) { - throw new Error("No code generated from OpenAI"); - } - - // Extract code from the response - const content = response.choices[0].message.content; - const codeMatch = content.match(/```python\n([\s\S]*?)```/) || - content.match(/```\n([\s\S]*?)```/) || [null, content]; - const code = codeMatch[1].trim(); - - // Execute the code - return await this.sandbox.runCode(code); - } -} diff --git a/x/henry/mp-sandbox-agent/agent/helpers.test.ts b/x/henry/mp-sandbox-agent/agent/helpers.test.ts new file mode 100644 index 000000000000..5934129bc26c --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/helpers.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from "bun:test"; +import { z } from "zod"; +import { generateToolDocs } from "./helpers"; +import { defineTool } from "../tools/helpers"; +import { ToolOutput } from "../tools/types"; +import type { AnyTool, Tool } from "../tools/types"; + +// Helper function to convert any Tool to AnyTool for testing +function asAnyTool>(tool: T): AnyTool { + return tool as unknown as AnyTool; +} + +describe("generateToolDocs", () => { + it("should generate docs for a simple tool", () => { + const simpleTool = defineTool( + "A simple test function", + z.object({ + name: z.string().describe("The name parameter"), + }), + z.string().describe("The return value"), + async () => ({ type: "success", result: "test" }) + ); + + const docs = generateToolDocs({ simpleTool: asAnyTool(simpleTool) }); + expect(docs).toContain( + "All functions listed may return None if they fail (check for None before accessing the result)" + ); + expect(docs).toContain("simpleTool(name): A simple test function"); + expect(docs).toContain("* name: The name parameter"); + expect(docs).toContain("Returns:"); + expect(docs).toContain("The return value"); + }); + + it("should generate docs for a tool with complex types", () => { + const complexTool = defineTool( + "A complex test function", + z.object({ + user: z + .object({ + name: z.string().describe("User's name"), + age: z.number().describe("User's age"), + }) + .describe("User object"), + options: z.array(z.string()).describe("List of options"), + }), + z.object({ + id: z.number().describe("User ID"), + settings: z + .array( + z.object({ + key: z.string().describe("Setting key"), + value: z.string().describe("Setting value"), + }) + ) + .describe("User settings"), + }), + async () => ({ type: "success", result: { id: 1, settings: [] } }) 
+ ); + + const docs = generateToolDocs({ complexTool: asAnyTool(complexTool) }); + expect(docs).toContain( + "complexTool(user, options): A complex test function" + ); + expect(docs).toContain("* user: dictionary with keys:"); + expect(docs).toContain(" * name: User's name"); + expect(docs).toContain(" * age: User's age"); + expect(docs).toContain("* options: list of string"); + expect(docs).toContain("Returns:"); + expect(docs).toContain("dictionary with keys:"); + expect(docs).toContain("* id: User ID"); + expect(docs).toContain("* settings: list of dictionary with keys:"); + expect(docs).toContain(" * key: Setting key"); + expect(docs).toContain(" * value: Setting value"); + }); + + it("should handle multiple tools", () => { + const tool1 = defineTool( + "First tool", + z.object({ a: z.string() }), + z.number(), + async () => ({ type: "success", result: 1 }) + ); + + const tool2 = defineTool( + "Second tool", + z.object({ b: z.boolean() }), + z.string(), + async () => ({ type: "success", result: "test" }) + ); + + const docs = generateToolDocs({ + tool1: asAnyTool(tool1), + tool2: asAnyTool(tool2) + }); + expect(docs).toContain("tool1(a): First tool"); + expect(docs).toContain("tool2(b): Second tool"); + }); + + it("should handle tools with nested output types", () => { + const outputTool = defineTool( + "Output test function", + z.object({ input: z.string() }), + z.string(), + async () => ({ type: "success", result: "test" }) + ); + + const docs = generateToolDocs({ outputTool: asAnyTool(outputTool) }); + // Should only show the success case type + expect(docs).toContain("Returns:"); + expect(docs).toContain("string"); + // Should not show the discriminated union structure + expect(docs).not.toContain("type:"); + expect(docs).not.toContain("result:"); + }); +}); diff --git a/x/henry/mp-sandbox-agent/agent/helpers.ts b/x/henry/mp-sandbox-agent/agent/helpers.ts new file mode 100644 index 000000000000..8060993c4152 --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/helpers.ts @@ -0,0 +1,83 @@ +import type { AnyTool } from "../tools/types"; +import { z } from "zod"; + +/** + * Converts a Zod schema to a readable description string + * @param schema The Zod schema to describe + * @param indent Indentation level for nested schemas + * @returns A human-readable description of the schema + */ +function describeZodType(schema: z.ZodTypeAny, indent = ""): string { + if (schema instanceof z.ZodArray) { + return `list of ${describeZodType(schema.element, indent + " ")}`; + } else if (schema instanceof z.ZodObject) { + let desc = "dictionary with keys:\n"; + for (const [fieldName, fieldSchema] of Object.entries(schema.shape)) { + desc += `${indent} * ${fieldName}: ${describeZodType( + fieldSchema as z.ZodType, + indent + " " + ) + .split("\n") + .join("\n" + indent)}\n`; + } + return desc; + } else if (schema instanceof z.ZodUnion && schema.options.length === 2) { + // Check if this is a ToolOutput schema + const successCase = schema.options.find( + (opt: z.ZodTypeAny) => + opt instanceof z.ZodObject && + 'type' in opt.shape && + opt.shape.type instanceof z.ZodLiteral && + opt.shape.type.value === "success" + ) as z.ZodObject<{ type: z.ZodLiteral<"success">; result: z.ZodTypeAny }> | undefined; + + if (successCase?.shape.result) { + return describeZodType(successCase.shape.result, indent); + } + // If we can't handle this union type, just describe it as a union + return `union of ${schema.options + .map((opt: z.ZodType) => describeZodType(opt, indent + " ")) + .join(" | ")}`; + } else { + return ( 
+ schema.description || + schema.constructor.name.replace("Zod", "").toLowerCase() || + "any" + ); + } +} + +/** + * Generates documentation for tools that can be used in Python code + * @param tools Dictionary of tools to document + * @returns A string containing documentation for all tools + */ +export function generateToolDocs(tools: Record): string { + let docs = + "Note: \n" + + "- All functions listed may return None if they fail (check for None before accessing the result)\n" + + "- All functions listed here are asynchronous and must be always be awaited, even if they don't return anything or you don't care about the result.\n"; + + for (const [fnName, { description, input, output }] of Object.entries(tools)) { + // Check that input is an object schema + if (!(input instanceof z.ZodObject)) { + continue; + } + + // Function signature with description + const paramNames = Object.keys(input.shape); + docs += `- ${fnName}(${paramNames.join(", ")}): ${description}\n`; + + // Input parameters + docs += " Parameters:\n"; + for (const [paramName, paramSchema] of Object.entries(input.shape)) { + docs += ` * ${paramName}: ${describeZodType(paramSchema as z.ZodTypeAny, " ")}\n`; + } + + // Output fields + docs += " Returns:\n"; + docs += describeZodType(output, " "); + } + + return docs; +} diff --git a/x/henry/mp-sandbox-agent/agent/index.ts b/x/henry/mp-sandbox-agent/agent/index.ts new file mode 100644 index 000000000000..9a5c2ad615d5 --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/index.ts @@ -0,0 +1,302 @@ +import { PythonSandbox } from "../sandbox"; +import type { Tool, AnyTool } from "../tools/types"; +import { generateToolDocs } from "./helpers"; +import { z } from "zod"; +import { defineTool } from "../tools/helpers"; +import { systemPrompt, firstStepPrompt, continuePrompt, toolDocsPrompt, finalAnswerPrompt } from "./prompts"; +import { logger } from "../utils/logger"; +import { + ValidationError, + APIError, + SandboxError, + wrapError +} from "../utils/errors"; +import { LLMService } from "../services/llm"; +import type { Message } from "../services/llm"; +import { loadModelConfig } from "../utils/config"; +import type { ModelConfig } from "../utils/config"; + +/** + * Represents a single step in the agent's execution + */ +interface StepResult { + /** The text generated by the agent */ + generation: string; + /** The output from executing the generated code */ + codeOutput: string; +} + +/** + * An AI agent that generates and executes Python code to solve tasks + */ +export class Agent { + /** The sandbox for executing Python code */ + private sandbox!: PythonSandbox; + /** The LLM service for generating code */ + private llmService: LLMService; + /** The model configuration */ + private modelConfig: ModelConfig; + /** Set of tool names that have been exposed to the sandbox */ + private exposedTools: Set = new Set(); + /** The goal the agent is trying to achieve */ + private goal: string; + /** History of all steps taken by the agent */ + private steps: StepResult[] = []; + /** Whether the agent should continue or return a final answer */ + private shouldContinue = true; + + private constructor(goal: string, modelConfig: ModelConfig) { + this.goal = goal; + this.modelConfig = modelConfig; + this.llmService = new LLMService(modelConfig); + } + + /** + * Creates a special tool that allows the agent to stop execution and provide a final answer + * @returns A Tool that signals the end of execution + */ + private getFinalExecutionTool(): Tool, null> { + return defineTool( + "Must be 
used when the execution logs contain enough information to provide a final answer to the user." + + "After using this function, the user will ask you to write a final answer based on your execution logs. " + + "This function must be awaited like any other function.", + z.object({}), + z.null(), + async () => { + this.shouldContinue = false; + return { type: "success", result: null }; + } + ); + } + + static async create(goal: string): Promise { + logger.separator(); + logger.info(`Creating agent with goal: ${goal}`); + logger.separator(); + + // Load model configuration from environment variables + const modelConfig = loadModelConfig(); + logger.info(`Using ${modelConfig.provider} with model: ${modelConfig.model}`); + + const agent = new Agent(goal, modelConfig); + agent.sandbox = await PythonSandbox.create(); + return agent; + } + + /** + * Executes one step of the agent, generating and running Python code + * @param _tools Dictionary of tools to make available to the agent + * @returns The final answer if the agent decides to stop, or null if it needs to continue + */ + async step(_tools: Record): Promise { + const tools = { ..._tools }; + if (Object.keys(tools).some((name) => name === "stop_execution")) { + throw new ValidationError("Reserved tool name cannot be used") + .addContext({ + reservedToolName: "stop_execution", + providedTools: Object.keys(tools) + }); + } + + // Add stop_execution tool, converting to AnyTool for compatibility + const stopTool = this.getFinalExecutionTool(); + tools["stop_execution"] = stopTool as unknown as AnyTool; + + // Expose or update tools + const errors: Array<{ tool: string; error: string }> = []; + const logs: Array = []; + + for (const [name, tool] of Object.entries(tools)) { + this.sandbox.expose(name, { + ...tool, + fn: async (input: unknown) => { + // Call the tool with the input and a logging context + const result = await tool.fn(input, { + log: (message: string) => { + logs.push(message + "\n"); + }, + }); + + // If successful, return the result + if (result.type === "success") { + return result.result; + } + + // If there was an error, record it and return null + errors.push({ tool: name, error: result.error }); + return null; + }, + }); + + // Keep track of which tools have been exposed + this.exposedTools.add(name); + } + + // Initialize messages with system and user prompts + const messages: Message[] = [ + { + role: "system", + content: systemPrompt, + }, + { + role: "user", + content: this.goal, + }, + ]; + + // Add messages for each previous step + for (const step of this.steps) { + // Add the assistant's response from the previous step + messages.push({ + role: "assistant", + content: step.generation, + }); + + // Add the user's response with the code output + messages.push({ + role: "user", + content: continuePrompt(step.codeOutput), + }); + } + + // For the first step, add instructions to begin with analysis and code block + if (!this.steps.length) { + messages[messages.length - 1].content += firstStepPrompt; + } + + // Add tool documentation to the last message + messages[messages.length - 1].content += toolDocsPrompt(generateToolDocs(tools)); + + logger.separator(); + logger.debug("Messages:"); + logger.debug(JSON.stringify(messages, null, 2)); + logger.separator(); + + let llmResponse; + try { + llmResponse = await this.llmService.generateCompletion(messages); + } catch (error) { + // The LLMService already wraps the error, so we just rethrow it + throw error; + } + + // Extract code from the response + const content = 
llmResponse.content; + logger.separator(); + logger.info("Code generation response:"); + logger.info(content); + logger.separator(); + + const codeMatch = content.match(/```python\n([\s\S]*?)```/) || + content.match(/```\n([\s\S]*?)```/) || [null, content]; + const code = codeMatch[1].trim(); + + // Execute the code + // Execute the code with improved error handling + const codeOutput = await (async () => { + try { + // Run the code in sandbox + const codeOutput = await this.sandbox.runCode(code); + + // Format the outputs + let output = ""; + if (codeOutput.stdout) { + output += `STDOUT:\n${codeOutput.stdout}\n\n`; + } + if (logs.length > 0) { + output += `EXECUTION LOGS:\n${logs.join("\n")}\n\n`; + } + if (codeOutput.stderr) { + output += `STDERR:\n${codeOutput.stderr}\n\n`; + } + if (errors.length > 0) { + output += `ERRORS:\n${errors + .map((e) => `* ${e.tool}: ${e.error}`) + .join("\n")}\n\n`; + } + + if (!output) { + return "No output returned from the code."; + } + + return output; + } catch (error) { + // Log detailed error for debugging + if (error instanceof SandboxError) { + logger.debug("Sandbox execution failed with error:"); + logger.debug(`Message: ${error.message}`); + logger.debug(`Context: ${JSON.stringify(error.context, null, 2)}`); + logger.debug(`Stdout: ${error.stdout}`); + logger.debug(`Stderr: ${error.stderr}`); + + // Return formatted error for the model + return `STDERR:\n${error.stderr || error.message}`; + } else { + // For other errors, wrap them for consistent handling + const wrappedError = wrapError(error, "Code execution failed"); + logger.debug(`Code execution error: ${wrappedError.message}`); + + // Return a user-friendly error message for the model + return `STDERR:\n${wrappedError.message}`; + } + } + })(); + + logger.separator(); + logger.info("Code output:"); + logger.info(codeOutput); + logger.separator(); + + messages.push({ + role: "assistant", + content: content, + }); + + if (!this.shouldContinue) { + // Add the final answer prompt + messages.push({ + role: "user", + content: finalAnswerPrompt, + }); + + try { + const finalResponse = await this.llmService.generateCompletion(messages); + return finalResponse.content; + } catch (error) { + // Handle API errors in final response generation + if (error instanceof APIError) { + logger.logError(error.addContext({ + isFinalAnswer: true + })); + } else { + logger.logError(wrapError(error, "Failed to generate final response")); + } + + // Return a fallback response since this is the final step + return "I was unable to generate a final response due to an API error. 
Please check the execution logs for the information gathered so far."; + } + } + + const stepResult: StepResult = { + generation: content, + codeOutput: codeOutput, + }; + + logger.separator(); + logger.debug("Step result:"); + logger.debug(JSON.stringify(stepResult, null, 2)); + logger.separator(); + + this.steps.push(stepResult); + + return null; + } + + /** + * Gets the history of steps taken by the agent + * @returns Array of step results containing generation and execution output + */ + getSteps(): readonly StepResult[] { + return [...this.steps]; + } +} diff --git a/x/henry/mp-sandbox-agent/agent/prompts.ts b/x/henry/mp-sandbox-agent/agent/prompts.ts new file mode 100644 index 000000000000..3305b26b2592 --- /dev/null +++ b/x/henry/mp-sandbox-agent/agent/prompts.ts @@ -0,0 +1,63 @@ +/** + * This file contains all prompts used by the agent + * Each prompt is a separate exported constant with descriptive name + */ + +/** + * System prompt that establishes the agent's role, response format, + * and explains the Python environment and conversation structure + */ +export const systemPrompt = ` + +You are a research AI agent that uses Python code to work towards a goal that has been provided by the user. +You use as many steps as you need to achieve the goal. You run a new piece of code at each step. + + + +Your response must always follow this format: +1. An analysis of the situation, in plain text. Explain what you need to do next to achieve the goal. +2. A Python code block that: + - Contains no imports + - Contains only top-level statements (no function definitions) + - Can use await expressions directly (top-level await is supported) + - Contains no comments + - Is simple and self-contained + + + +Your Python code is executed in a MicroPython sandbox which has access to a set of functions in addition to the builtins. +You do not need to import them, and you must never use the \`import\` keyword, which isn't available in the environment. +This environment supports top-level awaits. Your code does not need to be wrapped in an async function. +You don't need to import or initialize asyncio, it's already available and your code will be executed. + + + +After each step, the user will provide you your Execution Logs: the standard output and error logs generated by your code. +You will then be asked to continue working towards the goal by providing a new analysis of the situation, and a new Python code block. +Once you believe you have enough information to provide a final answer to the user, you must output a code block that uses the \`stop_execution\` function. 
+ +`; + +/** + * Prompt for the first step, instructing the agent to begin with analysis and code + */ +export const firstStepPrompt = "\nPlease begin by an analysis and a python code block to achieve the goal.\n"; + +/** + * Prompt template for continuing after seeing code output + * Use codeOutput as a placeholder for the actual output + */ +export const continuePrompt = (codeOutput: string) => + `Here is the output of the code you generated:\n\n${codeOutput}\n\nPlease continue generating code.`; + +/** + * Prompt for tool documentation + * Use toolDocs as a placeholder for the actual tool documentation + */ +export const toolDocsPrompt = (toolDocs: string) => + `\n\nYou currently have access to the following function:\n${toolDocs}`; + +/** + * Prompt for requesting a final answer + */ +export const finalAnswerPrompt = "Please provide a comprehensive final answer to the goal based on the execution logs you have."; diff --git a/x/henry/mp-sandbox-agent/bun.lock b/x/henry/mp-sandbox-agent/bun.lock index 65a8c8040311..a60b231c8257 100644 --- a/x/henry/mp-sandbox-agent/bun.lock +++ b/x/henry/mp-sandbox-agent/bun.lock @@ -4,9 +4,12 @@ "": { "name": "mp-sandbox", "dependencies": { + "@anthropic-ai/sdk": "^0.39.0", "@micropython/micropython-webassembly-pyscript": "^1.24.1", "dotenv": "^16.4.7", + "google-search-results-nodejs": "^2.1.0", "openai": "^4.85.1", + "tiktoken": "^1.0.20", "zod": "^3.24.2", }, "devDependencies": { @@ -18,6 +21,8 @@ }, }, "packages": { + "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.39.0", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-eMyDIPRZbt1CCLErRCi3exlAvNkBtRe+kW5vvJyef93PmNr/clstYgHhtvmkxN82nlKgzyGPCyGxrm0JQ1ZIdg=="], + "@micropython/micropython-webassembly-pyscript": ["@micropython/micropython-webassembly-pyscript@1.24.1", "", {}, "sha512-FC+Lv9TcwyBczC3FcyKYV/loxieXF7B7I5mwxA8wlmw8zm7RFHTL83kOSb06m0bOwgIkiSBwVlxd+2wTBM5NjA=="], "@types/bun": ["@types/bun@1.2.2", "", { "dependencies": { "bun-types": "1.2.2" } }, "sha512-tr74gdku+AEDN5ergNiBnplr7hpDp3V1h7fqI2GcR/rsUaM39jpSeKH0TFibRvU0KwniRx5POgaYnaXbk0hU+w=="], @@ -68,6 +73,8 @@ "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + "google-search-results-nodejs": ["google-search-results-nodejs@2.1.0", "", {}, "sha512-7jEAoAV/Ful7Q2BxrYrkOst14aqwUvkEUwlRBrYATUylpp/bf3uE9lDgyk9brqbDt61hA2xkAm2sog9orHImVw=="], + "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], @@ -92,6 +99,8 @@ "openai": ["openai@4.85.1", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" }, "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-jkX2fntHljUvSH3MkWh4jShl10oNkb+SsCj4auKlbu2oF4KWAnmHLNR5EpnUHK1ZNW05Rp0fjbJzYwQzMsH8ZA=="], + "tiktoken": ["tiktoken@1.0.20", "", {}, 
"sha512-zVIpXp84kth/Ni2me1uYlJgl2RZ2EjxwDaWLeDY/s6fZiyO9n1QoTOM5P7ZSYfToPvAvwYNMbg5LETVYVKyzfQ=="], + "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], "typescript": ["typescript@5.7.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw=="], diff --git a/x/henry/mp-sandbox-agent/main.ts b/x/henry/mp-sandbox-agent/main.ts index 7ec53f237f59..4a1d73b58e66 100644 --- a/x/henry/mp-sandbox-agent/main.ts +++ b/x/henry/mp-sandbox-agent/main.ts @@ -1,47 +1,66 @@ -// hello.ts +// main.ts import * as dotenv from "dotenv"; import { fetchWeather } from "./tools/fetch_weather"; import { Agent } from "./agent"; +import { scrapePages } from "./tools/scrape"; +import { searchWeb } from "./tools/serp"; +import { logger, LogLevel } from "./utils/logger"; +import { ConfigurationError, wrapError } from "./utils/errors"; +import type { AnyTool } from "./tools/types"; +import { loadModelConfig } from "./utils/config"; // Load environment variables from .env file dotenv.config(); -const apiKey = process.env.OPENAI_API_KEY; -if (!apiKey) { - throw new Error( - "Please set the OPENAI_API_KEY environment variable in your .env file" - ); -} - -// Generate system prompt for the model - async function main() { - const request = process.argv[2]; - if (!request) { - console.error("Please provide a request as a command line argument"); + try { + // Set log level from environment variable or default to INFO + const logLevelStr = process.env.LOG_LEVEL?.toUpperCase() || 'INFO'; + const logLevel = LogLevel[logLevelStr as keyof typeof LogLevel] ?? LogLevel.INFO; + logger.setLevel(logLevel); + + // Load and validate model configuration (this will throw if required env vars are missing) + loadModelConfig(); + + const request = process.argv[2]; + if (!request) { + logger.error("Please provide a request as a command line argument"); + process.exit(1); + } + + const agent = await Agent.create(request); + // Convert typed tools to AnyTool for compatibility + const asAnyTool = (tool: T): AnyTool => tool as unknown as AnyTool; + + const tools: Record = { + fetch_weather: asAnyTool(fetchWeather), + scrape_pages: asAnyTool(scrapePages), + search_web: asAnyTool(searchWeb), + }; + + let answer: string | null = null; + while (answer === null) { + answer = await agent.step(tools); + } + + // Always show the final answer, even at ERROR level + const currentLevel = logger.getLevel(); + logger.setLevel(LogLevel.INFO); + logger.setTimestamps(false); + logger.setShowLevel(false); + logger.info("\nFinal answer:"); + logger.info(answer); + + // Reset logger settings + logger.setTimestamps(true); + logger.setShowLevel(true); + logger.setLevel(currentLevel); + } catch (error) { + // Wrap and log the error with full context + const wrappedError = wrapError(error, "Failed to execute agent"); + logger.logError(wrappedError, "Application terminated with error"); process.exit(1); } - - // Initialize agent with a goal - const agent = await Agent.create( - "Help users get weather information for cities around the world", - apiKey as string - ); - - // Define available tools - const tools = { - fetch_weather: fetchWeather, - }; - - // Run a step with the user's request - const { stdout, stderr } = await agent.step(tools, request); - - // Output results - if (stdout) console.log("\nOutput:", stdout); - if (stderr) console.log("\nErrors:", stderr); } 
-main().catch((error) => { - console.error("Error:", error); - process.exit(1); -}); +main(); diff --git a/x/henry/mp-sandbox-agent/package.json b/x/henry/mp-sandbox-agent/package.json index 5d6da8adf8bd..cc4fa5e61a97 100644 --- a/x/henry/mp-sandbox-agent/package.json +++ b/x/henry/mp-sandbox-agent/package.json @@ -2,6 +2,10 @@ "name": "mp-sandbox", "module": "index.ts", "type": "module", + "scripts": { + "test": "bun test sandbox.test.ts agent/helpers.test.ts", + "start": "bun main.ts" + }, "devDependencies": { "@types/bun": "latest" }, @@ -9,9 +13,12 @@ "typescript": "^5.0.0" }, "dependencies": { + "@anthropic-ai/sdk": "^0.39.0", "@micropython/micropython-webassembly-pyscript": "^1.24.1", "dotenv": "^16.4.7", + "google-search-results-nodejs": "^2.1.0", "openai": "^4.85.1", + "tiktoken": "^1.0.20", "zod": "^3.24.2" } } \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/sandbox.test.ts b/x/henry/mp-sandbox-agent/sandbox.test.ts index 2e88f2d06041..277725659a9f 100644 --- a/x/henry/mp-sandbox-agent/sandbox.test.ts +++ b/x/henry/mp-sandbox-agent/sandbox.test.ts @@ -11,15 +11,15 @@ describe("PythonSandbox", () => { }); test("should support importing and calling exposed functions", async () => { - const sandbox = await PythonSandbox.create("test"); + const sandbox = await PythonSandbox.create(); sandbox.expose("fake_function", { - fn: () => "Hello, World!", + fn: async () => "Hello, World!", input: z.object({}), output: z.string(), description: "A fake function that returns a string", }); const { stdout, stderr } = await sandbox.runCode( - "from test import fake_function\nprint(fake_function())" + "\nprint(await fake_function())" ); expect(stdout).toBe("Hello, World!\n"); expect(stderr).toBe(""); @@ -28,13 +28,13 @@ describe("PythonSandbox", () => { test("should support importing and calling exposed functions with arguments", async () => { const sandbox = await PythonSandbox.create("test"); sandbox.expose("add", { - fn: ({ a, b }: { a: number; b: number }) => a + b, + fn: async ({ a, b }: { a: number; b: number }) => a + b, input: z.object({ a: z.number(), b: z.number() }), output: z.number(), description: "Adds two numbers", }); const { stdout, stderr } = await sandbox.runCode( - "from test import add\nprint(add(1, 2))" + "\nprint(await add(1, 2))" ); expect(stdout).toBe("3\n"); expect(stderr).toBe(""); @@ -43,13 +43,13 @@ describe("PythonSandbox", () => { test("should support importing and calling exposed functions with positional arguments", async () => { const sandbox = await PythonSandbox.create("test"); sandbox.expose("sub", { - fn: ({ b, a }: { a: number; b: number }) => b - a, + fn: async ({ b, a }: { a: number; b: number }) => b - a, input: z.object({ b: z.number(), a: z.number() }), output: z.number(), description: "Subtracts two numbers", }); const { stdout, stderr } = await sandbox.runCode( - "from test import sub\nprint(sub(1, 2))" + "\nprint(await sub(1, 2))" ); expect(stdout).toBe("-1\n"); expect(stderr).toBe(""); @@ -58,13 +58,13 @@ describe("PythonSandbox", () => { test("should support importing and calling exposed functions with keyword arguments", async () => { const sandbox = await PythonSandbox.create("test"); sandbox.expose("multiply", { - fn: ({ a, b }: { a: number; b: number }) => a * b, + fn: async ({ a, b }: { a: number; b: number }) => a * b, input: z.object({ a: z.number(), b: z.number() }), output: z.number(), description: "Multiplies two numbers", }); const { stdout, stderr } = await sandbox.runCode( - "from test import multiply\nprint(multiply(a=1, 
b=2))" + "\nprint(await multiply(a=1, b=2))" ); expect(stdout).toBe("2\n"); expect(stderr).toBe(""); @@ -79,7 +79,7 @@ describe("PythonSandbox", () => { description: "Returns a string after a delay", }); const { stdout, stderr } = await sandbox.runCode( - "from test import async_function\nprint(await async_function())" + "\nprint(await async_function())" ); expect(stdout).toBe("Hello, World!\n"); expect(stderr).toBe(""); @@ -100,9 +100,37 @@ describe("PythonSandbox", () => { await sandbox.runCode("raise Exception('This is a test error')"); } catch (error) { expect(error).toBeInstanceOf(Error); - expect((error as Error).message).toBe( - 'Traceback (most recent call last):\n File "", line 2, in \nException: This is a test error\n' + expect((error as Error).message).toContain( + 'This is a test error' ); + // Make sure it contains the Python traceback + expect((error as Error).message).toContain('Traceback (most recent call last)'); } }); + test("should support list kw parameters", async () => { + const sandbox = await PythonSandbox.create("test"); + sandbox.expose("list_function", { + fn: async ({ l }) => l, + input: z.object({ l: z.array(z.string()) }), + output: z.array(z.string()), + description: "Returns a list of strings", + }); + const { stdout, stderr } = await sandbox.runCode( + "\nprint(await list_function(l=['a', 'b', 'c']))" + ); + expect(stdout).toBe("['a', 'b', 'c']\n"); + }); + test("should support list parameters", async () => { + const sandbox = await PythonSandbox.create("test"); + sandbox.expose("list_function", { + fn: async ({ l }) => l, + input: z.object({ l: z.array(z.string()) }), + output: z.array(z.string()), + description: "Returns a list of strings", + }); + const { stdout, stderr } = await sandbox.runCode( + "\nprint(await list_function(['a', 'b', 'c']))" + ); + expect(stdout).toBe("['a', 'b', 'c']\n"); + }); }); diff --git a/x/henry/mp-sandbox-agent/sandbox.ts b/x/henry/mp-sandbox-agent/sandbox.ts index acebd352c5aa..7493d9d90eb5 100644 --- a/x/henry/mp-sandbox-agent/sandbox.ts +++ b/x/henry/mp-sandbox-agent/sandbox.ts @@ -3,16 +3,51 @@ import { type MicroPythonInstance, } from "@micropython/micropython-webassembly-pyscript/micropython.mjs"; import * as z from "zod"; -import type { Tool } from "./tools/types"; +import { logger } from "./utils/logger"; +import { SandboxError, wrapError } from "./utils/errors"; export interface CodeExecutionResult { result: unknown; stdout: string; } +/** + * Represents a parsed JSON value (string, number, boolean, null, array, or object) + */ +export type JsonValue = + | string + | number + | boolean + | null + | JsonValue[] + | { [key: string]: JsonValue }; + +/** + * Represents a function that can be exposed to the sandbox + */ +export interface ExposedFunction< + TInput = unknown, + TOutput = unknown +> { + /** Function to execute when called from Python */ + fn: (input: TInput) => Promise; + /** Schema to validate and parse the input */ + input: z.ZodType; + /** Schema to validate and parse the output */ + output: z.ZodType; + /** Description of the function */ + description: string; +} + +/** + * Untyped version of ExposedFunction for internal use + */ +type AnyExposedFunction = ExposedFunction; + export class PythonSandbox { private mp!: MicroPythonInstance; - private exposedFunctions: { [key: string]: Tool } = {}; + private exposedFunctions: { [key: string]: AnyExposedFunction } = {}; + private module: Record = {}; private moduleId: string; private stdoutBuffer: string[] = []; private stderrBuffer: string[] = []; @@ -54,69 
+89,135 @@ export class PythonSandbox { return { stdout, stderr }; } - expose(name: string, func: Tool) { - this.exposedFunctions[name] = func; - - const wrapper = (...args: unknown[]) => { - // Parse input according to schema - const inputObject = func.input as z.ZodObject; - const params = func.input.parse( - args.length === 1 && typeof args[0] === "object" - ? args[0] - : { - [Object.keys(inputObject.shape)[0]]: args[0], - [Object.keys(inputObject.shape)[1]]: args[1], - } - ); - return func.fn(params); + /** + * Expose a function to the Python environment + * @param name The name of the function in Python + * @param func The function to expose + */ + expose(name: string, func: ExposedFunction): void { + this.exposedFunctions[name] = func as AnyExposedFunction; + + // Create a wrapper function that handles JSON serialization/deserialization + const wrapper = (_args: string, _kwargs: string): Promise<string | number | boolean> => { + // Parse input JSON strings + const args: JsonValue[] = JSON.parse(_args); + const kwargs: Record<string, JsonValue> = JSON.parse(_kwargs); + + // Convert positional and keyword arguments to an object that can be validated + // against the input schema + const paramsObj: Record<string, JsonValue> = {}; + + // Handle object schemas differently than other schemas + if (func.input instanceof z.ZodObject) { + // Map parameters from positional args and keyword args + for (const [i, key] of Object.keys(func.input.shape).entries()) { + if (key in kwargs) { + paramsObj[key] = kwargs[key]; + } else if (i < args.length) { + paramsObj[key] = args[i]; + } + } + } else { + // For non-object schemas, just use the first argument + if (args.length > 0) { + return Promise.resolve(JSON.stringify(args[0])); + } + } + + // Parse with the input schema to validate and transform + const params = func.input.parse(paramsObj); + + // Call the function + const result = func.fn(params); + + // Function to convert result to a JSON-compatible value + const serializeValue = (value: unknown): string | number | boolean => { + if ( + typeof value === "string" || + typeof value === "number" || + typeof value === "boolean" + ) { + return value; + } + return JSON.stringify(value); + }; + + // Handle async results + if (result instanceof Promise) { + return result.then(serializeValue); + } + + return Promise.resolve(serializeValue(result)); }; // Create an object to hold our exposed functions - const module = { [name]: wrapper }; - this.mp.registerJsModule(this.moduleId, module); + this.module[name] = wrapper; + this.mp.registerJsModule(this.moduleId, this.module); + } + + private generateWrapperFunction(name: string): string { + return ` +async def ${name}(*args, **kwargs): + args = json.dumps(args) + kwargs = json.dumps(kwargs) + + r = await _${name}(args, kwargs) + try: + return json.loads(r) + except: + return r`; + } + + private generateImports(): string { + const imports = ["import json"]; + + for (const name of Object.keys(this.exposedFunctions)) { + imports.push(`from ${this.moduleId} import ${name} as _${name}`); + imports.push(this.generateWrapperFunction(name)); + } + + return imports.join("\n"); } async runCode(code: string): Promise<{ stdout: string; stderr: string }> { - // Clear stdout and stderr buffers before running new code this.clearBuffers(); - - // Import exposed functions if any - const importCode = Object.keys(this.exposedFunctions) - .map((name) => `from ${this.moduleId} import ${name}`) - .join("\n"); + + const codeLength = code.length; + const codeSummary = code.length > 50 + ? 
`${code.substring(0, 47)}...` + : code; + + logger.debug(`Running code (${codeLength} chars): ${codeSummary}`); + const importCode = this.generateImports(); try { - // Run the actual code - await this.mp.runPythonAsync(`${importCode}\n${code.trim()}`); - - return this.getOutput(); + const fullCode = `${importCode}\n\n${code.trim()}`; + await this.mp.runPythonAsync(fullCode); + const output = this.getOutput(); + logger.debug(`Code execution successful with ${output.stdout.length} bytes of stdout`); + return output; } catch (error) { - // Get stdout before throwing + // Get stdout and stderr before creating error object const { stdout, stderr } = this.getOutput(); - - // Create a proper error object - let errorObj: Error; - if (error instanceof Error) { - errorObj = error; - } else if (typeof error === "string") { - errorObj = new Error(error); - } else { - errorObj = new Error(String(error)); - } - - // Add stdout and stderr to the error - Object.defineProperty(errorObj, "stdout", { - value: stdout, - enumerable: true, - writable: false, + + logger.debug(`Code execution failed with ${stderr.length} bytes of stderr`); + + // Create a SandboxError with context + const errorMessage = error instanceof Error ? error.message : String(error); + const sandboxError = new SandboxError( + `Python code execution failed: ${errorMessage}`, + stdout, + stderr, + { cause: error instanceof Error ? error : undefined } + ).addContext({ + codeLength, + codeSummary, + moduleId: this.moduleId, + hasStdout: stdout.length > 0, + hasStderr: stderr.length > 0 }); - Object.defineProperty(errorObj, "stderr", { - value: stderr, - enumerable: true, - writable: false, - }); - - throw errorObj; + + throw sandboxError; } } } diff --git a/x/henry/mp-sandbox-agent/services/llm.ts b/x/henry/mp-sandbox-agent/services/llm.ts new file mode 100644 index 000000000000..c301d53d5d89 --- /dev/null +++ b/x/henry/mp-sandbox-agent/services/llm.ts @@ -0,0 +1,167 @@ +import OpenAI from "openai"; +import Anthropic from "@anthropic-ai/sdk"; +import { type ModelConfig, type Provider } from "../utils/config"; +import { type ChatCompletionMessageParam } from "openai/resources/chat/completions"; +import { APIError } from "../utils/errors"; +import { logger } from "../utils/logger"; + +/** + * Message format common across different providers + */ +export interface Message { + role: "system" | "user" | "assistant"; + content: string; +} + +/** + * Response format from LLM providers + */ +export interface LLMResponse { + content: string; + totalTokens?: number; +} + +/** + * Service for interacting with various LLM providers + */ +export class LLMService { + private config: ModelConfig; + private openaiClient?: OpenAI; + private anthropicClient?: Anthropic; + + constructor(config: ModelConfig) { + this.config = config; + + // Initialize the appropriate client based on provider + if (config.provider === "openai") { + this.openaiClient = new OpenAI({ apiKey: config.apiKey }); + } else if (config.provider === "anthropic") { + this.anthropicClient = new Anthropic({ apiKey: config.apiKey }); + } + } + + /** + * Converts our internal message format to OpenAI's format + */ + private toOpenAIMessages(messages: Message[]): ChatCompletionMessageParam[] { + return messages.map(message => ({ + role: message.role, + content: message.content, + })); + } + + /** + * Converts our internal message format to Anthropic's format + */ + private toAnthropicMessages(messages: Message[]): Anthropic.Messages.MessageParam[] { + // Anthropic requires system message to be 
separate from the conversation + const systemMessage = messages.find(m => m.role === "system"); + const nonSystemMessages = messages.filter(m => m.role !== "system"); + + const anthropicMessages: Anthropic.Messages.MessageParam[] = nonSystemMessages.map(message => ({ + role: message.role === "user" ? "user" : "assistant", + content: message.content, + })); + + return anthropicMessages; + } + + /** + * Generate a completion from the selected LLM provider + */ + async generateCompletion(messages: Message[]): Promise<LLMResponse> { + try { + if (this.config.provider === "openai") { + return await this.generateOpenAICompletion(messages); + } else if (this.config.provider === "anthropic") { + return await this.generateAnthropicCompletion(messages); + } else { + throw new APIError(`Unsupported provider: ${this.config.provider}`) + .addContext({ supportedProviders: ["openai", "anthropic"] }); + } + } catch (error) { + // Enhance error reporting with provider-specific details + throw new APIError( + `Failed to generate response from ${this.config.provider}`, + error instanceof Error && 'status' in error ? (error as any).status : undefined, + { cause: error instanceof Error ? error : undefined } + ).addContext({ + provider: this.config.provider, + model: this.config.model, + messageCount: messages.length, + }); + } + } + + /** + * Generate a completion using OpenAI + */ + private async generateOpenAICompletion(messages: Message[]): Promise<LLMResponse> { + if (!this.openaiClient) { + throw new APIError("OpenAI client not initialized") + .addContext({ provider: "openai" }); + } + + const openaiMessages = this.toOpenAIMessages(messages); + + logger.debug(`Sending request to OpenAI with model: ${this.config.model}`); + const response = await this.openaiClient.chat.completions.create({ + model: this.config.model, + messages: openaiMessages, + temperature: this.config.temperature, + max_tokens: this.config.maxTokens, + }); + + if (!response.choices[0].message.content) { + throw new APIError("OpenAI returned empty response content") + .addContext({ + responseId: response.id, + model: this.config.model + }); + } + + return { + content: response.choices[0].message.content, + totalTokens: response.usage?.total_tokens, + }; + } + + /** + * Generate a completion using Anthropic + */ + private async generateAnthropicCompletion(messages: Message[]): Promise<LLMResponse> { + if (!this.anthropicClient) { + throw new APIError("Anthropic client not initialized") + .addContext({ provider: "anthropic" }); + } + + // Find system message + const systemMessage = messages.find(m => m.role === "system")?.content || ""; + const anthropicMessages = this.toAnthropicMessages(messages); + + logger.debug(`Sending request to Anthropic with model: ${this.config.model}`); + const response = await this.anthropicClient.messages.create({ + model: this.config.model, + messages: anthropicMessages, + system: systemMessage, + temperature: this.config.temperature, + max_tokens: this.config.maxTokens || 4096, + }); + + // Check for text content in the first content block + const content = response.content[0]; + if (!content || !('text' in content)) { + throw new APIError("Anthropic returned empty or invalid response content") + .addContext({ + responseId: response.id, + model: this.config.model, + contentType: content ? 
typeof content : 'undefined' + }); + } + + return { + content: content.text, + totalTokens: response.usage?.input_tokens + response.usage?.output_tokens, + }; + } +} \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/test-scrape.ts b/x/henry/mp-sandbox-agent/test-scrape.ts new file mode 100644 index 000000000000..7a7de03909ee --- /dev/null +++ b/x/henry/mp-sandbox-agent/test-scrape.ts @@ -0,0 +1,29 @@ +import * as dotenv from "dotenv"; +import { scrapePages } from "./tools/scrape"; +import { logger, LogLevel } from "./utils/logger"; + +// Load environment variables from .env file +dotenv.config(); + +// Set up logger +logger.setLevel(LogLevel.INFO); + +async function testScrape() { + console.log("Testing scrape tool with cleaner logging..."); + + const result = await scrapePages.fn( + { urls: ["https://example.com", "https://news.ycombinator.com"] }, + { log: (...args) => console.log(...args) } + ); + + console.log("\nTest completed. Tool returned:", + result.type === "success" + ? `Success with ${result.result.length} results` + : `Error: ${result.error}` + ); +} + +testScrape().catch(err => { + console.error("Test failed with error:", err); + process.exit(1); +}); \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/tools/fetch_weather.ts b/x/henry/mp-sandbox-agent/tools/fetch_weather.ts index 0ef926486b35..3f53f2dfa0a6 100644 --- a/x/henry/mp-sandbox-agent/tools/fetch_weather.ts +++ b/x/henry/mp-sandbox-agent/tools/fetch_weather.ts @@ -1,35 +1,72 @@ import { defineTool } from "./helpers"; import { z } from "zod"; +import { ok, err } from "./types"; +import { logger } from "../utils/logger"; +import { APIError, ValidationError } from "../utils/errors"; + +const WeatherSchema = z.object({ + city: z.string().describe("Full city name with country"), + temperature: z.number().describe("Temperature in °C"), + precipitation: z.number().describe("Precipitation in mm"), + weathercode: z.number().describe("WMO weather code"), + units: z + .object({ + temperature: z.string().describe("Temperature unit (e.g., °C)"), + precipitation: z.string().describe("Precipitation unit (e.g., mm)"), + }) + .describe("Measurement units for temperature and precipitation"), +}); export const fetchWeather = defineTool( - "Fetches current weather data for the specified city", + "Fetches current weather data for the specified city. 
" + + "Automatically logs results in the execution logs (no need to do it manually).", z.object({ city: z.string().describe("Name of the city to get weather for"), }), - z.object({ - city: z.string().describe("Full city name with country"), - temperature: z.number().describe("Temperature in °C"), - precipitation: z.number().describe("Precipitation in mm"), - weathercode: z.number().describe("WMO weather code"), - units: z - .object({ - temperature: z.string().describe("Temperature unit (e.g., °C)"), - precipitation: z.string().describe("Precipitation unit (e.g., mm)"), - }) - .describe("Measurement units for temperature and precipitation"), - }), - async ({ city }) => { + WeatherSchema, + async ({ city }, { log }) => { + logger.debug(`Fetching weather for city: "${city}"`); + + // Validate city parameter + if (!city.trim()) { + const validationError = new ValidationError("City name cannot be empty") + .addContext({ providedCity: city }); + logger.debug(`Weather validation error: ${validationError.message}`); + return err(`Invalid city name: ${validationError.message}`); + } + try { // First get coordinates for the city - const geocodeResponse = await fetch( - `https://geocoding-api.open-meteo.com/v1/search?name=${encodeURIComponent( - city - )}&count=1&language=en&format=json` - ); + const geocodeUrl = `https://geocoding-api.open-meteo.com/v1/search?name=${encodeURIComponent( + city + )}&count=1&language=en&format=json`; + + logger.debug(`Geocoding API request: ${geocodeUrl}`); + const geocodeResponse = await fetch(geocodeUrl); + + if (!geocodeResponse.ok) { + const apiError = new APIError( + "Geocoding API request failed", + geocodeResponse.status + ).addContext({ + city, + url: geocodeUrl, + statusText: geocodeResponse.statusText + }); + logger.debug(`Geocoding API error: ${apiError.message} (${apiError.statusCode})`); + return err(`Failed to geocode city: ${apiError.message}`); + } + const geocodeData = await geocodeResponse.json(); if (!geocodeData.results?.[0]) { - throw new Error(`City "${city}" not found`); + const cityNotFoundError = new ValidationError(`City "${city}" not found`) + .addContext({ + searchedCity: city, + responseData: JSON.stringify(geocodeData) + }); + logger.debug(`City not found: ${city}`); + return err(`City "${city}" not found in geocoding database`); } const { @@ -38,14 +75,52 @@ export const fetchWeather = defineTool( name: foundCity, country, } = geocodeData.results[0]; + + logger.debug(`Found city coordinates: ${foundCity}, ${country} (${latitude}, ${longitude})`); // Then get weather for those coordinates - const response = await fetch( - `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}¤t=temperature_2m,precipitation,weathercode` - ); + const weatherUrl = `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}¤t=temperature_2m,precipitation,weathercode`; + logger.debug(`Weather API request: ${weatherUrl}`); + + const response = await fetch(weatherUrl); + + if (!response.ok) { + const apiError = new APIError( + "Weather API request failed", + response.status + ).addContext({ + city: `${foundCity}, ${country}`, + coordinates: { latitude, longitude }, + url: weatherUrl, + statusText: response.statusText + }); + logger.debug(`Weather API error: ${apiError.message} (${apiError.statusCode})`); + return err(`Failed to fetch weather data: ${apiError.message}`); + } + const data = await response.json(); + + // Validate weather data + if (!data.current || + typeof data.current.temperature_2m !== 'number' 
|| + typeof data.current.precipitation !== 'number' || + typeof data.current.weathercode !== 'number') { + const dataError = new APIError("Weather API returned invalid data format") + .addContext({ + response: JSON.stringify(data), + city: `${foundCity}, ${country}` + }); + logger.debug(`Weather data validation error: ${dataError.message}`); + return err(`Weather data for ${foundCity}, ${country} has invalid format`); + } + + log( + `Weather data for ${foundCity}, ${country}:\n${JSON.stringify(data)}` + ); + + logger.debug(`Successfully fetched weather for ${foundCity}, ${country}`); - return { + return ok({ city: `${foundCity}, ${country}`, temperature: data.current.temperature_2m, precipitation: data.current.precipitation, @@ -54,10 +129,20 @@ export const fetchWeather = defineTool( temperature: data.current_units.temperature_2m, precipitation: data.current_units.precipitation, }, - }; + }); } catch (error) { - console.error("Error fetching weather:", error); - return null; + const wrappedError = error instanceof Error + ? error + : new Error(String(error)); + + logger.debug(`Error in fetch_weather tool: ${wrappedError.message}`); + if (wrappedError.stack) { + logger.debug(`Stack trace: ${wrappedError.stack}`); + } + + return err( + `Error fetching weather: ${wrappedError.message}` + ); } } ); diff --git a/x/henry/mp-sandbox-agent/tools/helpers.ts b/x/henry/mp-sandbox-agent/tools/helpers.ts index 50ceecbe2be5..7c5db7acbdcf 100644 --- a/x/henry/mp-sandbox-agent/tools/helpers.ts +++ b/x/henry/mp-sandbox-agent/tools/helpers.ts @@ -1,16 +1,31 @@ -import type { Tool } from "./types"; +import type { Tool, ToolContext, ToolOutput } from "./types"; import { z } from "zod"; -export function defineTool<I extends z.ZodType<object>, O extends z.ZodType>( +/** + * Helper function to define a new tool with type safety + * + * @param description Description of what the tool does + * @param input Zod schema for validating the input + * @param output Zod schema for validating the success result + * @param fn Implementation function + * @returns A Tool object that can be used by the agent + */ +export function defineTool< + TInputSchema extends z.ZodType, + TOutputSchema extends z.ZodType +>( description: string, - input: I, - output: O, - implementation: (args: z.infer<I>) => Promise<z.infer<O> | null> -): Tool { + input: TInputSchema, + output: TOutputSchema, + fn: ( + args: z.infer<TInputSchema>, + context: ToolContext + ) => Promise<ToolOutput<z.infer<TOutputSchema>>> +): Tool<z.infer<TInputSchema>, z.infer<TOutputSchema>> { return { description, input, output, - fn: implementation, + fn, }; } diff --git a/x/henry/mp-sandbox-agent/tools/scrape.ts b/x/henry/mp-sandbox-agent/tools/scrape.ts new file mode 100644 index 000000000000..1e8a194b1159 --- /dev/null +++ b/x/henry/mp-sandbox-agent/tools/scrape.ts @@ -0,0 +1,127 @@ +import { z } from "zod"; +import { defineTool } from "./helpers"; +import { encoding_for_model } from "tiktoken"; + +if (!process.env.FIRECRAWL_API_KEY) { + throw new Error( + "Please set the FIRECRAWL_API_KEY environment variable in your .env file" + ); +} + +const metadataSchema = z.object({ + title: z.string().optional(), + description: z.string().optional(), + language: z.string().optional(), + sourceURL: z.string(), + pageStatusCode: z.number().optional(), + pageError: z.string().optional(), +}); + +const scrapeResponseSchema = z.object({ + success: z.boolean(), + data: z.object({ + metadata: metadataSchema, + markdown: z.string(), + }), +}); + +export const scrapePages = defineTool( + "Scrapes multiple webpages and returns their content in markdown format.", + z.object({ + urls: 
z.array(z.string()).describe("The URLs to scrape"), + }), + z.array(scrapeResponseSchema), + async (input, { log }) => { + try { + const results = await Promise.all( + input.urls.map((url) => + fetch("https://api.firecrawl.dev/v1/scrape", { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + url, + formats: ["markdown"], + }), + }).then((r) => r.json()) + ) + ); + + // Use OpenAI's tokenizer (cl100k_base is used by GPT-4 and newer models) + const tokenizer = encoding_for_model("gpt-4"); + + // Function to count tokens using tiktoken + function countTokens(text: string): number { + try { + const tokens = tokenizer.encode(text); + return tokens.length; + } catch (error) { + console.error("Error counting tokens:", error); + // Fallback to a simple approximation if tiktoken fails + return Math.ceil(text.length / 4); + } + } + + // Function to truncate text to token limit using tiktoken + function truncateToTokenLimit(text: string, tokenLimit: number): string { + try { + const tokens = tokenizer.encode(text); + + if (tokens.length <= tokenLimit) { + return text; + } + + // For safety, use a character-based approach for truncation + // Calculate roughly how many characters to include to stay under token limit + const charLimit = Math.floor((tokenLimit / tokens.length) * text.length); + + // Truncate text directly (more reliable than using tiktoken's decode) + const truncatedText = text.substring(0, charLimit); + + return truncatedText + "\n... [content truncated, full content available in result]"; + } catch (error) { + console.error("Error truncating text:", error); + // Fallback to a simple approximation if tiktoken fails + return text.substring(0, tokenLimit * 4) + "\n... [content truncated, full content available in result]"; + } + } + + // Log a human-friendly summary + console.log( + "Scrape results:", + input.urls + .map((url, i) => + results[i].success + ? `${url}: Scraped successfully (${results[i].data.markdown.length} chars, ~${countTokens(results[i].data.markdown)} tokens)` + : `${url}: Failed to scrape` + ) + .join("\n") + ); + + // Log content for the agent with proper token limiting + const TOKEN_LIMIT = 2000; + log( + input.urls + .map((url, i) => + results[i].success + ? `Content from ${url}:\n${truncateToTokenLimit(results[i].data.markdown, TOKEN_LIMIT)}` + : `${url}: Failed to scrape` + ) + .join("\n\n---\n\n") + ); + + return { + type: "success", + result: results, + }; + } catch (error) { + return { + type: "error", + error: + error instanceof Error ? 
error.message : "Unknown error occurred", + }; + } + } +); diff --git a/x/henry/mp-sandbox-agent/tools/serp.ts b/x/henry/mp-sandbox-agent/tools/serp.ts new file mode 100644 index 000000000000..2c0b56ed027a --- /dev/null +++ b/x/henry/mp-sandbox-agent/tools/serp.ts @@ -0,0 +1,90 @@ +import { z } from "zod"; +import { defineTool } from "./helpers"; + +if (!process.env.SERPAPI_API_KEY) { + throw new Error( + "Please set the SERPAPI_API_KEY environment variable in your .env file" + ); +} + +const searchResultSchema = z.object({ + position: z.number().optional(), + title: z.string().optional(), + link: z.string().optional(), + snippet: z.string().optional(), + displayed_link: z.string().optional(), +}); + +const searchResponseSchema = z.object({ + organic_results: z.array(searchResultSchema).optional(), + search_metadata: z + .object({ + status: z.string(), + id: z.string(), + }) + .optional(), + error: z.string().optional(), +}); + +export const searchWeb = defineTool( + "Search the web using Google Search. Returns organic search results for the given query. " + + "The page parameter is optional and defaults to 1. It can be used to get results beyond the first page. " + + "Logs the output of the search in the execution logs (no need to do it manually).", + z.object({ + query: z.string().describe("The search query to execute"), + page: z + .number() + .default(1) + .describe("The page number of results to fetch (1-based)"), + }), + searchResponseSchema, + async (input, { log }) => { + try { + // Calculate start parameter for pagination (Google uses 0-based indexing with 10 results per page) + const start = (input.page - 1) * 10; + + const params = { + engine: "google", + q: input.query, + start: start.toString(), + num: "10", + api_key: process.env.SERPAPI_API_KEY!, + }; + + const response = await fetch( + `https://serpapi.com/search.json?${new URLSearchParams(params)}` + ); + const data = await response.json(); + + if (data.error) { + return { type: "error", error: data.error }; + } + + log( + `Retrieved ${data.organic_results?.length || 0} results for query "${ + input.query + }" (page ${input.page}):\n${JSON.stringify( + data.organic_results?.map((r: any) => ({ + title: r.title, + link: r.link, + snippet: r.snippet, + })) + )}` + ); + + return { + type: "success", + result: { + organic_results: data.organic_results || [], + search_metadata: data.search_metadata, + }, + }; + } catch (error) { + return { + type: "error", + error: + error instanceof Error ? 
error.message : "Unknown error occurred", + }; + } + } +); diff --git a/x/henry/mp-sandbox-agent/tools/types.ts b/x/henry/mp-sandbox-agent/tools/types.ts index 6134b4373322..bb906e470c47 100644 --- a/x/henry/mp-sandbox-agent/tools/types.ts +++ b/x/henry/mp-sandbox-agent/tools/types.ts @@ -1,8 +1,64 @@ import { z } from "zod"; -export type Tool = { - fn: Function; - input: z.ZodType; - output: z.ZodType; +/** + * Utility to provide logging and other functionality to tool executions + */ +export interface ToolContext { + /** Function to log information during tool execution */ + log: (message: string) => void; +} + +/** + * Represents a tool that can be used by the agent + */ +export interface Tool<TInput, TOutput> { + /** The function that implements the tool's behavior */ + fn: ( + input: TInput, + context: ToolContext + ) => Promise<ToolOutput<TOutput>>; + /** Schema for validating and parsing the input */ + input: z.ZodType<TInput>; + /** Schema for validating and parsing the output */ + output: z.ZodType<TOutput>; + /** Description of what the tool does */ description: string; -}; +} + +/** + * Type-erased Tool for internal use when specific type information is not needed + */ +export type AnyTool = Tool<unknown, unknown>; + +/** + * Creates a Zod schema for validating tool outputs + * @param valueSchema Schema for the success result + * @returns A union schema that can validate either success or error results + */ +export const ToolOutput = <T extends z.ZodType>(valueSchema: T) => + z.union([ + z.object({ type: z.literal("success"), result: valueSchema }), + z.object({ type: z.literal("error"), error: z.string() }), + ]); + +/** + * Represents the output of a tool execution + * Either a success with a result of type T, or an error with a string message + */ +export type ToolOutput<T> = + | { type: "success"; result: T } + | { type: "error"; error: string }; + +export function isOk<T>( + output: ToolOutput<T> +): output is { type: "success"; result: T } { + return output.type === "success"; +} + +export function ok<T>(result: T): ToolOutput<T> { + return { type: "success", result }; +} + +export function err(error: string): ToolOutput<never> { + return { type: "error", error }; +} diff --git a/x/henry/mp-sandbox-agent/utils/config.ts b/x/henry/mp-sandbox-agent/utils/config.ts new file mode 100644 index 000000000000..b98fe27bbe5c --- /dev/null +++ b/x/henry/mp-sandbox-agent/utils/config.ts @@ -0,0 +1,160 @@ +import { z } from "zod"; +import { ValidationError } from "./errors"; +import { logger } from "./logger"; + +/** + * Supported AI provider types + */ +export type Provider = "openai" | "anthropic"; + +/** + * Interface for AI model configuration + */ +export interface ModelConfig { + /** The AI provider (openai, anthropic) */ + provider: Provider; + /** The model name to use */ + model: string; + /** API key for the provider */ + apiKey: string; + /** Maximum tokens to generate (defaults based on provider) */ + maxTokens?: number; + /** Temperature setting for generation (defaults based on provider) */ + temperature?: number; +} + +/** + * Supported OpenAI models + */ +export const OPENAI_MODELS = [ + "gpt-4o", + "gpt-4-turbo", + "gpt-4", + "gpt-3.5-turbo", +] as const; + +/** + * Supported Anthropic models + */ +export const ANTHROPIC_MODELS = [ + // Latest models + "claude-3-7-sonnet-20250219", + "claude-3-5-sonnet-20241022", + "claude-3-5-haiku-20241022", + // Previous models + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", +] as const; + +/** + * Zod schema for validating OpenAI models + */ +const openAIModelSchema = z.enum(OPENAI_MODELS); + +/** + * Zod 
schema for validating Anthropic models + */ +const anthropicModelSchema = z.enum(ANTHROPIC_MODELS); + +/** + * Default configuration values for each provider + */ +export const DEFAULT_CONFIGS: Record> = { + openai: { + provider: "openai", + model: "gpt-4o", + temperature: 0.0, + maxTokens: 4096, + }, + anthropic: { + provider: "anthropic", + model: "claude-3-7-sonnet-20250219", // Updated to latest model + temperature: 0.0, + maxTokens: 4096, + }, +}; + +/** + * Loads model configuration from environment variables with sensible defaults + * @returns A validated ModelConfig object + */ +export function loadModelConfig(): ModelConfig { + // Determine provider from environment variable or default to OpenAI + const provider = (process.env.AI_PROVIDER?.toLowerCase() || "openai") as Provider; + + if (provider !== "openai" && provider !== "anthropic") { + throw new ValidationError(`Invalid AI provider: ${provider}. Must be one of: openai, anthropic`) + .addContext({ + supportedProviders: ["openai", "anthropic"], + providedValue: provider + }); + } + + // Get API key based on provider + const apiKey = provider === "openai" + ? process.env.OPENAI_API_KEY + : process.env.ANTHROPIC_API_KEY; + + if (!apiKey) { + throw new ValidationError(`Missing API key for ${provider}`) + .addContext({ + provider, + requiredEnvVar: provider === "openai" ? "OPENAI_API_KEY" : "ANTHROPIC_API_KEY" + }); + } + + // Get model from environment variable or use default + const modelFromEnv = process.env.AI_MODEL; + let model: string; + + // Validate model based on provider + if (modelFromEnv) { + if (provider === "openai") { + const result = openAIModelSchema.safeParse(modelFromEnv); + if (!result.success) { + logger.warn(`Invalid OpenAI model: ${modelFromEnv}. Using default: ${DEFAULT_CONFIGS.openai.model}`); + model = DEFAULT_CONFIGS.openai.model; + } else { + model = result.data; + } + } else { + const result = anthropicModelSchema.safeParse(modelFromEnv); + if (!result.success) { + logger.warn(`Invalid Anthropic model: ${modelFromEnv}. Using default: ${DEFAULT_CONFIGS.anthropic.model}`); + model = DEFAULT_CONFIGS.anthropic.model; + } else { + model = result.data; + } + } + } else { + // Use default model for the provider + model = DEFAULT_CONFIGS[provider].model; + } + + // Parse temperature if provided + const temperatureFromEnv = process.env.AI_TEMPERATURE + ? parseFloat(process.env.AI_TEMPERATURE) + : undefined; + + const temperature = temperatureFromEnv !== undefined + ? Math.max(0, Math.min(1, temperatureFromEnv)) // Clamp between 0 and 1 + : DEFAULT_CONFIGS[provider].temperature; + + // Parse max tokens if provided + const maxTokensFromEnv = process.env.AI_MAX_TOKENS + ? parseInt(process.env.AI_MAX_TOKENS, 10) + : undefined; + + const maxTokens = maxTokensFromEnv !== undefined && !isNaN(maxTokensFromEnv) + ? maxTokensFromEnv + : DEFAULT_CONFIGS[provider].maxTokens; + + return { + provider, + model, + apiKey, + temperature, + maxTokens, + }; +} \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/utils/errors.ts b/x/henry/mp-sandbox-agent/utils/errors.ts new file mode 100644 index 000000000000..323d84733607 --- /dev/null +++ b/x/henry/mp-sandbox-agent/utils/errors.ts @@ -0,0 +1,214 @@ +/** + * Custom error classes and error handling utilities for the MicroPython Sandbox Agent. + * Provides consistent error handling with proper context information. 
+ */ + +/** + * Base error class for the application + * Contains context information to help with debugging + */ +export class AppError extends Error { + /** Error code for categorizing errors */ + public code: string; + + /** Additional context about the error */ + public context: Record<string, unknown>; + + /** Original cause of the error, if it was wrapped */ + public cause?: Error; + + /** Whether the error has been handled */ + public handled: boolean = false; + + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.name = this.constructor.name; + this.code = 'APP_ERROR'; + this.context = {}; + + // Capture the stack trace + if (Error.captureStackTrace) { + Error.captureStackTrace(this, this.constructor); + } + + // Extract cause if provided + if (options?.cause instanceof Error) { + this.cause = options.cause; + } + } + + /** + * Add additional context to the error + */ + addContext(context: Record<string, unknown>): this { + this.context = { ...this.context, ...context }; + return this; + } + + /** + * Mark the error as handled to prevent duplicate logging + */ + markHandled(): this { + this.handled = true; + return this; + } + + /** + * Get a structured representation of the error for logging + */ + toJSON(): Record<string, unknown> { + return { + name: this.name, + message: this.message, + code: this.code, + context: this.context, + stack: this.stack, + cause: this.cause ? ( + this.cause instanceof AppError + ? this.cause.toJSON() + : { + name: this.cause.name, + message: this.cause.message, + stack: this.cause.stack + } + ) : undefined + }; + } +} + +/** + * Error class for validation errors + */ +export class ValidationError extends AppError { + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.code = 'VALIDATION_ERROR'; + } +} + +/** + * Error class for configuration errors + */ +export class ConfigurationError extends AppError { + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.code = 'CONFIG_ERROR'; + } +} + +/** + * Error class for external API errors + */ +export class APIError extends AppError { + public statusCode?: number; + + constructor(message: string, statusCode?: number, options?: ErrorOptions) { + super(message, options); + this.code = 'API_ERROR'; + this.statusCode = statusCode; + } + + toJSON(): Record<string, unknown> { + return { + ...super.toJSON(), + statusCode: this.statusCode + }; + } +} + +/** + * Error class for sandbox execution errors + */ +export class SandboxError extends AppError { + public stdout: string; + public stderr: string; + + constructor(message: string, stdout: string = '', stderr: string = '', options?: ErrorOptions) { + super(message, options); + this.code = 'SANDBOX_ERROR'; + this.stdout = stdout; + this.stderr = stderr; + } + + toJSON(): Record<string, unknown> { + return { + ...super.toJSON(), + stdout: this.stdout, + stderr: this.stderr + }; + } +} + +/** + * Error class for tool execution errors + */ +export class ToolError extends AppError { + public toolName: string; + + constructor(toolName: string, message: string, options?: ErrorOptions) { + super(`Error in tool '${toolName}': ${message}`, options); + this.code = 'TOOL_ERROR'; + this.toolName = toolName; + } + + toJSON(): Record<string, unknown> { + return { + ...super.toJSON(), + toolName: this.toolName + }; + } +} + +/** + * Wraps an unknown error in an AppError for consistent handling + */ +export function wrapError(error: unknown, defaultMessage = 'An unexpected error occurred'): AppError { + if (error instanceof AppError) { + return error; + } + + if (error 
instanceof Error) { + return new AppError(error.message, { cause: error }); + } + + if (typeof error === 'string') { + return new AppError(error); + } + + return new AppError(defaultMessage).addContext({ originalError: error }); +} + +/** + * Creates a validation error with the provided field information + */ +export function createValidationError( + message: string, + fieldName: string, + value?: unknown +): ValidationError { + return new ValidationError(message).addContext({ + field: fieldName, + invalidValue: value + }); +} + +/** + * Creates a configuration error with the provided config information + */ +export function createConfigError( + message: string, + configKey: string, + expectedValue?: string +): ConfigurationError { + return new ConfigurationError(message).addContext({ + configKey, + expectedValue + }); +} + +/** + * Type guard to check if an error is an instance of AppError + */ +export function isAppError(error: unknown): error is AppError { + return error instanceof AppError; +} \ No newline at end of file diff --git a/x/henry/mp-sandbox-agent/utils/logger.ts b/x/henry/mp-sandbox-agent/utils/logger.ts new file mode 100644 index 000000000000..78ced5156b7c --- /dev/null +++ b/x/henry/mp-sandbox-agent/utils/logger.ts @@ -0,0 +1,259 @@ +/** + * A configurable logging system for the MicroPython Sandbox Agent. + * Supports different log levels and output formats. + */ +import { AppError, isAppError } from "./errors"; + +export enum LogLevel { + ERROR = 0, + WARN = 1, + INFO = 2, + DEBUG = 3, + TRACE = 4, +} + +export interface LoggerOptions { + /** Minimum level to log */ + level: LogLevel; + /** Whether to include timestamps in logs */ + timestamps?: boolean; + /** Whether to include log level in logs */ + showLevel?: boolean; + /** Custom output function (defaults to console) */ + outputFn: (message: string, level: LogLevel) => void; +} + +export class Logger { + private options: LoggerOptions; + + constructor(options: Partial = {}) { + this.options = { + level: options.level ?? LogLevel.INFO, + timestamps: options.timestamps ?? true, + showLevel: options.showLevel ?? true, + outputFn: options.outputFn ?? this.defaultOutputFn, + }; + } + + /** + * Default output function that logs to the console + */ + private defaultOutputFn(message: string, level: LogLevel): void { + switch (level) { + case LogLevel.ERROR: + console.error(message); + break; + case LogLevel.WARN: + console.warn(message); + break; + case LogLevel.INFO: + case LogLevel.DEBUG: + case LogLevel.TRACE: + default: + console.log(message); + break; + } + } + + /** + * Format a log message based on configuration + */ + private formatMessage(level: LogLevel, message: string): string { + const parts: string[] = []; + + if (this.options.timestamps) { + parts.push(`[${new Date().toISOString()}]`); + } + + if (this.options.showLevel) { + parts.push(`[${LogLevel[level]}]`); + } + + parts.push(message); + return parts.join(' '); + } + + /** + * Log a message if the level is enabled + * @param level The log level + * @param message The message to log + * @param args Values to substitute into the message + */ + private log(level: LogLevel, message: string, ...args: unknown[]): void { + if (level > this.options.level) return; + + let formattedMessage = this.formatMessage(level, message); + + // Handle additional args by replacing %s, %d, etc. 
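+    // For example (illustrative): logger.info("Fetched %d results for %s", 3, "Paris")
+    // would print something like "[2025-01-01T00:00:00.000Z] [INFO] Fetched 3 results for Paris".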
+ if (args.length > 0) { + formattedMessage = formattedMessage.replace(/%[sdjifoO%]/g, (match): string => { + if (match === '%%') return '%'; + + const value = args.shift(); + if (value === undefined) return ''; + + // Format based on specifier + switch (match) { + case '%j': + case '%o': + case '%O': + try { + return JSON.stringify(value, null, 2); + } catch (err) { + return String(value); + } + case '%d': + case '%i': + return Number(value).toString(); + case '%f': + return Number(value).toFixed(6); + case '%s': + default: + return String(value); + } + }); + } + + this.options.outputFn(formattedMessage, level); + } + + /** + * Log an error message + * @param message The message to log + * @param args Values to substitute into the message + */ + error(message: string, ...args: unknown[]): void { + this.log(LogLevel.ERROR, message, ...args); + } + + /** + * Log an error object with full context + * @param error The error object to log + * @param message Optional message to display before the error + */ + logError(error: unknown, message?: string): void { + if (this.options.level < LogLevel.ERROR) return; + + if (isAppError(error)) { + // Already an AppError with context + if (message) { + this.error(message); + } + + // Log the structured error + const errorObj = error.toJSON(); + this.options.outputFn( + this.formatMessage(LogLevel.ERROR, `[${errorObj.code}] ${errorObj.message}`), + LogLevel.ERROR + ); + + // Log context if present + if (errorObj.context && typeof errorObj.context === 'object' && Object.keys(errorObj.context).length > 0) { + this.options.outputFn( + this.formatMessage(LogLevel.ERROR, `Context: ${JSON.stringify(errorObj.context, null, 2)}`), + LogLevel.ERROR + ); + } + + // Log stack trace at debug level + if (errorObj.stack && this.options.level >= LogLevel.DEBUG) { + this.options.outputFn( + this.formatMessage(LogLevel.DEBUG, `Stack: ${errorObj.stack}`), + LogLevel.DEBUG + ); + } + + // Log cause if present + if (errorObj.cause) { + this.options.outputFn( + this.formatMessage(LogLevel.ERROR, `Caused by: ${JSON.stringify(errorObj.cause, null, 2)}`), + LogLevel.ERROR + ); + } + } else if (error instanceof Error) { + // Standard Error object + this.error(message || error.message); + if (this.options.level >= LogLevel.DEBUG && error.stack) { + this.debug(`Stack: ${error.stack}`); + } + } else { + // Unknown error type + this.error(message || 'Unknown error occurred'); + this.debug(`Error details: ${JSON.stringify(error, null, 2)}`); + } + } + + /** + * Log a warning message + * @param message The message to log + * @param args Values to substitute into the message + */ + warn(message: string, ...args: unknown[]): void { + this.log(LogLevel.WARN, message, ...args); + } + + /** + * Log an info message + * @param message The message to log + * @param args Values to substitute into the message + */ + info(message: string, ...args: unknown[]): void { + this.log(LogLevel.INFO, message, ...args); + } + + /** + * Log a debug message + * @param message The message to log + * @param args Values to substitute into the message + */ + debug(message: string, ...args: unknown[]): void { + this.log(LogLevel.DEBUG, message, ...args); + } + + /** + * Log a trace message (most verbose) + * @param message The message to log + * @param args Values to substitute into the message + */ + trace(message: string, ...args: unknown[]): void { + this.log(LogLevel.TRACE, message, ...args); + } + + /** + * Create a separator line for visual grouping in logs + */ + separator(): void { + 
this.info("--------------------------------"); + } + + /** + * Change the log level + */ + setLevel(level: LogLevel): void { + this.options.level = level; + } + + /** + * Get the current log level + */ + getLevel(): LogLevel { + return this.options.level; + } + + /** + * Enable or disable timestamps + */ + setTimestamps(enabled: boolean): void { + this.options.timestamps = enabled; + } + + /** + * Enable or disable showing log level + */ + setShowLevel(enabled: boolean): void { + this.options.showLevel = enabled; + } +} + +// Create a default logger instance for easy imports +export const logger = new Logger(); \ No newline at end of file
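
Putting the pieces together: the sketch below is illustrative only and is not part of the diff. It wires up the new utilities using only the exports shown above (`loadModelConfig`, `LLMService`, `PythonSandbox`, `fetchWeather`, `isOk`, `logger`, `isAppError`) and follows the same calling conventions as the test suite and `test-scrape.ts`; the file name, import paths, and the `"example"` module id passed to `PythonSandbox.create` are assumptions.

```typescript
// example.ts (hypothetical): composing the new utilities end to end.
import * as dotenv from "dotenv";
import { z } from "zod";
import { PythonSandbox } from "./sandbox";
import { LLMService } from "./services/llm";
import { loadModelConfig } from "./utils/config";
import { logger, LogLevel } from "./utils/logger";
import { isAppError } from "./utils/errors";
import { fetchWeather } from "./tools/fetch_weather";
import { isOk } from "./tools/types";

dotenv.config();
logger.setLevel(LogLevel.DEBUG);

async function main() {
  // 1. Provider-agnostic LLM call driven by environment configuration.
  const llm = new LLMService(loadModelConfig());
  const { content } = await llm.generateCompletion([
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Say hello in one sentence." },
  ]);
  logger.info("LLM replied: %s", content);

  // 2. Call a tool directly and branch on its ToolOutput result.
  const weather = await fetchWeather.fn(
    { city: "Paris" },
    { log: (message) => logger.debug(message) }
  );
  if (isOk(weather)) {
    logger.info(
      "Temperature in %s: %d%s",
      weather.result.city,
      weather.result.temperature,
      weather.result.units.temperature
    );
  } else {
    logger.warn("Weather lookup failed: %s", weather.error);
  }

  // 3. Expose a function to the MicroPython sandbox and call it from Python.
  const sandbox = await PythonSandbox.create("example");
  sandbox.expose("shout", {
    fn: async ({ text }) => text.toUpperCase(),
    input: z.object({ text: z.string() }),
    output: z.string(),
    description: "Upper-cases a string",
  });
  try {
    const { stdout } = await sandbox.runCode("print(await shout(text='hello'))");
    logger.info("Sandbox said: %s", stdout.trim());
  } catch (error) {
    // SandboxError carries stdout/stderr plus the context added in runCode.
    if (isAppError(error)) logger.logError(error, "Sandbox execution failed");
    else throw error;
  }
}

main().catch((error) => {
  logger.logError(error, "Example failed");
  process.exit(1);
});
```

The sketch deliberately calls the tool's `fn` directly with a `ToolContext`, mirroring `test-scrape.ts`, rather than routing through the `Agent` class, which is not part of this diff.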