From 1eeefa111d4c5ec258acc13fdf48c8b349ee1dc1 Mon Sep 17 00:00:00 2001
From: Henry Fontanier <henry@dust.tt>
Date: Tue, 18 Feb 2025 01:50:12 +0100
Subject: [PATCH] wip

---
 connectors/henry.ts                           |   5 +
 x/henry/mp-sandbox-agent/agent.ts             | 116 --------
 .../mp-sandbox-agent/agent/helpers.test.ts    | 109 ++++++++
 x/henry/mp-sandbox-agent/agent/helpers.ts     |  71 +++++
 x/henry/mp-sandbox-agent/agent/index.ts       | 253 ++++++++++++++++++
 x/henry/mp-sandbox-agent/main.ts              |  17 +-
 x/henry/mp-sandbox-agent/sandbox.test.ts      |  18 +-
 x/henry/mp-sandbox-agent/sandbox.ts           |  61 ++++-
 .../mp-sandbox-agent/tools/fetch_weather.ts   |  40 +--
 x/henry/mp-sandbox-agent/tools/helpers.ts     |   8 +-
 x/henry/mp-sandbox-agent/tools/types.ts       |  28 +-
 11 files changed, 561 insertions(+), 165 deletions(-)
 create mode 100644 connectors/henry.ts
 delete mode 100644 x/henry/mp-sandbox-agent/agent.ts
 create mode 100644 x/henry/mp-sandbox-agent/agent/helpers.test.ts
 create mode 100644 x/henry/mp-sandbox-agent/agent/helpers.ts
 create mode 100644 x/henry/mp-sandbox-agent/agent/index.ts
diff --git a/connectors/henry.ts b/connectors/henry.ts
new file mode 100644
index 0000000000000..5cb33c10e8821
--- /dev/null
+++ b/connectors/henry.ts
@@ -0,0 +1,5 @@
+async function main() {
+  // Placeholder: the body is empty, so running this script currently does nothing.
+}
+
+main().catch(console.error);
diff --git a/x/henry/mp-sandbox-agent/agent.ts b/x/henry/mp-sandbox-agent/agent.ts
deleted file mode 100644
index 77aa04245a11b..0000000000000
--- a/x/henry/mp-sandbox-agent/agent.ts
+++ /dev/null
@@ -1,116 +0,0 @@
-import OpenAI from "openai";
-import { PythonSandbox } from "./sandbox";
-import type { Tool } from "./tools/types";
-import { z } from "zod";
-
-function generateFunctionDocs(functions: Record<string, Tool>): string {
-  let docs = "Available functions:\n";
-
-  for (const [fnName, { description, input, output }] of Object.entries(
-    functions
-  )) {
-    // Function signature with description
-    const inputObject = input as z.ZodObject<any>;
-    const outputObject = output as z.ZodObject<any>;
-
-    docs += `- ${fnName}(${Object.keys(inputObject.shape).join(
-      ", "
-    )}): async function that ${description}\n`;
-
-    // Input parameters
-    docs += "  Parameters:\n";
-    for (const [paramName, paramSchema] of Object.entries(inputObject.shape)) {
-      const zodSchema = paramSchema as z.ZodType;
-      docs += `  * ${paramName} (${zodSchema.description || "any"})\n`;
-    }
-
-    // Output fields
-    docs += "  Returns an object with fields:\n";
-    for (const [fieldName, fieldSchema] of Object.entries(outputObject.shape)) {
-      const zodSchema = fieldSchema as z.ZodType;
-      docs += `  * ${fieldName} (${zodSchema.description || "any"})\n`;
-    }
-  }
-
-  return docs;
-}
-
-export class Agent {
-  private sandbox!: PythonSandbox;
-  private openai: OpenAI;
-  private exposedTools: Set<string> = new Set();
-  private goal: string;
-
-  private constructor(goal: string, apiKey: string) {
-    this.goal = goal;
-    this.openai = new OpenAI({ apiKey });
-  }
-
-  static async create(goal: string, apiKey: string): Promise<Agent> {
-    const agent = new Agent(goal, apiKey);
-    agent.sandbox = await PythonSandbox.create();
-    return agent;
-  }
-
-  private generateSystemPrompt(tools: Record<string, Tool>): string {
-    return (
-      "You are a Python code generator working towards the following goal:\n" +
-      this.goal +
-      "\n\n" +
-      "Your response should follow this format:\n\n" +
-      "1. (Optional) A brief explanation of what the code will do and why, in plain text\n" +
-      "2. A Python code block that:\n" +
-      "   - Contains no imports\n" +
-      "   - Contains only top-level statements (no function definitions)\n" +
-      "   - Can use await expressions directly (top-level await is supported)\n" +
-      "   - Contains no comments\n" +
-      "   - Is simple and self-contained\n\n" +
-      generateFunctionDocs(tools) +
-      "\n" +
-      "Example response format:\n" +
-      "This code will fetch and display the current weather in London.\n\n" +
-      "```python\n" +
-      "weather = await fetch_weather('London')\n" +
-      'print(f\'Weather in {weather["city"]}: {weather["temperature"]}°C\')\n' +
-      "```"
-    );
-  }
-
-  async step(
-    tools: Record<string, Tool>,
-    input: string
-  ): Promise<{ stdout: string; stderr: string }> {
-    // Expose or update tools
-    for (const [name, tool] of Object.entries(tools)) {
-      this.sandbox.expose(name, tool);
-      this.exposedTools.add(name);
-    }
-
-    const response = await this.openai.chat.completions.create({
-      model: "o3-mini",
-      messages: [
-        {
-          role: "system",
-          content: this.generateSystemPrompt(tools),
-        },
-        {
-          role: "user",
-          content: input,
-        },
-      ],
-    });
-
-    if (!response.choices[0].message.content) {
-      throw new Error("No code generated from OpenAI");
-    }
-
-    // Extract code from the response
-    const content = response.choices[0].message.content;
-    const codeMatch = content.match(/```python\n([\s\S]*?)```/) ||
-      content.match(/```\n([\s\S]*?)```/) || [null, content];
-    const code = codeMatch[1].trim();
-
-    // Execute the code
-    return await this.sandbox.runCode(code);
-  }
-}
diff --git a/x/henry/mp-sandbox-agent/agent/helpers.test.ts b/x/henry/mp-sandbox-agent/agent/helpers.test.ts
new file mode 100644
index 0000000000000..d3dfca54b1046
--- /dev/null
+++ b/x/henry/mp-sandbox-agent/agent/helpers.test.ts
@@ -0,0 +1,109 @@
+import { describe, expect, it } from "bun:test";
+import { z } from "zod";
+import { generateToolDocs } from "./helpers";
+import { defineTool } from "../tools/helpers";
+import { ToolOutput } from "../tools/types";
+
+describe("generateToolDocs", () => { // every check below is a substring match, so exact layout is not pinned
+  it("should generate docs for a simple tool", () => {
+    const simpleTool = defineTool(
+      "A simple test function",
+      z.object({
+        name: z.string().describe("The name parameter"),
+      }),
+      z.string().describe("The return value"),
+      async () => ({ type: "success", result: "test" })
+    );
+
+    const docs = generateToolDocs({ simpleTool });
+    expect(docs).toContain("Available functions:");
+    expect(docs).toContain(
+      "All functions may return None if they fail (check for None before accessing the result)."
+    );
+    expect(docs).toContain(
+      "simpleTool(name): async function - A simple test function"
+    );
+    expect(docs).toContain("* name: The name parameter");
+    expect(docs).toContain("Returns:");
+    expect(docs).toContain("The return value");
+  });
+
+  it("should generate docs for a tool with complex types", () => {
+    const complexTool = defineTool(
+      "A complex test function",
+      z.object({
+        user: z
+          .object({
+            name: z.string().describe("User's name"),
+            age: z.number().describe("User's age"),
+          })
+          .describe("User object"),
+        options: z.array(z.string()).describe("List of options"),
+      }),
+      z.object({
+        id: z.number().describe("User ID"),
+        settings: z
+          .array(
+            z.object({
+              key: z.string().describe("Setting key"),
+              value: z.string().describe("Setting value"),
+            })
+          )
+          .describe("User settings"),
+      }),
+      async () => ({ type: "success", result: { id: 1, settings: [] } })
+    );
+
+    const docs = generateToolDocs({ complexTool });
+    expect(docs).toContain(
+      "complexTool(user, options): async function - A complex test function"
+    );
+    expect(docs).toContain("* user: dictionary with keys:");
+    expect(docs).toContain("  * name: User's name");
+    expect(docs).toContain("  * age: User's age");
+    expect(docs).toContain("* options: array of string");
+    expect(docs).toContain("Returns:");
+    expect(docs).toContain("dictionary with keys:");
+    expect(docs).toContain("* id: User ID");
+    expect(docs).toContain("* settings: array of dictionary with keys:");
+    expect(docs).toContain("  * key: Setting key");
+    expect(docs).toContain("  * value: Setting value");
+  });
+
+  it("should handle multiple tools", () => {
+    const tool1 = defineTool(
+      "First tool",
+      z.object({ a: z.string() }),
+      z.number(),
+      async () => ({ type: "success", result: 1 })
+    );
+
+    const tool2 = defineTool(
+      "Second tool",
+      z.object({ b: z.boolean() }),
+      z.string(),
+      async () => ({ type: "success", result: "test" })
+    );
+
+    const docs = generateToolDocs({ tool1, tool2 });
+    expect(docs).toContain("tool1(a): async function - First tool");
+    expect(docs).toContain("tool2(b): async function - Second tool");
+  });
+
+  it("should handle tools with nested output types", () => {
+    const outputTool = defineTool(
+      "Output test function",
+      z.object({ input: z.string() }),
+      z.string(),
+      async () => ({ type: "success", result: "test" })
+    );
+
+    const docs = generateToolDocs({ outputTool });
+    // The documented return type must be the plain success value ("string")
+    expect(docs).toContain("Returns:");
+    expect(docs).toContain("string");
+    // The { type, result } wrapper fields of the tool result must not leak into the docs
+    expect(docs).not.toContain("type:");
+    expect(docs).not.toContain("result:");
+  });
+});
diff --git a/x/henry/mp-sandbox-agent/agent/helpers.ts b/x/henry/mp-sandbox-agent/agent/helpers.ts
new file mode 100644
index 0000000000000..13e30b3ba87be
--- /dev/null
+++ b/x/henry/mp-sandbox-agent/agent/helpers.ts
@@ -0,0 +1,71 @@
+import type { Tool } from "../tools/types";
+import { z } from "zod";
+
+/** Renders a zod schema as text for the tool docs; nested levels get a deeper `indent`. */
+function describeZodType(schema: z.ZodType, indent: string = ""): string {
+  const deeper = indent + "  ";
+
+  if (schema instanceof z.ZodArray) {
+    return `array of ${describeZodType(schema.element, deeper)}`;
+  }
+
+  if (schema instanceof z.ZodObject) {
+    // One "* key: type" bullet per field; continuation lines are re-indented.
+    const bullets = Object.entries(schema.shape).map(([key, value]) => {
+      const text = describeZodType(value as z.ZodType, deeper);
+      return `${indent}  * ${key}: ${text.split("\n").join("\n" + indent)}\n`;
+    });
+    return "dictionary with keys:\n" + bullets.join("");
+  }
+
+  if (schema instanceof z.ZodUnion && schema.options.length === 2) {
+    // A ToolOutput-shaped union is documented as its success payload only.
+    const success = schema.options.find(
+      (opt: z.ZodType) =>
+        opt instanceof z.ZodObject && opt.shape.type?.value === "success"
+    ) as z.ZodObject<any> | undefined;
+    if (success?.shape.result) {
+      return describeZodType(success.shape.result, indent);
+    }
+    // Any other two-member union is spelled out member by member.
+    const members = schema.options.map((opt: z.ZodType) =>
+      describeZodType(opt, deeper)
+    );
+    return `union of ${members.join(" | ")}`;
+  }
+
+  const kind = schema.constructor.name.replace("Zod", "").toLowerCase();
+  return schema.description || kind || "any";
+}
+
+/**
+ * Builds the "Available functions" prompt section: for every tool, its
+ * signature, parameter descriptions and (success) return type.
+ */
+export function generateToolDocs(tools: Record<string, Tool>): string {
+  let docs = "Available functions:\n";
+  docs +=
+    "Note: All functions may return None if they fail (check for None before accessing the result).\n\n";
+
+  for (const [fnName, { description, input, output }] of Object.entries(
+    tools
+  )) {
+    // Tool inputs are assumed to be z.object schemas; keys name the parameters.
+    const inputObject = input as z.ZodObject<any>;
+    const paramNames = Object.keys(inputObject.shape).join(", ");
+
+    docs += `- ${fnName}(${paramNames}): async function - ${description}\n`;
+    docs += "  Parameters:\n";
+    for (const [paramName, paramSchema] of Object.entries(inputObject.shape)) {
+      const paramDoc = describeZodType(paramSchema as z.ZodType, "  ");
+      docs += `  * ${paramName}: ${paramDoc}\n`;
+    }
+
+    // Indent and newline-terminate the return type so the next tool's entry
+    // starts on its own line instead of being glued to this description.
+    docs += "  Returns:\n";
+    docs += `  ${describeZodType(output, "  ").trimEnd()}\n`;
+  }
+
+  return docs;
+}
diff --git a/x/henry/mp-sandbox-agent/agent/index.ts b/x/henry/mp-sandbox-agent/agent/index.ts
new file mode 100644
index 0000000000000..c1a4e72278a9e
--- /dev/null
+++ b/x/henry/mp-sandbox-agent/agent/index.ts
@@ -0,0 +1,253 @@
+import OpenAI from "openai";
+import { PythonSandbox } from "../sandbox";
+import type { Tool } from "../tools/types";
+import { generateToolDocs } from "./helpers";
+import { z } from "zod";
+import { type ChatCompletionMessageParam } from "openai/resources/chat/completions";
+import { defineTool } from "../tools/helpers";
+
+type StepResult = {
+  generation: string; // the model's full message for this step (analysis + code block)
+  codeOutput: string; // formatted output of running that step's code in the sandbox
+};
+
+/** Wraps the sandbox output in a user message, optionally asking for more code. */
+function codeOutputToMessage(
+  codeOutput: string,
+  shouldContinue: boolean
+): ChatCompletionMessageParam {
+  const parts = [
+    "Here is the output of the code you generated:\n\n",
+    `${codeOutput}\n\n`,
+  ];
+  if (shouldContinue) {
+    parts.push("Please continue generating code.\n");
+  }
+
+  // Sent with the "user" role: the sandbox output is fed back as a user turn.
+  return { role: "user", content: parts.join("") };
+}
+
+/** Replays a past step as the assistant's generation followed by its code output. */
+function stepResultToMessages(
+  stepResult: StepResult
+): ChatCompletionMessageParam[] {
+  const { generation, codeOutput } = stepResult;
+  const assistantTurn: ChatCompletionMessageParam = {
+    role: "assistant",
+    content: generation,
+  };
+  return [assistantTurn, codeOutputToMessage(codeOutput, true)];
+}
+
+/**
+ * LLM-driven agent that works towards a goal by generating Python code, running
+ * it in a sandbox and feeding the output back to the model, one `step` at a time.
+ */
+export class Agent {
+  private sandbox!: PythonSandbox;
+  private openai: OpenAI;
+  private exposedTools: Set<string> = new Set();
+  private goal: string;
+  private steps: Array<StepResult> = [];
+
+  private constructor(goal: string, apiKey: string) {
+    this.goal = goal;
+    this.openai = new OpenAI({ apiKey });
+  }
+
+  static async create(goal: string, apiKey: string): Promise<Agent> {
+    console.log("--------------------------------");
+    console.log(`Creating agent with goal: ${goal}`);
+    console.log("--------------------------------");
+    const agent = new Agent(goal, apiKey);
+    agent.sandbox = await PythonSandbox.create();
+    return agent;
+  }
+
+  private generateSystemPrompt(tools: Record<string, Tool>): string {
+    return (
+      "You are an AI agent that uses Python code to work towards a goal that has been provided by the user.\n" +
+      "You can use as many steps as you need to achieve the goal. You may run a new piece of code at each step.\n" +
+      "Your response should follow this format:\n\n" +
+      "1. An analysis of the situation, in plain text. Explain what you need to do next to achieve the goal.\n" +
+      "2. A Python code block that:\n" +
+      "   - Contains no imports\n" +
+      "   - Contains only top-level statements (no function definitions)\n" +
+      "   - Can use await expressions directly (top-level await is supported)\n" +
+      "   - Contains no comments\n" +
+      "   - Is simple and self-contained\n\n" +
+      generateToolDocs(tools) +
+      "\n" +
+      "You will then be provided with the standard output, standard error and error logs from the code you generate.\n" +
+      "The user will not see the output of your code. " +
+      "Once your code logs contain enough information to provide a final answer to the user, you must use the `stop_execution` tool. " +
+      "You must always exactly adhere to this format. There must ALWAYS be a Python code block in your message. " +
+      "If you don't have any code to provide, just provide a code block that calls the `stop_execution` tool.\n\n" +
+      "Never use the `stop_execution` tool in your first Python code block, as you will not have any guarantee that the execution logs " +
+      "contain enough information to provide a final answer to the user (it could fail).\n" +
+      "You will then be asked to provide a final answer to the user based on the execution logs you have.\n" +
+      "Example response format:\n" +
+      "I need to use the fetch_weather tool to get the weather in London. I will then print the relevant information.\n\n" +
+      "```python\n" +
+      "weather = await fetch_weather('London')\n" +
+      'print(f\'Weather in {weather["city"]}: {weather["temperature"]}°C\')\n' +
+      "```"
+    );
+  }
+
+  /**
+   * Runs one iteration: asks the model for code and executes it in the sandbox.
+   * @returns The final answer once `stop_execution` was called, otherwise `null`.
+   */
+  async step(_tools: Record<string, Tool>): Promise<string | null> {
+    const tools = { ..._tools };
+    if (Object.keys(tools).some((name) => name === "stop_execution")) {
+      throw new Error("`stop_execution` is a reserved tool name.");
+    }
+
+    let shouldContinue = true;
+    const finalExecutionTool: Tool = defineTool(
+      "Must be used when the execution logs contain enough information to provide a final answer to the user. " +
+        "After using this tool, the user will ask you to write a final answer based on your execution logs.",
+      z.object({}),
+      z.null(),
+      async () => {
+        shouldContinue = false;
+        return { type: "success", result: null };
+      }
+    );
+
+    // Always registered; only the system prompt tells the model not to call it
+    // in its first code block.
+    tools["stop_execution"] = finalExecutionTool;
+
+    // Expose or update tools; failed calls return null to the sandbox and are collected in `errors`
+    const errors: Array<{ tool: string; error: string }> = [];
+    console.log("--------------------------------");
+    for (const [name, tool] of Object.entries(tools)) {
+      console.log(`Exposing tool: ${name}`);
+      this.sandbox.expose(name, {
+        ...tool,
+        fn: async (input: unknown) => {
+          const result = await tool.fn(input);
+          if (result.type === "success") {
+            return result.result;
+          }
+          errors.push({ tool: name, error: result.error });
+          return null;
+        },
+      });
+      this.exposedTools.add(name);
+    }
+    console.log("--------------------------------");
+    const systemPrompt = this.generateSystemPrompt(tools);
+
+    console.log("--------------------------------");
+    console.log("System prompt:");
+    console.log(systemPrompt);
+    console.log("--------------------------------");
+
+    // The conversation is rebuilt on every step: prompt, goal, then past steps.
+    const messages: ChatCompletionMessageParam[] = [
+      { role: "system", content: systemPrompt },
+      { role: "user", content: this.goal },
+    ];
+
+    for (const step of this.steps) {
+      messages.push(...stepResultToMessages(step));
+    }
+
+    console.log("--------------------------------");
+    console.log("Messages:");
+    console.log(messages);
+    console.log("--------------------------------");
+
+    const response = await this.openai.chat.completions.create({
+      model: "gpt-4o",
+      messages,
+    });
+
+    const content = response.choices[0]?.message.content;
+    if (!content) {
+      throw new Error("No code generated from OpenAI");
+    }
+
+    console.log("--------------------------------");
+    console.log("Code generation response:");
+    console.log(content);
+    console.log("--------------------------------");
+
+    // Use the first fenced code block, falling back to the whole message.
+    const codeMatch = content.match(/```python\n([\s\S]*?)```/) ||
+      content.match(/```\n([\s\S]*?)```/) || [null, content];
+    const code = codeMatch[1].trim();
+
+    // Execute the code
+    const codeOutput = await (async () => {
+      try {
+        const codeOutput = await this.sandbox.runCode(code);
+        let output = "";
+        if (codeOutput.stdout) {
+          output += `STDOUT:\n${codeOutput.stdout}\n\n`;
+        }
+        if (codeOutput.stderr) {
+          output += `STDERR:\n${codeOutput.stderr}\n\n`;
+        }
+        if (errors.length > 0) {
+          output += `ERRORS:\n${errors
+            .map((e) => `* ${e.tool}: ${e.error}`)
+            .join("\n")}\n\n`;
+        }
+
+        if (!output) {
+          return "No output returned from the code.";
+        }
+
+        return output;
+      } catch (error) {
+        return `STDERR:\n${error}`;
+      }
+    })();
+
+    console.log("--------------------------------");
+    console.log("Code output:");
+    console.log(codeOutput);
+    console.log("--------------------------------");
+
+    // Feed back the code output even on the final step so the answer can use it.
+    messages.push({ role: "assistant", content });
+    messages.push(codeOutputToMessage(codeOutput, shouldContinue));
+
+    if (!shouldContinue) {
+      messages.push({
+        role: "user",
+        content:
+          "Please provide a comprehensive final answer to the goal based on the execution logs you have.",
+      });
+      const finalResponse = await this.openai.chat.completions.create({
+        model: "gpt-4o",
+        messages,
+      });
+      const finalAnswer = finalResponse.choices[0]?.message.content;
+      // A null answer would be mistaken by callers for "keep stepping".
+      if (!finalAnswer) throw new Error("No final answer generated from OpenAI");
+      return finalAnswer;
+    }
+
+    const stepResult: StepResult = { generation: content, codeOutput };
+
+    console.log("--------------------------------");
+    console.log("Step result:");
+    console.log(stepResult);
+    console.log("--------------------------------");
+
+    this.steps.push(stepResult);
+
+    return null;
+  }
+
+  getSteps(): Array<StepResult> {
+    return this.steps;
+  }
+}
diff --git a/x/henry/mp-sandbox-agent/main.ts b/x/henry/mp-sandbox-agent/main.ts
index 7ec53f237f59a..546f568c85283 100644
--- a/x/henry/mp-sandbox-agent/main.ts
+++ b/x/henry/mp-sandbox-agent/main.ts
@@ -2,6 +2,9 @@
 import * as dotenv from "dotenv";
 import { fetchWeather } from "./tools/fetch_weather";
 import { Agent } from "./agent";
+import type { Tool } from "./tools/types";
+import { defineTool } from "./tools/helpers";
+import { z } from "zod";
 
 // Load environment variables from .env file
 dotenv.config();
@@ -23,10 +26,7 @@ async function main() {
   }
 
   // Initialize agent with a goal
-  const agent = await Agent.create(
-    "Help users get weather information for cities around the world",
-    apiKey as string
-  );
+  const agent = await Agent.create(request, apiKey as string);
 
   // Define available tools
   const tools = {
@@ -34,11 +34,12 @@ async function main() {
   };
 
   // Run a step with the user's request
-  const { stdout, stderr } = await agent.step(tools, request);
+  let answer: string | null = null;
+  while (answer === null) {
+    answer = await agent.step(tools);
+  }
 
-  // Output results
-  if (stdout) console.log("\nOutput:", stdout);
-  if (stderr) console.log("\nErrors:", stderr);
+  console.log(answer);
 }
 
 main().catch((error) => {
diff --git a/x/henry/mp-sandbox-agent/sandbox.test.ts b/x/henry/mp-sandbox-agent/sandbox.test.ts
index 2e88f2d060419..c5097ed85c22e 100644
--- a/x/henry/mp-sandbox-agent/sandbox.test.ts
+++ b/x/henry/mp-sandbox-agent/sandbox.test.ts
@@ -13,13 +13,13 @@ describe("PythonSandbox", () => {
   test("should support importing and calling exposed functions", async () => {
     const sandbox = await PythonSandbox.create("test");
     sandbox.expose("fake_function", {
-      fn: () => "Hello, World!",
+      fn: async () => "Hello, World!",
       input: z.object({}),
       output: z.string(),
       description: "A fake function that returns a string",
     });
     const { stdout, stderr } = await sandbox.runCode(
-      "from test import fake_function\nprint(fake_function())"
+      "from test import fake_function\nprint(await fake_function())"
     );
     expect(stdout).toBe("Hello, World!\n");
     expect(stderr).toBe("");
@@ -28,13 +28,13 @@ describe("PythonSandbox", () => {
   test("should support importing and calling exposed functions with arguments", async () => {
     const sandbox = await PythonSandbox.create("test");
     sandbox.expose("add", {
-      fn: ({ a, b }: { a: number; b: number }) => a + b,
+      fn: async ({ a, b }: { a: number; b: number }) => a + b,
       input: z.object({ a: z.number(), b: z.number() }),
       output: z.number(),
       description: "Adds two numbers",
     });
     const { stdout, stderr } = await sandbox.runCode(
-      "from test import add\nprint(add(1, 2))"
+      "from test import add\nprint(await add(1, 2))"
     );
     expect(stdout).toBe("3\n");
     expect(stderr).toBe("");
@@ -43,13 +43,13 @@ describe("PythonSandbox", () => {
   test("should support importing and calling exposed functions with positional arguments", async () => {
     const sandbox = await PythonSandbox.create("test");
     sandbox.expose("sub", {
-      fn: ({ b, a }: { a: number; b: number }) => b - a,
+      fn: async ({ b, a }: { a: number; b: number }) => b - a,
       input: z.object({ b: z.number(), a: z.number() }),
       output: z.number(),
       description: "Subtracts two numbers",
     });
     const { stdout, stderr } = await sandbox.runCode(
-      "from test import sub\nprint(sub(1, 2))"
+      "from test import sub\nprint(await sub(1, 2))"
     );
     expect(stdout).toBe("-1\n");
     expect(stderr).toBe("");
@@ -58,13 +58,13 @@ describe("PythonSandbox", () => {
   test("should support importing and calling exposed functions with keyword arguments", async () => {
     const sandbox = await PythonSandbox.create("test");
     sandbox.expose("multiply", {
-      fn: ({ a, b }: { a: number; b: number }) => a * b,
+      fn: async ({ a, b }: { a: number; b: number }) => a * b,
       input: z.object({ a: z.number(), b: z.number() }),
       output: z.number(),
       description: "Multiplies two numbers",
     });
     const { stdout, stderr } = await sandbox.runCode(
-      "from test import multiply\nprint(multiply(a=1, b=2))"
+      "from test import multiply\nprint(await multiply(a=1, b=2))"
     );
     expect(stdout).toBe("2\n");
     expect(stderr).toBe("");
@@ -101,7 +101,7 @@ describe("PythonSandbox", () => {
     } catch (error) {
       expect(error).toBeInstanceOf(Error);
       expect((error as Error).message).toBe(
-        'Traceback (most recent call last):\n  File "<stdin>", line 2, in <module>\nException: This is a test error\n'
+        'Traceback (most recent call last):\n  File "<stdin>", line 3, in <module>\nException: This is a test error\n'
       );
     }
   });
diff --git a/x/henry/mp-sandbox-agent/sandbox.ts b/x/henry/mp-sandbox-agent/sandbox.ts
index acebd352c5aa3..38009738e96c1 100644
--- a/x/henry/mp-sandbox-agent/sandbox.ts
+++ b/x/henry/mp-sandbox-agent/sandbox.ts
@@ -3,16 +3,23 @@ import {
   type MicroPythonInstance,
 } from "@micropython/micropython-webassembly-pyscript/micropython.mjs";
 import * as z from "zod";
-import type { Tool } from "./tools/types";
 
 export interface CodeExecutionResult {
   result: unknown;
   stdout: string;
 }
 
+type ExposedFunction = {
+  fn: (input: any) => Promise<any>;
+  input: z.ZodType<any>;
+  output: z.ZodType<any>;
+  description: string;
+};
+
 export class PythonSandbox {
   private mp!: MicroPythonInstance;
-  private exposedFunctions: { [key: string]: Tool } = {};
+  private exposedFunctions: { [key: string]: ExposedFunction } = {};
+  private module: Record<string, any> = {};
   private moduleId: string;
   private stdoutBuffer: string[] = [];
   private stderrBuffer: string[] = [];
@@ -54,7 +61,7 @@ export class PythonSandbox {
     return { stdout, stderr };
   }
 
-  expose(name: string, func: Tool) {
+  expose(name: string, func: ExposedFunction) {
     this.exposedFunctions[name] = func;
 
     const wrapper = (...args: unknown[]) => {
@@ -68,12 +75,26 @@ export class PythonSandbox {
               [Object.keys(inputObject.shape)[1]]: args[1],
             }
       );
-      return func.fn(params);
+
+      const r = func.fn(params);
+
+      const maybeParseValue = (value: unknown) =>
+        typeof value === "string" ||
+        typeof value === "number" ||
+        typeof value === "boolean"
+          ? value
+          : JSON.stringify(value);
+
+      if (r instanceof Promise) {
+        return r.then(maybeParseValue);
+      }
+
+      return maybeParseValue(r);
     };
 
     // Create an object to hold our exposed functions
-    const module = { [name]: wrapper };
-    this.mp.registerJsModule(this.moduleId, module);
+    this.module[name] = wrapper;
+    this.mp.registerJsModule(this.moduleId, this.module);
   }
 
   async runCode(code: string): Promise<{ stdout: string; stderr: string }> {
@@ -81,9 +102,31 @@ export class PythonSandbox {
     this.clearBuffers();
 
     // Import exposed functions if any
-    const importCode = Object.keys(this.exposedFunctions)
-      .map((name) => `from ${this.moduleId} import ${name}`)
-      .join("\n");
+    let importCode = "import json\n";
+    // +
+    //   Object.keys(this.exposedFunctions)
+    //     // Automatically json loads the result of the function
+    //     .map(
+    //       (name) =>
+    //         `from ${this.moduleId} import as _${name}; def ${name}(*args, **kwargs): return json.loads(_${name}(*args, **kwargs))`
+    //     )
+    //     .
+    // join("\n");
+
+    for (const name of Object.keys(this.exposedFunctions)) {
+      importCode += `from ${this.moduleId} import ${name} as _${name}\n`;
+      importCode += `
+async def ${name}(*args, **kwargs):
+  r = await _${name}(*args, **kwargs)
+  loaded = json.loads(r)
+  return loaded
+\n`;
+    }
+
+    console.log("--------------------------------");
+    console.log("Import code:");
+    console.log(importCode);
+    console.log("--------------------------------");
 
     try {
       // Run the actual code
diff --git a/x/henry/mp-sandbox-agent/tools/fetch_weather.ts b/x/henry/mp-sandbox-agent/tools/fetch_weather.ts
index 0ef926486b35f..4ebe02ce91dc4 100644
--- a/x/henry/mp-sandbox-agent/tools/fetch_weather.ts
+++ b/x/henry/mp-sandbox-agent/tools/fetch_weather.ts
@@ -1,23 +1,26 @@
 import { defineTool } from "./helpers";
 import { z } from "zod";
+import { ok, err } from "./types";
+
+const WeatherSchema = z.object({
+  city: z.string().describe("Full city name with country"),
+  temperature: z.number().describe("Temperature in °C"),
+  precipitation: z.number().describe("Precipitation in mm"),
+  weathercode: z.number().describe("WMO weather code"),
+  units: z
+    .object({
+      temperature: z.string().describe("Temperature unit (e.g., °C)"),
+      precipitation: z.string().describe("Precipitation unit (e.g., mm)"),
+    })
+    .describe("Measurement units for temperature and precipitation"),
+});
 
 export const fetchWeather = defineTool(
   "Fetches current weather data for the specified city",
   z.object({
     city: z.string().describe("Name of the city to get weather for"),
   }),
-  z.object({
-    city: z.string().describe("Full city name with country"),
-    temperature: z.number().describe("Temperature in °C"),
-    precipitation: z.number().describe("Precipitation in mm"),
-    weathercode: z.number().describe("WMO weather code"),
-    units: z
-      .object({
-        temperature: z.string().describe("Temperature unit (e.g., °C)"),
-        precipitation: z.string().describe("Precipitation unit (e.g., mm)"),
-      })
-      .describe("Measurement units for temperature and precipitation"),
-  }),
+  WeatherSchema,
   async ({ city }) => {
     try {
       // First get coordinates for the city
@@ -29,7 +32,7 @@ export const fetchWeather = defineTool(
       const geocodeData = await geocodeResponse.json();
 
       if (!geocodeData.results?.[0]) {
-        throw new Error(`City "${city}" not found`);
+        return err(`City "${city}" not found`);
       }
 
       const {
@@ -45,7 +48,7 @@ export const fetchWeather = defineTool(
       );
       const data = await response.json();
 
-      return {
+      return ok({
         city: `${foundCity}, ${country}`,
         temperature: data.current.temperature_2m,
         precipitation: data.current.precipitation,
@@ -54,10 +57,13 @@ export const fetchWeather = defineTool(
           temperature: data.current_units.temperature_2m,
           precipitation: data.current_units.precipitation,
         },
-      };
+      });
     } catch (error) {
-      console.error("Error fetching weather:", error);
-      return null;
+      return err(
+        `Error fetching weather: ${
+          error instanceof Error ? error.message : String(error)
+        }`
+      );
     }
   }
 );
diff --git a/x/henry/mp-sandbox-agent/tools/helpers.ts b/x/henry/mp-sandbox-agent/tools/helpers.ts
index 50ceecbe2be5e..69295d97494ee 100644
--- a/x/henry/mp-sandbox-agent/tools/helpers.ts
+++ b/x/henry/mp-sandbox-agent/tools/helpers.ts
@@ -1,16 +1,16 @@
-import type { Tool } from "./types";
+import type { Tool, ToolOutput } from "./types";
 import { z } from "zod";
 
-export function defineTool<I extends z.ZodType<any>, O extends z.ZodType<any>>(
+export function defineTool<I extends z.ZodType, O extends z.ZodType>(
   description: string,
   input: I,
   output: O,
-  implementation: (args: z.infer<I>) => Promise<z.infer<O> | null>
+  fn: (args: z.infer<I>) => Promise<ToolOutput<z.infer<O>>>
 ): Tool {
   return {
     description,
     input,
     output,
-    fn: implementation,
+    fn: fn as Tool["fn"],
   };
 }
diff --git a/x/henry/mp-sandbox-agent/tools/types.ts b/x/henry/mp-sandbox-agent/tools/types.ts
index 6134b43733228..999674b0ed65f 100644
--- a/x/henry/mp-sandbox-agent/tools/types.ts
+++ b/x/henry/mp-sandbox-agent/tools/types.ts
@@ -1,8 +1,32 @@
 import { z } from "zod";
 
 export type Tool = {
-  fn: Function;
+  fn: (input: any) => Promise<ToolOutput<any>>;
   input: z.ZodType<any>;
-  output: z.ZodType<any>;
+  output: z.ZodType<any>; // This represents the success case schema
   description: string;
 };
+
+export const ToolOutput = <T extends z.ZodType>(valueSchema: T) =>
+  z.union([
+    z.object({ type: z.literal("success"), result: valueSchema }), // success: carries the value
+    z.object({ type: z.literal("error"), error: z.string() }), // failure: carries a message
+  ]);
+
+export type ToolOutput<T> = z.infer<
+  ReturnType<typeof ToolOutput<z.ZodType<T>>>
+>;
+
+export function isOk<T>(
+  output: ToolOutput<T>
+): output is { type: "success"; result: T } {
+  return output.type === "success"; // type guard: narrows on the "type" discriminant
+}
+
+export function ok<T>(result: T): ToolOutput<T> {
+  return { type: "success", result }; // success variant wrapping `result`
+}
+
+export function err<T>(error: string): ToolOutput<T> {
+  return { type: "error", error }; // error variant carrying the message
+}