feat(agents-api): Add support for claude computer-use (#787)

```python from anthropic import Anthropic from anthropic.types.beta.beta_message import BetaMessage from litellm import ChatCompletionMessageToolCall, Function, Message from litellm.types.utils import Choices, ModelResponse # ... # ... # Check if the model is Anthropic and bash/editor/computer use tool is included if "claude" in agent_model.lower() and any( tool.type in ["bash_20241022", "text_editor_20241022", "computer_20241022"] for tool in agent_tools ): # Retrieve the API key from the environment variable betas = [COMPUTER_USE_BETA_FLAG] # Use Anthropic API directly client = Anthropic(api_key=anthropic_api_key) # Claude Response claude_response: BetaMessage = await client.beta.messages.create( model=agent_model, messages=prompt, tools=formatted_agent_tools, max_tokens=1024, betas=betas, ) # Claude returns [ToolUse | TextBlock] # We need to convert tool_use to tool_calls # And set content = TextBlock.text # But we need to ensure no more than one text block is returned if ( len([block for block in claude_response.content if block.type == "text"]) > 1 ): raise ApplicationError("Claude should only return one message") text_block = next( (block for block in claude_response.content if block.type == "text"), None, ) stop_reason = claude_response.stop_reason if stop_reason == "tool_use": choice = Choices( message=Message( role="assistant", content=text_block.text if text_block else None, tool_calls=[ ChatCompletionMessageToolCall( type="function", function=Function( name=block.name, arguments=block.input, ), ) for block in claude_response.content if block.type == "tool_use" ], ), finish_reason="tool_calls", ) else: assert text_block, "Claude should always return a text block for stop_reason=stop" choice = Choices( message=Message( role="assistant", content=text_block.text, ), finish_reason="stop", ) ```  ---- > [!IMPORTANT] > Adds support for Anthropic Claude model with new tool types, updates models, and fixes a bug in `execute_system.py`. > > - **Behavior**: > - Adds support for Anthropic Claude model with `computer_20241022`, `text_editor_20241022`, and `bash_20241022` tools in `prompt_step.py`. > - Handles tool use in Claude responses, raising an error if multiple messages are returned. > - **Models**: > - Adds `type` field to `Tool`, `CreateToolRequest`, `PatchToolRequest`, and `UpdateToolRequest` models. > - Updates `ToolType` enum to include new tool types. > - **Misc**: > - Fixes a bug in `execute_system.py` related to `delete` operation handling. > - Imports `AsyncAnthropic` in `prompt_step.py` for asynchronous API calls. > > <sup>This description was created by </sup>[<img alt="Ellipsis" src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup> for 5867ecb. It will automatically update as commits are pushed.</sup>  --------- Co-authored-by: vedantsahai18 <[email protected]> Co-authored-by: Vedantsahai18 <[email protected]>
julep-ai · Oct 31, 2024 · b63327b · b63327b
1 parent 29c2674
commit b63327b
Show file tree

Hide file tree

Showing 7 changed files with 226 additions and 38 deletions.
diff --git a/agents-api/agents_api/activities/execute_system.py b/agents-api/agents_api/activities/execute_system.py
@@ -225,8 +225,9 @@ async def execute_system(
                 return create_session_query(**arguments)
             elif system.operation == "update":
                 return update_session_query(**arguments)
-            elif system.operation == "delete":
-                return update_session_query(**arguments)
+            # FIXME: @hamada needs to be fixed
+            # elif system.operation == "delete":
+            #     return update_session_query(**arguments)
             elif system.operation == "delete":
                 return delete_session_query(**arguments)
         # TASKS

diff --git a/agents-api/agents_api/activities/task_steps/prompt_step.py b/agents-api/agents_api/activities/task_steps/prompt_step.py
@@ -1,7 +1,12 @@
 import os
+from datetime import datetime
 
-from anthropic import Anthropic  # Import Anthropic client
+from anthropic import Anthropic, AsyncAnthropic  # Import AsyncAnthropic client
+from anthropic.types.beta.beta_message import BetaMessage
 from beartype import beartype
+from litellm import ChatCompletionMessageToolCall, Function, Message
+from litellm.types.utils import Choices, ModelResponse
+from litellm.utils import CustomStreamWrapper, ModelResponse
 from temporalio import activity
 from temporalio.exceptions import ApplicationError
 
@@ -20,29 +25,29 @@
 
 # FIXME: This shouldn't be here.
 def format_agent_tool(tool: Tool) -> dict:
-    if tool.function:
+    if tool.type == "function":
         return {
             "type": "function",
             "function": {
                 "name": tool.name,
                 "description": tool.description,
-                "parameters": tool.function.parameters,
+                "parameters": tool.parameters,
             },
         }
-    elif tool.computer_20241022:
+    elif tool.type == "computer_20241022":
         return {
             "type": tool.type,
             "name": tool.name,
-            "display_width_px": tool.display_width_px,
-            "display_height_px": tool.display_height_px,
-            "display_number": tool.display_number,
+            "display_width_px": tool.spec["display_width_px"],
+            "display_height_px": tool.spec["display_height_px"],
+            "display_number": tool.spec["display_number"],
         }
-    elif tool.bash_20241022:
+    elif tool.type == "bash_20241022":
         return {
             "type": tool.type,
             "name": tool.name,
         }
-    elif tool.text_editor_20241022:
+    elif tool.type == "text_editor_20241022":
         return {
             "type": tool.type,
             "name": tool.name,
@@ -86,7 +91,7 @@ async def prompt_step(context: StepContext) -> StepOutcome:
         sort_by="created_at",
         direction="desc",
     )
-
+    # grab the tools from context.current_step.tools and then append it to the agent_tools
     if context.current_step.settings:
         passed_settings: dict = context.current_step.settings.model_dump(
             exclude_unset=True
@@ -99,32 +104,93 @@ async def prompt_step(context: StepContext) -> StepOutcome:
         prompt = [{"role": "user", "content": prompt}]
 
     # Format agent_tools for litellm
-    formatted_agent_tools = [
-        format_agent_tool(tool) for tool in agent_tools if format_agent_tool(tool)
-    ]
+    # Initialize the formatted_agent_tools with context tools
+    task_tools = context.tools
+    formatted_agent_tools = [format_agent_tool(tool) for tool in task_tools]
+    # Add agent_tools if they are not already in formatted_agent_tools
+    for agent_tool in agent_tools:
+        if (
+            format_agent_tool(agent_tool)
+            and format_agent_tool(agent_tool) not in formatted_agent_tools
+        ):
+            formatted_agent_tools.append(format_agent_tool(agent_tool))
 
     # Check if the model is Anthropic
-    if "claude-3.5-sonnet-20241022" == agent_model.lower():
+    if agent_model.lower().startswith("claude-3.5") and any(
+        tool["type"] in ["computer_20241022", "bash_20241022", "text_editor_20241022"]
+        for tool in formatted_agent_tools
+    ):
         # Retrieve the API key from the environment variable
         betas = [COMPUTER_USE_BETA_FLAG]
         # Use Anthropic API directly
-        client = Anthropic(api_key=anthropic_api_key)
-
+        client = AsyncAnthropic(api_key=anthropic_api_key)
+        new_prompt = [{"role": "user", "content": prompt[0]["content"]}]
         # Claude Response
-        response = await client.beta.messages.create(
-            model=agent_model,
-            messages=prompt,
+        claude_response: BetaMessage = await client.beta.messages.create(
+            model="claude-3-5-sonnet-20241022",
+            messages=new_prompt,
             tools=formatted_agent_tools,
             max_tokens=1024,
             betas=betas,
         )
 
-        return StepOutcome(
-            output=response.model_dump()
-            if hasattr(response, "model_dump")
-            else response,
-            next=None,
+        # Claude returns [ToolUse | TextBlock]
+        # We need to convert tool_use to tool_calls
+        # And set content = TextBlock.text
+        # But we need to ensure no more than one text block is returned
+        if (
+            len([block for block in claude_response.content if block.type == "text"])
+            > 1
+        ):
+            raise ApplicationError("Claude should only return one message")
+
+        text_block = next(
+            (block for block in claude_response.content if block.type == "text"),
+            None,
+        )
+
+        stop_reason = claude_response.stop_reason
+
+        if stop_reason == "tool_use":
+            choice = Choices(
+                message=Message(
+                    role="assistant",
+                    content=text_block.text if text_block else None,
+                    tool_calls=[
+                        ChatCompletionMessageToolCall(
+                            type="function",
+                            function=Function(
+                                name=block.name,
+                                arguments=block.input,
+                            ),
+                        )
+                        for block in claude_response.content
+                        if block.type == "tool_use"
+                    ],
+                ),
+                finish_reason="tool_calls",
+            )
+        else:
+            assert (
+                text_block
+            ), "Claude should always return a text block for stop_reason=stop"
+
+            choice = Choices(
+                message=Message(
+                    role="assistant",
+                    content=text_block.text,
+                ),
+                finish_reason="stop",
+            )
+
+        response: ModelResponse = ModelResponse(
+            id=claude_response.id,
+            choices=[choice],
+            created=int(datetime.now().timestamp()),
+            model=claude_response.model,
+            object="text_completion",
         )
+
     else:
         # Use litellm for other models
         completion_data: dict = {
@@ -150,9 +216,7 @@ async def prompt_step(context: StepContext) -> StepOutcome:
 
             response = response.choices[0].message.content
 
-        return StepOutcome(
-            output=response.model_dump()
-            if hasattr(response, "model_dump")
-            else response,
-            next=None,
-        )
+    return StepOutcome(
+        output=response.model_dump() if hasattr(response, "model_dump") else response,
+        next=None,
+    )
diff --git a/agents-api/agents_api/autogen/Tools.py b/agents-api/agents_api/autogen/Tools.py
@@ -687,6 +687,18 @@ class CreateToolRequest(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: Literal[
+        "function",
+        "integration",
+        "system",
+        "api_call",
+        "computer_20241022",
+        "text_editor_20241022",
+        "bash_20241022",
+    ]
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool
@@ -953,6 +965,21 @@ class PatchToolRequest(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: (
+        Literal[
+            "function",
+            "integration",
+            "system",
+            "api_call",
+            "computer_20241022",
+            "text_editor_20241022",
+            "bash_20241022",
+        ]
+        | None
+    ) = None
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool
@@ -1380,6 +1407,18 @@ class Tool(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: Literal[
+        "function",
+        "integration",
+        "system",
+        "api_call",
+        "computer_20241022",
+        "text_editor_20241022",
+        "bash_20241022",
+    ]
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool
@@ -1457,6 +1496,18 @@ class UpdateToolRequest(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: Literal[
+        "function",
+        "integration",
+        "system",
+        "api_call",
+        "computer_20241022",
+        "text_editor_20241022",
+        "bash_20241022",
+    ]
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool

diff --git a/agents-api/agents_api/common/protocol/tasks.py b/agents-api/agents_api/common/protocol/tasks.py
@@ -235,13 +235,12 @@ def task_to_spec(
     for tool in task.tools:
         tool_spec = getattr(tool, tool.type)
 
-        tools.append(
-            TaskToolDef(
-                type=tool.type,
-                spec=tool_spec.model_dump(),
-                **tool.model_dump(exclude={"type"}),
-            )
+        tool_obj = dict(
+            type=tool.type,
+            spec=tool_spec.model_dump(),
+            **tool.model_dump(exclude={"type"}),
         )
+        tools.append(TaskToolDef(**tool_obj))
 
     workflows = [Workflow(name="main", steps=task_data.pop("main"))]
 

diff --git a/integrations-service/integrations/autogen/Tools.py b/integrations-service/integrations/autogen/Tools.py
@@ -687,6 +687,18 @@ class CreateToolRequest(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: Literal[
+        "function",
+        "integration",
+        "system",
+        "api_call",
+        "computer_20241022",
+        "text_editor_20241022",
+        "bash_20241022",
+    ]
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool
@@ -953,6 +965,21 @@ class PatchToolRequest(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: (
+        Literal[
+            "function",
+            "integration",
+            "system",
+            "api_call",
+            "computer_20241022",
+            "text_editor_20241022",
+            "bash_20241022",
+        ]
+        | None
+    ) = None
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool
@@ -1380,6 +1407,18 @@ class Tool(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: Literal[
+        "function",
+        "integration",
+        "system",
+        "api_call",
+        "computer_20241022",
+        "text_editor_20241022",
+        "bash_20241022",
+    ]
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool
@@ -1457,6 +1496,18 @@ class UpdateToolRequest(BaseModel):
     """
     Name of the tool (must be unique for this agent and a valid python identifier string )
     """
+    type: Literal[
+        "function",
+        "integration",
+        "system",
+        "api_call",
+        "computer_20241022",
+        "text_editor_20241022",
+        "bash_20241022",
+    ]
+    """
+    Type of the tool
+    """
     description: str | None = None
     """
     Description of the tool

diff --git a/typespec/tools/models.tsp b/typespec/tools/models.tsp
@@ -210,6 +210,9 @@ model Tool {
     /** Name of the tool (must be unique for this agent and a valid python identifier string )*/
     name: validPythonIdentifier;
 
+    /** Type of the tool */
+    type: ToolType;
+
     /** Description of the tool */
     description?: string;