Skip to content

Commit

Permalink
feat(agents-api): Add python expression support to prompt step (#795)
Browse files Browse the repository at this point in the history
- **wip(agents-api): Auto-run tools in prompt steps**
- **refactor: Lint integrations-service (CI)**
- **feat(agents-api): Add python expression support to prompt step**
- **feat(agents-api): Default prompt_step.tools = 'all'**

<!-- ELLIPSIS_HIDDEN -->


----

> [!IMPORTANT]
> Add Python expression support to prompt steps in the agents API, with default tool settings and refactoring.
> 
>   - **Behavior**:
> - Add Python expression evaluation for prompts starting with the `$_ ` prefix (as defined by `EVAL_PROMPT_PREFIX`) in `prompt_step()` in `prompt_step.py`.
>     - Default `tools` in `PromptStep` to `'all'` in `Tasks.py`.
> - Default `auto_run_tools` to `True` for `PromptStep` and `False` for
sessions in `Sessions.py` and `Tasks.py`.
>   - **Refactoring**:
>     - Remove unused import `Developer` from `execute_system.py`.
>     - Remove unused import `RootModel` from `Tools.py`.
>     - Linting changes in `integrations-service`.
>   - **Testing**:
> - Add test for prompt step with Python expression in
`test_execution_workflow.py`.
> 
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=julep-ai%2Fjulep&utm_source=github&utm_medium=referral)<sup>
for 4254083. It will automatically
update as commits are pushed.</sup>


<!-- ELLIPSIS_HIDDEN -->

---------

Signed-off-by: Diwank Singh Tomer <[email protected]>
Co-authored-by: creatorrr <[email protected]>
  • Loading branch information
creatorrr and creatorrr authored Oct 31, 2024
1 parent b324093 commit 7264881
Show file tree
Hide file tree
Showing 17 changed files with 332 additions and 177 deletions.
1 change: 0 additions & 1 deletion agents-api/agents_api/activities/execute_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
VectorDocSearchRequest,
)
from ..autogen.Tools import SystemDef
from ..common.protocol.developers import Developer
from ..common.protocol.tasks import StepContext
from ..common.storage_handler import auto_blob_store
from ..env import testing
Expand Down
55 changes: 42 additions & 13 deletions agents-api/agents_api/activities/task_steps/prompt_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ...common.utils.template import render_template
from ...env import anthropic_api_key, debug
from ..utils import get_handler
from .base_evaluate import base_evaluate

COMPUTER_USE_BETA_FLAG = "computer-use-2024-10-22"

Expand Down Expand Up @@ -77,43 +78,67 @@ def format_tool(tool: Tool) -> dict:
return formatted


EVAL_PROMPT_PREFIX = "$_ "


@activity.defn
@auto_blob_store
@beartype
async def prompt_step(context: StepContext) -> StepOutcome:
# Get context data
prompt: str | list[dict] = context.current_step.model_dump()["prompt"]
context_data: dict = context.model_dump()
context_data: dict = context.model_dump(include_remote=True)

# Render template messages
prompt = await render_template(
prompt,
context_data,
skip_vars=["developer_id"],
# If the prompt is a string and starts with $_ then we need to evaluate it
should_evaluate_prompt = isinstance(prompt, str) and prompt.startswith(
EVAL_PROMPT_PREFIX
)

if should_evaluate_prompt:
prompt = await base_evaluate(
prompt[len(EVAL_PROMPT_PREFIX) :].strip(), context_data
)

if not isinstance(prompt, (str, list)):
raise ApplicationError(
"Invalid prompt expression, expected a string or list"
)

# Wrap the prompt in a list if it is not already
prompt = (
prompt if isinstance(prompt, list) else [{"role": "user", "content": prompt}]
)

# Render template messages if we didn't evaluate the prompt
if not should_evaluate_prompt:
# Render template messages
prompt = await render_template(
prompt,
context_data,
skip_vars=["developer_id"],
)

# Get settings and run llm
agent_default_settings: dict = (
context.execution_input.agent.default_settings.model_dump()
if context.execution_input.agent.default_settings
else {}
)

agent_model: str = (
context.execution_input.agent.model
if context.execution_input.agent.model
else "gpt-4o"
)

# Get passed settings
if context.current_step.settings:
passed_settings: dict = context.current_step.settings.model_dump(
exclude_unset=True
)
else:
passed_settings: dict = {}

# Wrap the prompt in a list if it is not already
if isinstance(prompt, str):
prompt = [{"role": "user", "content": prompt}]

# Format tools for litellm
formatted_tools = [format_tool(tool) for tool in context.tools]

Expand All @@ -132,11 +157,15 @@ async def prompt_step(context: StepContext) -> StepOutcome:
betas = [COMPUTER_USE_BETA_FLAG]
# Use Anthropic API directly
client = AsyncAnthropic(api_key=anthropic_api_key)
new_prompt = [{"role": "user", "content": prompt[0]["content"]}]

# Reformat the prompt for Anthropic
# Anthropic expects a list of messages with role and content (and no name etc)
prompt = [{"role": "user", "content": message["content"]} for message in prompt]

# Claude Response
claude_response: BetaMessage = await client.beta.messages.create(
model="claude-3-5-sonnet-20241022",
messages=new_prompt,
messages=prompt,
tools=formatted_tools,
max_tokens=1024,
betas=betas,
Expand Down Expand Up @@ -210,7 +239,7 @@ async def prompt_step(context: StepContext) -> StepOutcome:
}

extra_body = {
"cache": {"no-cache": debug},
"cache": {"no-cache": debug or context.current_step.disable_cache},
}

response: ModelResponse = await litellm.acompletion(
Expand Down
40 changes: 15 additions & 25 deletions agents-api/agents_api/autogen/Sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@ class CreateSessionRequest(BaseModel):
"""
Action to start on context window overflow
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = False
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: false for sessions, true for tasks)
If a tool call is made, the tool's output will be sent back to the model as the model's input.
If a tool call is not made, the model's output will be returned as is.
Expand Down Expand Up @@ -80,12 +78,10 @@ class PatchSessionRequest(BaseModel):
"""
Action to start on context window overflow
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = False
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: false for sessions, true for tasks)
If a tool call is made, the tool's output will be sent back to the model as the model's input.
If a tool call is not made, the model's output will be returned as is.
Expand Down Expand Up @@ -117,12 +113,10 @@ class Session(BaseModel):
"""
Action to start on context window overflow
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = False
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: false for sessions, true for tasks)
If a tool call is made, the tool's output will be sent back to the model as the model's input.
If a tool call is not made, the model's output will be returned as is.
Expand Down Expand Up @@ -190,12 +184,10 @@ class UpdateSessionRequest(BaseModel):
"""
Action to start on context window overflow
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = False
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: false for sessions, true for tasks)
If a tool call is made, the tool's output will be sent back to the model as the model's input.
If a tool call is not made, the model's output will be returned as is.
Expand Down Expand Up @@ -234,12 +226,10 @@ class CreateOrUpdateSessionRequest(CreateSessionRequest):
"""
Action to start on context window overflow
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = False
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: false for sessions, true for tasks)
If a tool call is made, the tool's output will be sent back to the model as the model's input.
If a tool call is not made, the model's output will be returned as is.
Expand Down
28 changes: 16 additions & 12 deletions agents-api/agents_api/autogen/Tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ class PromptStep(BaseModel):
"""
The prompt to run
"""
tools: Literal["all"] | list[ToolRef | CreateToolRequest] = []
tools: Literal["all"] | list[ToolRef | CreateToolRequest] = "all"
"""
The tools to use for the prompt
"""
Expand All @@ -702,16 +702,18 @@ class PromptStep(BaseModel):
"""
Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = True
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: true for prompt steps, false for sessions)
If a tool call is made, the tool's output will be used as the model's input.
If a tool call is not made, the model's output will be used as the next step's input.
"""
disable_cache: StrictBool = False
"""
Whether to disable caching for the prompt step
"""


class PromptStepUpdateItem(BaseModel):
Expand All @@ -730,7 +732,7 @@ class PromptStepUpdateItem(BaseModel):
"""
The prompt to run
"""
tools: Literal["all"] | list[ToolRefUpdateItem | CreateToolRequest] = []
tools: Literal["all"] | list[ToolRefUpdateItem | CreateToolRequest] = "all"
"""
The tools to use for the prompt
"""
Expand All @@ -746,16 +748,18 @@ class PromptStepUpdateItem(BaseModel):
"""
Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
"""
forward_tool_results: StrictBool | None = None
auto_run_tools: StrictBool = True
"""
Whether to forward the tool results to the model when available.
"true" => always forward
"false" => never forward
null => forward if applicable (default)
Whether to auto-run the tool and send the tool results to the model when available.
(default: true for prompt steps, false for sessions)
If a tool call is made, the tool's output will be used as the model's input.
If a tool call is not made, the model's output will be used as the next step's input.
"""
disable_cache: StrictBool = False
"""
Whether to disable caching for the prompt step
"""


class ReturnStep(BaseModel):
Expand Down
1 change: 0 additions & 1 deletion agents-api/agents_api/autogen/Tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
BaseModel,
ConfigDict,
Field,
RootModel,
StrictBool,
)

Expand Down
18 changes: 17 additions & 1 deletion agents-api/agents_api/common/protocol/tasks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from typing import Annotated, Any
from typing import Annotated, Any, Literal
from uuid import UUID

from beartype import beartype
from temporalio import activity, workflow
from temporalio.exceptions import ApplicationError

with workflow.unsafe.imports_passed_through():
from pydantic import BaseModel, Field, computed_field
Expand All @@ -23,6 +24,7 @@
TaskSpecDef,
TaskToolDef,
Tool,
ToolRef,
TransitionTarget,
TransitionType,
UpdateTaskRequest,
Expand Down Expand Up @@ -154,6 +156,20 @@ def tools(self) -> list[Tool | CreateToolRequest]:
task = execution_input.task
agent_tools = execution_input.agent_tools

step_tools: Literal["all"] | list[ToolRef | CreateToolRequest] = getattr(
self.current_step, "tools", "all"
)

if step_tools != "all":
if not all(
tool and isinstance(tool, CreateToolRequest) for tool in step_tools
):
raise ApplicationError(
"Invalid tools for step (ToolRef not supported yet)"
)

return step_tools

# Need to convert task.tools (list[TaskToolDef]) to list[Tool]
task_tools = []
for tool in task.tools:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def create_or_update_session(
data: CreateOrUpdateSessionRequest,
) -> tuple[list[str], dict]:
data.metadata = data.metadata or {}
session_data = data.model_dump()
session_data = data.model_dump(exclude={"auto_run_tools", "disable_cache"})

user = session_data.pop("user")
agent = session_data.pop("agent")
Expand Down
2 changes: 1 addition & 1 deletion agents-api/agents_api/models/session/create_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def create_session(
session_id = session_id or uuid4()

data.metadata = data.metadata or {}
session_data = data.model_dump()
session_data = data.model_dump(exclude={"auto_run_tools", "disable_cache"})

user = session_data.pop("user")
agent = session_data.pop("agent")
Expand Down
Loading

0 comments on commit 7264881

Please sign in to comment.