From 387e0aaf773b422876ddd321b3fad5f8b021669c Mon Sep 17 00:00:00 2001
From: Diwank Singh Tomer <diwank.singh@gmail.com>
Date: Thu, 31 Oct 2024 15:46:05 -0400
Subject: [PATCH 1/4] wip(agents-api): Auto-run tools in prompt steps

Signed-off-by: Diwank Singh Tomer <diwank.singh@gmail.com>
---
 agents-api/agents_api/autogen/Sessions.py     |  40 +++----
 agents-api/agents_api/autogen/Tasks.py        |  16 +--
 .../session/create_or_update_session.py       |   2 +-
 .../models/session/create_session.py          |   2 +-
 .../workflows/task_execution/__init__.py      | 108 ++++++++++++++++--
 .../integrations/autogen/Sessions.py          |  40 +++----
 .../integrations/autogen/Tasks.py             |  16 +--
 .../integrations/models/brave.py              |   3 +-
 .../integrations/utils/integrations/brave.py  |   5 +-
 typespec/sessions/models.tsp                  |   8 +-
 typespec/tasks/steps.tsp                      |  10 +-
 .../@typespec/openapi3/openapi-1.0.0.yaml     |  89 ++++++---------
 12 files changed, 190 insertions(+), 149 deletions(-)

diff --git a/agents-api/agents_api/autogen/Sessions.py b/agents-api/agents_api/autogen/Sessions.py
index 1f13639fc..945fd5ee4 100644
--- a/agents-api/agents_api/autogen/Sessions.py
+++ b/agents-api/agents_api/autogen/Sessions.py
@@ -43,12 +43,10 @@ class CreateSessionRequest(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -80,12 +78,10 @@ class PatchSessionRequest(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -117,12 +113,10 @@ class Session(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -190,12 +184,10 @@ class UpdateSessionRequest(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -234,12 +226,10 @@ class CreateOrUpdateSessionRequest(CreateSessionRequest):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
diff --git a/agents-api/agents_api/autogen/Tasks.py b/agents-api/agents_api/autogen/Tasks.py
index bf0342394..5fb285ed5 100644
--- a/agents-api/agents_api/autogen/Tasks.py
+++ b/agents-api/agents_api/autogen/Tasks.py
@@ -702,12 +702,10 @@ class PromptStep(BaseModel):
     """
     Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = True
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: true for prompt steps, false for sessions)
 
     If a tool call is made, the tool's output will be used as the model's input.
     If a tool call is not made, the model's output will be used as the next step's input.
@@ -746,12 +744,10 @@ class PromptStepUpdateItem(BaseModel):
     """
     Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = True
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: true for prompt steps, false for sessions)
 
     If a tool call is made, the tool's output will be used as the model's input.
     If a tool call is not made, the model's output will be used as the next step's input.
diff --git a/agents-api/agents_api/models/session/create_or_update_session.py b/agents-api/agents_api/models/session/create_or_update_session.py
index 629ca6398..ad6031f01 100644
--- a/agents-api/agents_api/models/session/create_or_update_session.py
+++ b/agents-api/agents_api/models/session/create_or_update_session.py
@@ -53,7 +53,7 @@ def create_or_update_session(
     data: CreateOrUpdateSessionRequest,
 ) -> tuple[list[str], dict]:
     data.metadata = data.metadata or {}
-    session_data = data.model_dump()
+    session_data = data.model_dump(exclude={"auto_run_tools"})
 
     user = session_data.pop("user")
     agent = session_data.pop("agent")
diff --git a/agents-api/agents_api/models/session/create_session.py b/agents-api/agents_api/models/session/create_session.py
index 70b2d41e7..249bcdeec 100644
--- a/agents-api/agents_api/models/session/create_session.py
+++ b/agents-api/agents_api/models/session/create_session.py
@@ -60,7 +60,7 @@ def create_session(
     session_id = session_id or uuid4()
 
     data.metadata = data.metadata or {}
-    session_data = data.model_dump()
+    session_data = data.model_dump(exclude={"auto_run_tools"})
 
     user = session_data.pop("user")
     agent = session_data.pop("agent")
diff --git a/agents-api/agents_api/workflows/task_execution/__init__.py b/agents-api/agents_api/workflows/task_execution/__init__.py
index 7733ab236..51318248e 100644
--- a/agents-api/agents_api/workflows/task_execution/__init__.py
+++ b/agents-api/agents_api/workflows/task_execution/__init__.py
@@ -372,10 +372,23 @@ async def run(
                 state = PartialTransition(type="resume", output=result)
 
             case PromptStep(unwrap=True), StepOutcome(output=response):
+                finish_reason = response["choices"][0]["finish_reason"]
+                if finish_reason == "tool_calls":
+                    workflow.logger.error(
+                        "Prompt step: Tool calls not supported in unwrap mode"
+                    )
+
+                    state = PartialTransition(
+                        type="error", output="Tool calls not supported in unwrap mode"
+                    )
+                    await transition(context, state)
+
+                    raise ApplicationError("Tool calls not supported in unwrap mode")
+
                 workflow.logger.debug(f"Prompt step: Received response: {response}")
                 state = PartialTransition(output=response)
 
-            case PromptStep(forward_tool_results=False, unwrap=False), StepOutcome(
+            case PromptStep(auto_run_tools=False, unwrap=False), StepOutcome(
                 output=response
             ):
                 workflow.logger.debug(f"Prompt step: Received response: {response}")
@@ -387,12 +400,22 @@ async def run(
                 workflow.logger.debug(f"Prompt step: Received response: {response}")
                 state = PartialTransition(output=response)
 
-            case PromptStep(unwrap=False), StepOutcome(output=response) if response[
-                "choices"
-            ][0]["finish_reason"] == "tool_calls":
-                workflow.logger.debug("Prompt step: Received tool call")
-                message = response["choices"][0]["message"]
-                tool_calls_input = message["tool_calls"]
+            ## TODO: Handle multiple tool calls and multiple choices
+            # case PromptStep(unwrap=False), StepOutcome(output=response) if response[
+            #     "choices"
+            # ][0]["finish_reason"] == "tool_calls":
+            #     workflow.logger.debug("Prompt step: Received tool call")
+            #     message = response["choices"][0]["message"]
+            #     tool_calls_input = message["tool_calls"]
+
+            # NOTE: `finish_reason` is a key of the choice, while `tool_calls` is a key
+            # of choice["message"], so bind both before inspecting the first tool call.
+            case PromptStep(auto_run_tools=True, unwrap=False), StepOutcome(
+                output=response
+            ) if (choice := response["choices"][0])["finish_reason"] == "tool_calls" and (
+                tool_calls_input := (message := choice["message"])["tool_calls"]
+            )[0]["type"] == "function":
+                workflow.logger.debug("Prompt step: Received FUNCTION tool call")
 
                 # Enter a wait-for-input step to ask the developer to run the tool calls
                 tool_calls_results = await workflow.execute_activity(
@@ -415,6 +438,67 @@ async def run(
                 )
                 state = PartialTransition(output=new_response.output, type="resume")
 
+            # NOTE: In the guards below, `finish_reason` is read from the choice, while
+            # `tool_calls` is read from choice["message"]. `message` is bound to that
+            # message dict and `tool_calls_input` to its list of tool calls; only the
+            # type of the first tool call is inspected.
+            case PromptStep(auto_run_tools=True, unwrap=False), StepOutcome(
+                output=response
+            ) if (choice := response["choices"][0])[
+                "finish_reason"
+            ] == "tool_calls" and (
+                tool_calls_input := (message := choice["message"])["tool_calls"]
+            )[0]["type"] == "integration":
+                workflow.logger.debug("Prompt step: Received INTEGRATION tool call")
+
+                # FIXME: Implement integration tool calls
+                # See: MANUAL TOOL CALL INTEGRATION (below)
+                raise NotImplementedError("Integration tool calls not yet supported")
+
+                # TODO: Feed the tool call results back to the model (see above)
+
+            case PromptStep(auto_run_tools=True, unwrap=False), StepOutcome(
+                output=response
+            ) if (choice := response["choices"][0])[
+                "finish_reason"
+            ] == "tool_calls" and (
+                tool_calls_input := (message := choice["message"])["tool_calls"]
+            )[0]["type"] == "api_call":
+                workflow.logger.debug("Prompt step: Received API_CALL tool call")
+
+                # FIXME: Implement API_CALL tool calls
+                # See: MANUAL TOOL CALL API_CALL (below)
+                raise NotImplementedError("API_CALL tool calls not yet supported")
+
+                # TODO: Feed the tool call results back to the model (see above)
+
+            case PromptStep(auto_run_tools=True, unwrap=False), StepOutcome(
+                output=response
+            ) if (choice := response["choices"][0])[
+                "finish_reason"
+            ] == "tool_calls" and (
+                tool_calls_input := (message := choice["message"])["tool_calls"]
+            )[0]["type"] == "system":
+                workflow.logger.debug("Prompt step: Received SYSTEM tool call")
+
+                # FIXME: Implement SYSTEM tool calls
+                # See: MANUAL TOOL CALL SYSTEM (below)
+                raise NotImplementedError("SYSTEM tool calls not yet supported")
+
+                # TODO: Feed the tool call results back to the model (see above)
+
+            # Unknown tool-call types are not executed here; the raw response is
+            # passed on unchanged as this step's output.
+            case PromptStep(unwrap=False), StepOutcome(output=response) if (
+                choice := response["choices"][0]
+            )["finish_reason"] == "tool_calls" and (
+                tool_calls_input := (message := choice["message"])["tool_calls"]
+            )[0]["type"] not in ["function", "integration", "api_call", "system"]:
+                workflow.logger.debug(
+                    f"Prompt step: Received unknown tool call: {tool_calls_input[0]['type']}"
+                )
+                state = PartialTransition(output=response)
+
             case SetStep(), StepOutcome(output=evaluated_output):
                 workflow.logger.info("Set step: Updating user state")
 
@@ -452,6 +536,8 @@ async def run(
             case ToolCallStep(), StepOutcome(output=tool_call) if tool_call[
                 "type"
             ] == "integration":
+                # MANUAL TOOL CALL INTEGRATION
+                workflow.logger.debug("ToolCallStep: Received INTEGRATION tool call")
                 call = tool_call["integration"]
                 tool_name = call["name"]
                 arguments = call["arguments"]
@@ -490,6 +576,8 @@ async def run(
             case ToolCallStep(), StepOutcome(output=tool_call) if tool_call[
                 "type"
             ] == "api_call":
+                # MANUAL TOOL CALL API_CALL
+                workflow.logger.debug("ToolCallStep: Received API_CALL tool call")
                 call = tool_call["api_call"]
                 tool_name = call["name"]
                 arguments = call["arguments"]
@@ -528,6 +616,8 @@ async def run(
             case ToolCallStep(), StepOutcome(output=tool_call) if tool_call[
                 "type"
             ] == "system":
+                # MANUAL TOOL CALL SYSTEM
+                workflow.logger.debug("ToolCallStep: Received SYSTEM tool call")
                 call = tool_call.get("system")
 
                 system_call = SystemDef(**call)
@@ -545,12 +635,16 @@ async def run(
                 workflow.logger.error(
                     f"Unhandled step type: {type(context.current_step).__name__}"
                 )
+                state = PartialTransition(type="error", output="Not implemented")
+                await transition(context, state)
+
                 raise ApplicationError("Not implemented")
 
         # 4. Transition to the next step
         workflow.logger.info(f"Transitioning after step {context.cursor.step}")
 
         # The returned value is the transition finally created
+        state = state or PartialTransition(type="error", output="Not implemented")
         final_state = await transition(context, state)
 
         # ---
diff --git a/integrations-service/integrations/autogen/Sessions.py b/integrations-service/integrations/autogen/Sessions.py
index 1f13639fc..945fd5ee4 100644
--- a/integrations-service/integrations/autogen/Sessions.py
+++ b/integrations-service/integrations/autogen/Sessions.py
@@ -43,12 +43,10 @@ class CreateSessionRequest(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -80,12 +78,10 @@ class PatchSessionRequest(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -117,12 +113,10 @@ class Session(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -190,12 +184,10 @@ class UpdateSessionRequest(BaseModel):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
@@ -234,12 +226,10 @@ class CreateOrUpdateSessionRequest(CreateSessionRequest):
     """
     Action to start on context window overflow
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = False
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: false for sessions, true for tasks)
 
     If a tool call is made, the tool's output will be sent back to the model as the model's input.
     If a tool call is not made, the model's output will be returned as is.
diff --git a/integrations-service/integrations/autogen/Tasks.py b/integrations-service/integrations/autogen/Tasks.py
index bf0342394..5fb285ed5 100644
--- a/integrations-service/integrations/autogen/Tasks.py
+++ b/integrations-service/integrations/autogen/Tasks.py
@@ -702,12 +702,10 @@ class PromptStep(BaseModel):
     """
     Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = True
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: true for prompt steps, false for sessions)
 
     If a tool call is made, the tool's output will be used as the model's input.
     If a tool call is not made, the model's output will be used as the next step's input.
@@ -746,12 +744,10 @@ class PromptStepUpdateItem(BaseModel):
     """
     Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
     """
-    forward_tool_results: StrictBool | None = None
+    auto_run_tools: StrictBool = True
     """
-    Whether to forward the tool results to the model when available.
-    "true" => always forward
-    "false" => never forward
-    null => forward if applicable (default)
+    Whether to auto-run the tool and send the tool results to the model when available.
+    (default: true for prompt steps, false for sessions)
 
     If a tool call is made, the tool's output will be used as the model's input.
     If a tool call is not made, the model's output will be used as the next step's input.
diff --git a/integrations-service/integrations/models/brave.py b/integrations-service/integrations/models/brave.py
index 96abb0a46..2a794beda 100644
--- a/integrations-service/integrations/models/brave.py
+++ b/integrations-service/integrations/models/brave.py
@@ -9,5 +9,6 @@ class SearchResult(BaseModel):
     link: str
     snippet: str
 
+
 class BraveSearchOutput(BaseOutput):
-    result: List[SearchResult] = Field(..., description="A list of search results")
\ No newline at end of file
+    result: List[SearchResult] = Field(..., description="A list of search results")
diff --git a/integrations-service/integrations/utils/integrations/brave.py b/integrations-service/integrations/utils/integrations/brave.py
index b96f15969..20cbd8ab5 100644
--- a/integrations-service/integrations/utils/integrations/brave.py
+++ b/integrations-service/integrations/utils/integrations/brave.py
@@ -26,11 +26,10 @@ async def search(
     tool = BraveSearch.from_api_key(api_key=setup.api_key, search_kwargs={"count": 3})
 
     result = tool.run(arguments.query)
-    
+
     try:
         parsed_result = [SearchResult(**item) for item in json.loads(result)]
     except json.JSONDecodeError as e:
         raise ValueError("Malformed JSON response from Brave Search") from e
-    
 
-    return BraveSearchOutput(result=parsed_result)
\ No newline at end of file
+    return BraveSearchOutput(result=parsed_result)
diff --git a/typespec/sessions/models.tsp b/typespec/sessions/models.tsp
index dfbb6ea41..bb1c16d19 100644
--- a/typespec/sessions/models.tsp
+++ b/typespec/sessions/models.tsp
@@ -62,14 +62,12 @@ model Session {
     /** Action to start on context window overflow */
     context_overflow: ContextOverflowType | null = null;
 
-    /** Whether to forward the tool results to the model when available.
-     * "true" => always forward
-     * "false" => never forward
-     * null => forward if applicable (default)
+    /** Whether to auto-run the tool and send the tool results to the model when available.
+     * (default: false for sessions, true for tasks)
      * 
      * If a tool call is made, the tool's output will be sent back to the model as the model's input.
      * If a tool call is not made, the model's output will be returned as is. */
-    forward_tool_results: boolean | null = null;
+    auto_run_tools: boolean = false;
 
     ...HasId;
     ...HasMetadata;
diff --git a/typespec/tasks/steps.tsp b/typespec/tasks/steps.tsp
index 16d3c97db..5de975247 100644
--- a/typespec/tasks/steps.tsp
+++ b/typespec/tasks/steps.tsp
@@ -114,14 +114,12 @@ model PromptStepDef {
     /** Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content` */
     unwrap?: boolean = false;
 
-    /** Whether to forward the tool results to the model when available.
-     * "true" => always forward
-     * "false" => never forward
-     * null => forward if applicable (default)
-     * 
+    /** Whether to auto-run the tool and send the tool results to the model when available.
+     * (default: true for prompt steps, false for sessions)
+     *
      * If a tool call is made, the tool's output will be used as the model's input.
      * If a tool call is not made, the model's output will be used as the next step's input. */
-    forward_tool_results: boolean | null = null;
+    auto_run_tools: boolean = true;
 }
 
 model EvaluateStep extends BaseWorkflowStep<"evaluate"> {
diff --git a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
index 0c8a2b8ab..97dfbf141 100644
--- a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
+++ b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
@@ -3216,7 +3216,7 @@ components:
         - render_templates
         - token_budget
         - context_overflow
-        - forward_tool_results
+        - auto_run_tools
       properties:
         id:
           $ref: '#/components/schemas/Common.uuid'
@@ -3326,18 +3326,15 @@ components:
           nullable: true
           description: Action to start on context window overflow
           default: null
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: false for sessions, true for tasks)
 
             If a tool call is made, the tool's output will be sent back to the model as the model's input.
             If a tool call is not made, the model's output will be returned as is.
-          default: null
+          default: false
         metadata:
           type: object
           additionalProperties: {}
@@ -3350,7 +3347,7 @@ components:
         - render_templates
         - token_budget
         - context_overflow
-        - forward_tool_results
+        - auto_run_tools
       properties:
         user:
           allOf:
@@ -3458,18 +3455,15 @@ components:
           nullable: true
           description: Action to start on context window overflow
           default: null
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: false for sessions, true for tasks)
 
             If a tool call is made, the tool's output will be sent back to the model as the model's input.
             If a tool call is not made, the model's output will be returned as is.
-          default: null
+          default: false
         metadata:
           type: object
           additionalProperties: {}
@@ -3612,18 +3606,15 @@ components:
           nullable: true
           description: Action to start on context window overflow
           default: null
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: false for sessions, true for tasks)
 
             If a tool call is made, the tool's output will be sent back to the model as the model's input.
             If a tool call is not made, the model's output will be returned as is.
-          default: null
+          default: false
         metadata:
           type: object
           additionalProperties: {}
@@ -3636,7 +3627,7 @@ components:
         - render_templates
         - token_budget
         - context_overflow
-        - forward_tool_results
+        - auto_run_tools
         - id
         - created_at
         - updated_at
@@ -3737,18 +3728,15 @@ components:
           nullable: true
           description: Action to start on context window overflow
           default: null
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: false for sessions, true for tasks)
 
             If a tool call is made, the tool's output will be sent back to the model as the model's input.
             If a tool call is not made, the model's output will be returned as is.
-          default: null
+          default: false
         id:
           allOf:
             - $ref: '#/components/schemas/Common.uuid'
@@ -3821,7 +3809,7 @@ components:
         - render_templates
         - token_budget
         - context_overflow
-        - forward_tool_results
+        - auto_run_tools
       properties:
         situation:
           type: string
@@ -3913,18 +3901,15 @@ components:
           nullable: true
           description: Action to start on context window overflow
           default: null
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: false for sessions, true for tasks)
 
             If a tool call is made, the tool's output will be sent back to the model as the model's input.
             If a tool call is not made, the model's output will be returned as is.
-          default: null
+          default: false
         metadata:
           type: object
           additionalProperties: {}
@@ -4771,7 +4756,7 @@ components:
         - kind_
         - prompt
         - tools
-        - forward_tool_results
+        - auto_run_tools
       properties:
         kind_:
           type: string
@@ -4877,18 +4862,15 @@ components:
           type: boolean
           description: Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
           default: false
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: true for prompt steps, false for sessions)
 
             If a tool call is made, the tool's output will be used as the model's input.
             If a tool call is not made, the model's output will be used as the next step's input.
-          default: null
+          default: true
       allOf:
         - type: object
           required:
@@ -4911,7 +4893,7 @@ components:
       required:
         - prompt
         - tools
-        - forward_tool_results
+        - auto_run_tools
       properties:
         prompt:
           anyOf:
@@ -5011,18 +4993,15 @@ components:
           type: boolean
           description: Whether to unwrap the output of the prompt step, equivalent to `response.choices[0].message.content`
           default: false
-        forward_tool_results:
+        auto_run_tools:
           type: boolean
-          nullable: true
           description: |-
-            Whether to forward the tool results to the model when available.
-            "true" => always forward
-            "false" => never forward
-            null => forward if applicable (default)
+            Whether to auto-run the tool and send the tool results to the model when available.
+            (default: true for prompt steps, false for sessions)
 
             If a tool call is made, the tool's output will be used as the model's input.
             If a tool call is not made, the model's output will be used as the next step's input.
-          default: null
+          default: true
       allOf:
         - type: object
           properties:

From dc4d8ea64bedc5c8d9bb899736a4450580e1ca2f Mon Sep 17 00:00:00 2001
From: creatorrr <creatorrr@users.noreply.github.com>
Date: Thu, 31 Oct 2024 19:46:48 +0000
Subject: [PATCH 2/4] refactor: Lint integrations-service (CI)

---
 integrations-service/integrations/models/brave.py             | 3 ++-
 integrations-service/integrations/utils/integrations/brave.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/integrations-service/integrations/models/brave.py b/integrations-service/integrations/models/brave.py
index 2a794beda..dd721d222 100644
--- a/integrations-service/integrations/models/brave.py
+++ b/integrations-service/integrations/models/brave.py
@@ -1,5 +1,6 @@
 from typing import List
-from pydantic import Field, BaseModel
+
+from pydantic import BaseModel, Field
 
 from .base_models import BaseOutput
 
diff --git a/integrations-service/integrations/utils/integrations/brave.py b/integrations-service/integrations/utils/integrations/brave.py
index 20cbd8ab5..549379a82 100644
--- a/integrations-service/integrations/utils/integrations/brave.py
+++ b/integrations-service/integrations/utils/integrations/brave.py
@@ -1,7 +1,8 @@
+import json
+
 from beartype import beartype
 from langchain_community.tools import BraveSearch
 from tenacity import retry, stop_after_attempt, wait_exponential
-import json
 
 from ...autogen.Tools import BraveSearchArguments, BraveSearchSetup
 from ...models import BraveSearchOutput, SearchResult

From d13724bba3ff3885bb729e3a20afcd45be3a4cd4 Mon Sep 17 00:00:00 2001
From: Diwank Singh Tomer <diwank.singh@gmail.com>
Date: Thu, 31 Oct 2024 17:04:13 -0400
Subject: [PATCH 3/4] feat(agents-api): Add python expression support to prompt
 step

Signed-off-by: Diwank Singh Tomer <diwank.singh@gmail.com>
---
 .../agents_api/activities/execute_system.py   |  1 -
 .../activities/task_steps/prompt_step.py      | 45 +++++++++++----
 agents-api/agents_api/autogen/Tools.py        |  1 -
 .../agents_api/common/protocol/tasks.py       | 18 +++++-
 .../workflows/task_execution/__init__.py      | 28 ++--------
 agents-api/tests/test_execution_workflow.py   | 56 +++++++++++++++++++
 typespec/tasks/steps.tsp                      |  2 +-
 7 files changed, 115 insertions(+), 36 deletions(-)

diff --git a/agents-api/agents_api/activities/execute_system.py b/agents-api/agents_api/activities/execute_system.py
index a40e02f7e..abc4f1865 100644
--- a/agents-api/agents_api/activities/execute_system.py
+++ b/agents-api/agents_api/activities/execute_system.py
@@ -15,7 +15,6 @@
     VectorDocSearchRequest,
 )
 from ..autogen.Tools import SystemDef
-from ..common.protocol.developers import Developer
 from ..common.protocol.tasks import StepContext
 from ..common.storage_handler import auto_blob_store
 from ..env import testing
diff --git a/agents-api/agents_api/activities/task_steps/prompt_step.py b/agents-api/agents_api/activities/task_steps/prompt_step.py
index 7a4605ee0..55ca3d140 100644
--- a/agents-api/agents_api/activities/task_steps/prompt_step.py
+++ b/agents-api/agents_api/activities/task_steps/prompt_step.py
@@ -19,6 +19,7 @@
 from ...common.utils.template import render_template
 from ...env import anthropic_api_key, debug
 from ..utils import get_handler
+from .base_evaluate import base_evaluate
 
 COMPUTER_USE_BETA_FLAG = "computer-use-2024-10-22"
 
@@ -77,32 +78,60 @@ def format_tool(tool: Tool) -> dict:
     return formatted
 
 
+EVAL_PROMPT_PREFIX = "$_ "
+
+
 @activity.defn
 @auto_blob_store
 @beartype
 async def prompt_step(context: StepContext) -> StepOutcome:
     # Get context data
     prompt: str | list[dict] = context.current_step.model_dump()["prompt"]
-    context_data: dict = context.model_dump()
+    context_data: dict = context.model_dump(include_remote=True)
 
-    # Render template messages
-    prompt = await render_template(
-        prompt,
-        context_data,
-        skip_vars=["developer_id"],
+    # A str prompt prefixed with EVAL_PROMPT_PREFIX is a python expression to evaluate
+    should_evaluate_prompt = isinstance(prompt, str) and prompt.startswith(
+        EVAL_PROMPT_PREFIX
     )
+
+    if should_evaluate_prompt:
+        prompt = await base_evaluate(
+            prompt[len(EVAL_PROMPT_PREFIX) :].strip(), context_data
+        )
+
+        if not isinstance(prompt, (str, list)):
+            raise ApplicationError(
+                "Invalid prompt expression, expected a string or list"
+            )
+
+    # Wrap the prompt in a list if it is not already
+    prompt = (
+        prompt if isinstance(prompt, list) else [{"role": "user", "content": prompt}]
+    )
+
+    # Render template messages if we didn't evaluate the prompt
+    if not should_evaluate_prompt:
+        # prompt is always a list of messages here (wrapped above)
+        prompt = await render_template(
+            prompt,
+            context_data,
+            skip_vars=["developer_id"],
+        )
+
     # Get settings and run llm
     agent_default_settings: dict = (
         context.execution_input.agent.default_settings.model_dump()
         if context.execution_input.agent.default_settings
         else {}
     )
+
     agent_model: str = (
         context.execution_input.agent.model
         if context.execution_input.agent.model
         else "gpt-4o"
     )
 
+    # Get passed settings
     if context.current_step.settings:
         passed_settings: dict = context.current_step.settings.model_dump(
             exclude_unset=True
@@ -110,10 +139,6 @@ async def prompt_step(context: StepContext) -> StepOutcome:
     else:
         passed_settings: dict = {}
 
-    # Wrap the prompt in a list if it is not already
-    if isinstance(prompt, str):
-        prompt = [{"role": "user", "content": prompt}]
-
     # Format tools for litellm
     formatted_tools = [format_tool(tool) for tool in context.tools]
 
diff --git a/agents-api/agents_api/autogen/Tools.py b/agents-api/agents_api/autogen/Tools.py
index 52007fcea..88c4764fe 100644
--- a/agents-api/agents_api/autogen/Tools.py
+++ b/agents-api/agents_api/autogen/Tools.py
@@ -12,7 +12,6 @@
     BaseModel,
     ConfigDict,
     Field,
-    RootModel,
     StrictBool,
 )
 
diff --git a/agents-api/agents_api/common/protocol/tasks.py b/agents-api/agents_api/common/protocol/tasks.py
index 66ffd9632..87fd51b33 100644
--- a/agents-api/agents_api/common/protocol/tasks.py
+++ b/agents-api/agents_api/common/protocol/tasks.py
@@ -1,8 +1,9 @@
-from typing import Annotated, Any
+from typing import Annotated, Any, Literal
 from uuid import UUID
 
 from beartype import beartype
 from temporalio import activity, workflow
+from temporalio.exceptions import ApplicationError
 
 with workflow.unsafe.imports_passed_through():
     from pydantic import BaseModel, Field, computed_field
@@ -23,6 +24,7 @@
         TaskSpecDef,
         TaskToolDef,
         Tool,
+        ToolRef,
         TransitionTarget,
         TransitionType,
         UpdateTaskRequest,
@@ -154,6 +156,20 @@ def tools(self) -> list[Tool | CreateToolRequest]:
         task = execution_input.task
         agent_tools = execution_input.agent_tools
 
+        step_tools: Literal["all"] | list[ToolRef | CreateToolRequest] = getattr(
+            self.current_step, "tools", "all"
+        )
+
+        if step_tools != "all":
+            if not all(
+                tool and isinstance(tool, CreateToolRequest) for tool in step_tools
+            ):
+                raise ApplicationError(
+                    "Invalid tools for step (ToolRef not supported yet)"
+                )
+
+            return step_tools
+
         # Need to convert task.tools (list[TaskToolDef]) to list[Tool]
         task_tools = []
         for tool in task.tools:
diff --git a/agents-api/agents_api/workflows/task_execution/__init__.py b/agents-api/agents_api/workflows/task_execution/__init__.py
index 51318248e..de3c1189e 100644
--- a/agents-api/agents_api/workflows/task_execution/__init__.py
+++ b/agents-api/agents_api/workflows/task_execution/__init__.py
@@ -202,6 +202,9 @@ async def run(
             retry_policy=DEFAULT_RETRY_POLICY,
         )
 
+        # Init state
+        state = None
+
         match context.current_step, outcome:
             # Handle errors (activity returns None)
             case step, StepOutcome(error=error) if error is not None:
@@ -371,22 +374,9 @@ async def run(
 
                 state = PartialTransition(type="resume", output=result)
 
-            case PromptStep(unwrap=True), StepOutcome(output=response):
-                finish_reason = response["choices"][0]["finish_reason"]
-                if finish_reason == "tool_calls":
-                    workflow.logger.error(
-                        "Prompt step: Tool calls not supported in unwrap mode"
-                    )
-
-                    state = PartialTransition(
-                        type="error", output="Tool calls not supported in unwrap mode"
-                    )
-                    await transition(context, state)
-
-                    raise ApplicationError("Tool calls not supported in unwrap mode")
-
-                workflow.logger.debug(f"Prompt step: Received response: {response}")
-                state = PartialTransition(output=response)
+            case PromptStep(unwrap=True), StepOutcome(output=message):
+                workflow.logger.debug(f"Prompt step: Received response: {message}")
+                state = PartialTransition(output=message)
 
             case PromptStep(auto_run_tools=False, unwrap=False), StepOutcome(
                 output=response
@@ -493,12 +483,6 @@ async def run(
                 )
                 state = PartialTransition(output=response)
 
-            case SetStep(), StepOutcome(output=evaluated_output):
-                workflow.logger.info("Set step: Updating user state")
-
-            case SetStep(), StepOutcome(output=evaluated_output):
-                workflow.logger.info("Set step: Updating user state")
-
             case SetStep(), StepOutcome(output=evaluated_output):
                 workflow.logger.info("Set step: Updating user state")
 
diff --git a/agents-api/tests/test_execution_workflow.py b/agents-api/tests/test_execution_workflow.py
index d41aa4a6d..dbfa2f8bd 100644
--- a/agents-api/tests/test_execution_workflow.py
+++ b/agents-api/tests/test_execution_workflow.py
@@ -1133,6 +1133,62 @@ async def _(
             ]
 
 
+@test("workflow: prompt step (python expression)")
+async def _(
+    client=cozo_client,
+    developer_id=test_developer_id,
+    agent=test_agent,
+):
+    mock_model_response = ModelResponse(
+        id="fake_id",
+        choices=[Choices(message={"role": "assistant", "content": "Hello, world!"})],
+        created=0,
+        object="text_completion",
+    )
+
+    with patch("agents_api.clients.litellm.acompletion") as acompletion:
+        acompletion.return_value = mock_model_response
+        data = CreateExecutionRequest(input={"test": "input"})
+
+        task = create_task(
+            developer_id=developer_id,
+            agent_id=agent.id,
+            data=CreateTaskRequest(
+                **{
+                    "name": "test task",
+                    "description": "test task about",
+                    "input_schema": {"type": "object", "additionalProperties": True},
+                    "main": [
+                        {
+                            "prompt": "$_ [{'role': 'user', 'content': _.test}]",
+                            "settings": {},
+                        },
+                    ],
+                }
+            ),
+            client=client,
+        )
+
+        async with patch_testing_temporal() as (_, mock_run_task_execution_workflow):
+            execution, handle = await start_execution(
+                developer_id=developer_id,
+                task_id=task.id,
+                data=data,
+                client=client,
+            )
+
+            assert handle is not None
+            assert execution.task_id == task.id
+            assert execution.input == data.input
+
+            mock_run_task_execution_workflow.assert_called_once()
+
+            result = await handle.result()
+            result = result["choices"][0]["message"]
+            assert result["content"] == "Hello, world!"
+            assert result["role"] == "assistant"
+
+
 @test("workflow: prompt step")
 async def _(
     client=cozo_client,
diff --git a/typespec/tasks/steps.tsp b/typespec/tasks/steps.tsp
index 5de975247..943dfbc7c 100644
--- a/typespec/tasks/steps.tsp
+++ b/typespec/tasks/steps.tsp
@@ -103,7 +103,7 @@ model PromptStepDef {
     prompt: JinjaTemplate | InputChatMLMessage<JinjaTemplate>[];
 
     /** The tools to use for the prompt */
-    tools: "all" | (ToolRef | CreateToolRequest)[] = #[];
+    tools: "all" | (ToolRef | CreateToolRequest)[] = "all";
 
     /** The tool choice for the prompt */
     tool_choice?: ToolChoiceOption;

From d768c7f3d1ece4735a727a4ae9c3ba0523971f57 Mon Sep 17 00:00:00 2001
From: Diwank Singh Tomer <diwank.singh@gmail.com>
Date: Thu, 31 Oct 2024 17:05:06 -0400
Subject: [PATCH 4/4] feat(agents-api): Default prompt_step.tools = 'all'

Signed-off-by: Diwank Singh Tomer <diwank.singh@gmail.com>
---
 agents-api/agents_api/autogen/Tasks.py                    | 4 ++--
 integrations-service/integrations/autogen/Tasks.py        | 4 ++--
 typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/agents-api/agents_api/autogen/Tasks.py b/agents-api/agents_api/autogen/Tasks.py
index 5fb285ed5..41e9231cc 100644
--- a/agents-api/agents_api/autogen/Tasks.py
+++ b/agents-api/agents_api/autogen/Tasks.py
@@ -686,7 +686,7 @@ class PromptStep(BaseModel):
     """
     The prompt to run
     """
-    tools: Literal["all"] | list[ToolRef | CreateToolRequest] = []
+    tools: Literal["all"] | list[ToolRef | CreateToolRequest] = "all"
     """
     The tools to use for the prompt
     """
@@ -728,7 +728,7 @@ class PromptStepUpdateItem(BaseModel):
     """
     The prompt to run
     """
-    tools: Literal["all"] | list[ToolRefUpdateItem | CreateToolRequest] = []
+    tools: Literal["all"] | list[ToolRefUpdateItem | CreateToolRequest] = "all"
     """
     The tools to use for the prompt
     """
diff --git a/integrations-service/integrations/autogen/Tasks.py b/integrations-service/integrations/autogen/Tasks.py
index 5fb285ed5..41e9231cc 100644
--- a/integrations-service/integrations/autogen/Tasks.py
+++ b/integrations-service/integrations/autogen/Tasks.py
@@ -686,7 +686,7 @@ class PromptStep(BaseModel):
     """
     The prompt to run
     """
-    tools: Literal["all"] | list[ToolRef | CreateToolRequest] = []
+    tools: Literal["all"] | list[ToolRef | CreateToolRequest] = "all"
     """
     The tools to use for the prompt
     """
@@ -728,7 +728,7 @@ class PromptStepUpdateItem(BaseModel):
     """
     The prompt to run
     """
-    tools: Literal["all"] | list[ToolRefUpdateItem | CreateToolRequest] = []
+    tools: Literal["all"] | list[ToolRefUpdateItem | CreateToolRequest] = "all"
     """
     The tools to use for the prompt
     """
diff --git a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
index 97dfbf141..8d9b7b541 100644
--- a/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
+++ b/typespec/tsp-output/@typespec/openapi3/openapi-1.0.0.yaml
@@ -4845,7 +4845,7 @@ components:
                   - $ref: '#/components/schemas/Tasks.ToolRef'
                   - $ref: '#/components/schemas/Tools.CreateToolRequest'
           description: The tools to use for the prompt
-          default: []
+          default: all
         tool_choice:
           anyOf:
             - type: string
@@ -4976,7 +4976,7 @@ components:
                   - $ref: '#/components/schemas/Tasks.ToolRefUpdateItem'
                   - $ref: '#/components/schemas/Tools.CreateToolRequest'
           description: The tools to use for the prompt
-          default: []
+          default: all
         tool_choice:
           anyOf:
             - type: string