Prioritize Bash if LLM response contains both Bash and Conclusion #716

Merged · 1 commit · Nov 11, 2024
21 changes: 10 additions & 11 deletions agent/base_agent.py
@@ -68,6 +68,7 @@ def _filter_code(self, raw_code_block: str) -> str:
     return filtered_code_block
 
   def _format_bash_execution_result(self, process: sp.CompletedProcess) -> str:
+    """Formats a prompt based on bash execution result."""
     stdout = self.llm.truncate_prompt(process.stdout)
     # TODO(dongge) Share input limit evenly if both stdout and stderr overlong.
     stderr = self.llm.truncate_prompt(process.stderr, stdout)
@@ -76,18 +77,16 @@ def _format_bash_execution_result(self, process: sp.CompletedProcess) -> str:
             f'<stdout>\n{stdout}\n</stdout>\n'
             f'<stderr>\n{stderr}\n</stderr>\n')
 
-  def _container_handle_bash_command(self, cur_round: int, response: str,
+  def _container_handle_bash_command(self, command: str,
                                      tool: BaseTool) -> Prompt:
-    """Handles the command from LLM with container tool."""
-    command = self._parse_tag(response, 'bash')
-    if command:
-      prompt_text = self._format_bash_execution_result(tool.execute(command))
-    else:
-      logger.warning('ROUND %02d No BASH command from LLM response: %s',
-                     cur_round, response)
-      prompt_text = ('No bash command received, Please follow the '
-                     'interaction protocols:\n'
-                     f'{tool.tutorial()}')
+    """Handles the command from LLM with container |tool|."""
+    prompt_text = self._format_bash_execution_result(tool.execute(command))
     return DefaultTemplateBuilder(self.llm, None, initial=prompt_text).build([])
 
+  def _container_handle_invalid_tool_usage(self, tool: BaseTool) -> Prompt:
+    """Formats a prompt to re-teach LLM how to use the |tool|."""
+    prompt_text = (f'No valid instruction received, Please follow the '
+                   f'interaction protocols:\n{tool.tutorial()}')
+    return DefaultTemplateBuilder(self.llm, None, initial=prompt_text).build([])
+
   def _sleep_random_duration(self, min_sec: int = 1, max_sec: int = 60) -> None:
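For orientation, here is a minimal runnable sketch of the call flow these helpers now form: the caller parses the `<bash>` tag and passes only a non-empty command to the bash handler, while the no-instruction fallback lives in its own helper. Everything below (`parse_tag`, the shell invocation, the plain-string prompts) is a hypothetical stand-in for this repo's `BaseAgent`, `BaseTool`, and `DefaultTemplateBuilder`, not the actual implementation:

```python
import re
import subprocess as sp


def parse_tag(response: str, tag: str) -> str:
  """Hypothetical stand-in for BaseAgent._parse_tag: body of <tag>...</tag>."""
  match = re.search(rf'<{tag}>(.*?)</{tag}>', response, re.DOTALL)
  return match.group(1).strip() if match else ''


def format_bash_execution_result(process: sp.CompletedProcess) -> str:
  """Mirrors _format_bash_execution_result, minus the LLM-side truncation."""
  return (f'<stdout>\n{process.stdout}\n</stdout>\n'
          f'<stderr>\n{process.stderr}\n</stderr>\n')


def handle_response(response: str) -> str:
  """Caller-side flow after this PR: parse first, then dispatch."""
  if command := parse_tag(response, 'bash'):
    # _container_handle_bash_command now receives the parsed command directly.
    process = sp.run(command, shell=True, capture_output=True, text=True)
    return format_bash_execution_result(process)
  # _container_handle_invalid_tool_usage: re-teach the interaction protocol.
  return 'No valid instruction received, Please follow the interaction protocols.'
```

Splitting the parsing out of the handler means the bash handler no longer needs `cur_round` or the raw response, and the "invalid response" case can be shared by any tag-dispatching caller.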
10 changes: 8 additions & 2 deletions agent/prototyper.py
@@ -221,11 +221,17 @@ def _container_handle_conclusion(
   def _container_tool_reaction(self, cur_round: int, response: str,
                                build_result: BuildResult) -> Optional[Prompt]:
     """Validates LLM conclusion or executes its command."""
+    # Prioritize Bash instructions.
+    if command := self._parse_tag(response, 'bash'):
+      return self._container_handle_bash_command(command, self.inspect_tool)
+
     if self._parse_tag(response, 'conclusion'):
       return self._container_handle_conclusion(cur_round, response,
                                                build_result)
-    return self._container_handle_bash_command(cur_round, response,
-                                               self.inspect_tool)
+    # Other responses are invalid.
+    logger.warning('ROUND %02d Invalid response from LLM: %s', cur_round,
+                   response)
+    return self._container_handle_invalid_tool_usage(self.inspect_tool)
 
   def execute(self, result_history: list[Result]) -> BuildResult:
     """Executes the agent based on previous result."""
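And a quick check of the precedence the title describes — assuming the same hypothetical `parse_tag` stand-in as above, a response carrying both tags now takes the Bash branch:

```python
import re


def parse_tag(response: str, tag: str) -> str:
  """Hypothetical stand-in for BaseAgent._parse_tag."""
  match = re.search(rf'<{tag}>(.*?)</{tag}>', response, re.DOTALL)
  return match.group(1).strip() if match else ''


def dispatch(response: str) -> str:
  """Mirrors the post-PR ordering in _container_tool_reaction."""
  if command := parse_tag(response, 'bash'):  # Checked first as of this PR.
    return f'bash: {command}'
  if parse_tag(response, 'conclusion'):
    return 'conclusion'
  return 'invalid'                            # Neither tag: re-teach protocol.


both = '<conclusion>Build fixed.</conclusion>\n<bash>ls /src</bash>'
assert dispatch(both) == 'bash: ls /src'      # Bash wins over Conclusion.
assert dispatch('<conclusion>ok</conclusion>') == 'conclusion'
assert dispatch('free-form text') == 'invalid'
```

Checking `<bash>` before `<conclusion>` lets the LLM keep inspecting the container even when it prematurely emits a conclusion in the same response, rather than locking in a possibly underinformed build result.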