From d36a044b2ecdb231fee9c8e04cc432c8261555b4 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Tue, 6 Aug 2024 21:00:32 -0700 Subject: [PATCH 01/11] Initial implementation of human handoff --- core/agents/bug_hunter.py | 149 +++++++++++++++--- core/agents/orchestrator.py | 3 + core/db/models/project_state.py | 1 + core/prompts/bug-hunter/ask_a_question.prompt | 4 + .../prompts/bug-hunter/data_about_logs.prompt | 6 + .../bug-hunter/problem_explanation.prompt | 11 ++ core/prompts/bug-hunter/tell_me_more.prompt | 1 + core/ui/base.py | 12 ++ core/ui/console.py | 6 + core/ui/ipc_client.py | 10 ++ core/ui/virtual.py | 6 + 11 files changed, 189 insertions(+), 20 deletions(-) create mode 100644 core/prompts/bug-hunter/ask_a_question.prompt create mode 100644 core/prompts/bug-hunter/data_about_logs.prompt create mode 100644 core/prompts/bug-hunter/problem_explanation.prompt create mode 100644 core/prompts/bug-hunter/tell_me_more.prompt diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 30df597af..5ca3ce0f2 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -30,6 +30,18 @@ class HuntConclusionOptions(BaseModel): ) +class ImportantLog(BaseModel): + logCode: str = Field(description="Actual line of code that prints the log.") + filePath: str = Field(description="Path to the file in which the log exists.") + currentOutput: str = Field(description="Current output of the log.") + expectedOutput: str = Field(description="Expected output of the log.") + explanation: str = Field(description="A brief explanation of the log.") + + +class ImportantLogsForDebugging(BaseModel): + logs: list[ImportantLog] = Field(description="Important logs that will help the human debug the current bug.") + + class BugHunter(BaseAgent): agent_type = "bug-hunter" display_name = "Bug Hunter" @@ -46,6 +58,8 @@ async def run(self) -> AgentResponse: return await self.ask_user_to_test(False, True) elif current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION: return await self.ask_user_to_test(True, False) + elif current_iteration["status"] == IterationStatus.START_PAIR_PROGRAMMING: + return await self.start_pair_programming() async def get_bug_reproduction_instructions(self): llm = self.get_llm() @@ -62,23 +76,7 @@ async def get_bug_reproduction_instructions(self): async def check_logs(self, logs_message: str = None): llm = self.get_llm(CHECK_LOGS_AGENT_NAME) - convo = AgentConvo(self).template( - "iteration", - current_task=self.current_state.current_task, - user_feedback=self.current_state.current_iteration["user_feedback"], - user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], - docs=self.current_state.docs, - magic_words=magic_words, - next_solution_to_try=None, - ) - - for hunting_cycle in self.current_state.current_iteration.get("bug_hunting_cycles", []): - convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( - "log_data", - backend_logs=hunting_cycle["backend_logs"], - frontend_logs=hunting_cycle["frontend_logs"], - fix_attempted=hunting_cycle["fix_attempted"], - ) + convo = self.generate_iteration_convo_so_far() human_readable_instructions = await llm(convo, temperature=0.5) @@ -121,14 +119,22 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti + self.current_state.current_iteration["bug_reproduction_description"] ) + buttons = {} + + last_iteration = self.current_state.iterations[-1] if len(self.current_state.iterations) >= 3 else None + if last_iteration: + buttons["loop"] = "I'm stuck in a loop" + if self.current_state.run_command: await self.ui.send_run_command(self.current_state.run_command) if awaiting_user_test: + buttons["yes"] = "Yes, the issue is fixed" + buttons["no"] = "No" user_feedback = await self.ask_question( "Is the bug you reported fixed now?", - buttons={"yes": "Yes, the issue is fixed", "no": "No"}, - default="continue", + buttons=buttons, + default="yes", buttons_only=True, hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], @@ -137,14 +143,18 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if user_feedback.button == "yes": self.next_state.complete_iteration() + elif user_feedback.button == "loop": + self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING else: awaiting_bug_reproduction = True if awaiting_bug_reproduction: # TODO how can we get FE and BE logs automatically? + buttons["continue"] = "Continue" + buttons["done"] = "Bug is fixed" backend_logs = await self.ask_question( "Please do exactly what you did in the last iteration, paste **BACKEND** logs here and click CONTINUE.", - buttons={"continue": "Continue", "done": "Bug is fixed"}, + buttons=buttons, default="continue", hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], @@ -152,6 +162,8 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if backend_logs.button == "done": self.next_state.complete_iteration() + elif backend_logs.button == "loop": + self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING else: frontend_logs = await self.ask_question( "Please paste **frontend** logs here and click CONTINUE.", @@ -170,3 +182,100 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti self.next_state.complete_iteration() return AgentResponse.done(self) + + async def start_pair_programming(self): + llm = self.get_llm() + convo = self.generate_iteration_convo_so_far(True) + # TODO: structure this output better + convo = convo.template("problem_explanation") + initial_explanation = await llm(convo, temperature=0.5) + + convo = convo.template("data_about_logs").require_schema(ImportantLogsForDebugging) + + data_about_logs = await llm(convo, parser=JSONParser(ImportantLogsForDebugging), temperature=0.5) + + await self.ui.send_data_about_logs(data_about_logs) + + while True: + self.next_state.current_iteration["initial_explanation"] = initial_explanation + next_step = await self.ask_question( + "How do you want to approach this?", + buttons={ + "question": "I have a question", + "done": "I fixed the bug myself", + "tell_me_more": "Tell me more about the bug", + "additional_user_info": "I have a hint for Pythagora", + "solution_tip": "I think I know where the problem is", + "other": "Other", + }, + default="continue", + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], + ) + + # TODO: remove when Leon checks + convo.remove_last_x_messages(2) + + if len(convo.messages) > 10: + convo.trim(1, 2) + + if next_step.button == "done": + self.next_state.complete_iteration() + elif next_step.button == "question": + # TODO: in the future improve with a separate conversation and autonomous parsing of user info + user_response = await self.ask_question("Oh, cool, what would you like to know?") + convo = convo.template("ask_a_question", question=user_response.text) + llm_answer = await llm(convo, temperature=0.5) + await self.send_message(llm_answer) + elif next_step.button == "tell_me_more": + convo.template("tell_me_more") + response = await llm(convo, temperature=0.5) + await self.send_message(response) + elif next_step.button == "other": + # this is the same as "question" - we want to keep an option for users to click to understand if we're missing something with other options + user_response = await self.ask_question("Let me know what you think...") + convo = convo.template("ask_a_question", question=user_response.text) + llm_answer = await llm(convo, temperature=0.5) + await self.send_message(llm_answer) + elif next_step.button in ["additional_user_info", "solution_tip"]: + user_response = await self.ask_question("Oh, cool, what would you like to know?") + await self.continue_on(convo, next_step.button, user_response) + elif next_step.button == "tell_me_more": + convo.template("tell_me_more") + response = await llm(convo, temperature=0.5) + await self.send_message(response) + continue + + # TODO: send telemetry so we know what do users mostly click here! + return AgentResponse.done(self) + + def generate_iteration_convo_so_far(self, omit_last_cycle=False): + convo = AgentConvo(self).template( + "iteration", + current_task=self.current_state.current_task, + user_feedback=self.current_state.current_iteration["user_feedback"], + user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], + docs=self.current_state.docs, + magic_words=magic_words, + next_solution_to_try=None, + ) + + hunting_cycles = self.current_state.current_iteration.get("bug_hunting_cycles", [])[ + 0 : (-1 if omit_last_cycle else None) + ] + + for hunting_cycle in hunting_cycles: + convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( + "log_data", + backend_logs=hunting_cycle["backend_logs"], + frontend_logs=hunting_cycle["frontend_logs"], + fix_attempted=hunting_cycle["fix_attempted"], + ) + + return convo + + async def continue_on(self, convo, button_value, user_response): + llm = self.get_llm() + convo = convo.template("continue_on") + continue_on = await llm(convo, temperature=0.5) + return continue_on diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index 2b6bdc439..22570b1e3 100644 --- a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -233,6 +233,9 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: if current_iteration_status == IterationStatus.HUNTING_FOR_BUG: # Triggering the bug hunter to start the hunt return BugHunter(self.state_manager, self.ui) + elif current_iteration_status == IterationStatus.START_PAIR_PROGRAMMING: + # Pythagora cannot solve the issue so we're starting pair programming + return BugHunter(self.state_manager, self.ui) elif current_iteration_status == IterationStatus.AWAITING_LOGGING: # Get the developer to implement logs needed for debugging return Developer(self.state_manager, self.ui) diff --git a/core/db/models/project_state.py b/core/db/models/project_state.py index 67ad685ad..663cc09ca 100644 --- a/core/db/models/project_state.py +++ b/core/db/models/project_state.py @@ -42,6 +42,7 @@ class IterationStatus: FIND_SOLUTION = "find_solution" PROBLEM_SOLVER = "problem_solver" NEW_FEATURE_REQUESTED = "new_feature_requested" + START_PAIR_PROGRAMMING = "start_pair_programming" DONE = "done" diff --git a/core/prompts/bug-hunter/ask_a_question.prompt b/core/prompts/bug-hunter/ask_a_question.prompt new file mode 100644 index 000000000..348ee3e5d --- /dev/null +++ b/core/prompts/bug-hunter/ask_a_question.prompt @@ -0,0 +1,4 @@ +The developer wants to ask you a question. Here is the question: +{{question}} + +Please answer and refer to all the files in the repository and everything we've talked about so far but do not form your answer in any way that was asked for before, just answer the question as if you're talking to a colleague. diff --git a/core/prompts/bug-hunter/data_about_logs.prompt b/core/prompts/bug-hunter/data_about_logs.prompt new file mode 100644 index 000000000..a37a5a9f9 --- /dev/null +++ b/core/prompts/bug-hunter/data_about_logs.prompt @@ -0,0 +1,6 @@ +Tell me the most important logs that are relevant for this issue. For each log, tell me the the following: + 1. line in the code (eg. `print(...)`, `console.log(...)`, etc.) that generated the log + 2. what file is the log in (eg. `index.js`, `app.js`, etc. - don't put the full path but only the file name) + 2. the current output of that log (make sure not to put the entire log output but maximum 5-10 lines of the output) + 3. the expected output of that log (also make sure to put maximum of 5-10 lines of the output) + 4. a brief explanation of why the output is incorrect and what should be different here (use maximum 2-3 sentences) diff --git a/core/prompts/bug-hunter/problem_explanation.prompt b/core/prompts/bug-hunter/problem_explanation.prompt new file mode 100644 index 000000000..e327a5dbf --- /dev/null +++ b/core/prompts/bug-hunter/problem_explanation.prompt @@ -0,0 +1,11 @@ +This also didn't help to solve the issue so we can conclude that you are unable to solve this problem yourself so I got a human here who will help you out. + +First, focus on the problem you're facing and explain it to the human. Explain what is the issue that you're working in, specify logs that are indicative of the problem and the logs should be different, how should they be different and how will that solve the problem. + + +Log that is indicative of the problem: + - how the logs look right now + - how the logs should look like + +What did you try to solve the problem? + diff --git a/core/prompts/bug-hunter/tell_me_more.prompt b/core/prompts/bug-hunter/tell_me_more.prompt new file mode 100644 index 000000000..5a25ff0fd --- /dev/null +++ b/core/prompts/bug-hunter/tell_me_more.prompt @@ -0,0 +1 @@ +Please tell me more about the problem we're working on and don't repeat things you said before but tell me something I don't know. diff --git a/core/ui/base.py b/core/ui/base.py index 0cb8dbdd2..ead320658 100644 --- a/core/ui/base.py +++ b/core/ui/base.py @@ -231,6 +231,18 @@ async def send_step_progress( """ raise NotImplementedError() + # TODO: fix!!! + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + """ + Send the data about debugging logs. + + :param data_about_logs: Data about logs. + """ + raise NotImplementedError() + async def send_run_command(self, run_command: str): """ Send a run command to the UI. diff --git a/core/ui/console.py b/core/ui/console.py index ed3128120..876bb903c 100644 --- a/core/ui/console.py +++ b/core/ui/console.py @@ -118,6 +118,12 @@ async def send_step_progress( ): pass + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + pass + async def send_run_command(self, run_command: str): pass diff --git a/core/ui/ipc_client.py b/core/ui/ipc_client.py index a3d09b9ac..e2e74eace 100644 --- a/core/ui/ipc_client.py +++ b/core/ui/ipc_client.py @@ -29,6 +29,7 @@ class MessageType(str, Enum): USER_INPUT_REQUEST = "user_input_request" INFO = "info" PROGRESS = "progress" + DEBUGGING_LOGS = "debugging_logs" RUN_COMMAND = "run_command" OPEN_FILE = "openFile" PROJECT_FOLDER_NAME = "project_folder_name" @@ -331,6 +332,15 @@ async def send_step_progress( }, ) + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + await self._send( + MessageType.DEBUGGING_LOGS, + content=data_about_logs, + ) + async def send_run_command(self, run_command: str): await self._send( MessageType.RUN_COMMAND, diff --git a/core/ui/virtual.py b/core/ui/virtual.py index 0d07a58fc..7ca5ddac9 100644 --- a/core/ui/virtual.py +++ b/core/ui/virtual.py @@ -111,6 +111,12 @@ async def send_step_progress( ): pass + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + pass + async def send_run_command(self, run_command: str): pass From 6f315f53424bc1011e53c745f80995a97262bdf2 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Tue, 6 Aug 2024 21:01:32 -0700 Subject: [PATCH 02/11] added two supporting functions to the convo class --- core/agents/convo.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/agents/convo.py b/core/agents/convo.py index 0eb58f9d8..b833c4656 100644 --- a/core/agents/convo.py +++ b/core/agents/convo.py @@ -88,6 +88,22 @@ def fork(self) -> "AgentConvo": child.prompt_log = deepcopy(self.prompt_log) return child + def trim(self, trim_index: int, trim_count: int) -> "AgentConvo": + """ + Trim the conversation starting from the given index by 1 message. + :param trim_index: + :return: + """ + self.messages = self.messages[:trim_index] + self.messages[trim_index + trim_count :] + return self + + def remove_last_x_messages(self, count: int) -> "AgentConvo": + """ + Remove the last `count` messages from the conversation. + """ + self.messages = self.messages[:-count] + return self + def require_schema(self, model: BaseModel) -> "AgentConvo": def remove_defs(d): if isinstance(d, dict): From 99792ac43c762b7e39e80b227b449e63cac9d81c Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 7 Aug 2024 07:30:23 -0700 Subject: [PATCH 03/11] Fix --- core/agents/bug_hunter.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 5ca3ce0f2..e841ff17f 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -194,7 +194,20 @@ async def start_pair_programming(self): data_about_logs = await llm(convo, parser=JSONParser(ImportantLogsForDebugging), temperature=0.5) - await self.ui.send_data_about_logs(data_about_logs) + await self.ui.send_data_about_logs( + { + "logs": [ + { + "currentLog": d.currentOutput, + "expectedLog": d.expectedOutput, + "explanation": d.explanation, + "filePath": d.filePath, + "logCode": d.logCode, + } + for d in data_about_logs.logs + ] + } + ) while True: self.next_state.current_iteration["initial_explanation"] = initial_explanation From 225457df83ac4a641e2ad228f87496d523d237f5 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 8 Aug 2024 16:38:12 -0700 Subject: [PATCH 04/11] Added a new field for the extension shouldBeDifferent and prompted the LLM to return full paths --- core/agents/bug_hunter.py | 5 ++++- core/prompts/bug-hunter/data_about_logs.prompt | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index e841ff17f..8c4dcb277 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -32,6 +32,9 @@ class HuntConclusionOptions(BaseModel): class ImportantLog(BaseModel): logCode: str = Field(description="Actual line of code that prints the log.") + shouldBeDifferent: bool = Field( + description="Whether the current output should be different from the expected output." + ) filePath: str = Field(description="Path to the file in which the log exists.") currentOutput: str = Field(description="Current output of the log.") expectedOutput: str = Field(description="Expected output of the log.") @@ -77,7 +80,6 @@ async def get_bug_reproduction_instructions(self): async def check_logs(self, logs_message: str = None): llm = self.get_llm(CHECK_LOGS_AGENT_NAME) convo = self.generate_iteration_convo_so_far() - human_readable_instructions = await llm(convo, temperature=0.5) convo = ( @@ -203,6 +205,7 @@ async def start_pair_programming(self): "explanation": d.explanation, "filePath": d.filePath, "logCode": d.logCode, + "shouldBeDifferent": d.shouldBeDifferent, } for d in data_about_logs.logs ] diff --git a/core/prompts/bug-hunter/data_about_logs.prompt b/core/prompts/bug-hunter/data_about_logs.prompt index a37a5a9f9..630b2fc89 100644 --- a/core/prompts/bug-hunter/data_about_logs.prompt +++ b/core/prompts/bug-hunter/data_about_logs.prompt @@ -1,6 +1,7 @@ Tell me the most important logs that are relevant for this issue. For each log, tell me the the following: 1. line in the code (eg. `print(...)`, `console.log(...)`, etc.) that generated the log - 2. what file is the log in (eg. `index.js`, `app.js`, etc. - don't put the full path but only the file name) + 2. what file is the log in (eg. `index.js`, `routes/users.js`, etc. - make sure to put the entire path like listed above) 2. the current output of that log (make sure not to put the entire log output but maximum 5-10 lines of the output) 3. the expected output of that log (also make sure to put maximum of 5-10 lines of the output) - 4. a brief explanation of why the output is incorrect and what should be different here (use maximum 2-3 sentences) + 4. should the log be different from the current output or are the current and expected output the same + 5. a brief explanation of why the output is incorrect and what should be different here (use maximum 2-3 sentences) From 3fcbd6d73a0e790f6967e640309a2bf4548da6e4 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 8 Aug 2024 16:40:52 -0700 Subject: [PATCH 05/11] Small refactor so we can reuse this function for pair programming --- core/agents/bug_hunter.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 8c4dcb277..8586c7c52 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -93,23 +93,13 @@ async def check_logs(self, logs_message: str = None): llm = self.get_llm() hunt_conclusion = await llm(convo, parser=JSONParser(HuntConclusionOptions), temperature=0) - self.next_state.current_iteration["description"] = human_readable_instructions - self.next_state.current_iteration["bug_hunting_cycles"] += [ - { - "human_readable_instructions": human_readable_instructions, - "fix_attempted": any( - c["fix_attempted"] for c in self.current_state.current_iteration["bug_hunting_cycles"] - ), - } - ] - if hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: # if no need for logs, implement iteration same as before - self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_FIX + self.set_data_for_next_hunting_cycle(human_readable_instructions, IterationStatus.AWAITING_BUG_FIX) await self.send_message("The bug is found - I'm attempting to fix it.") else: # if logs are needed, add logging steps - self.next_state.current_iteration["status"] = IterationStatus.AWAITING_LOGGING + self.set_data_for_next_hunting_cycle(human_readable_instructions, IterationStatus.AWAITING_LOGGING) await self.send_message("Adding more logs to identify the bug.") self.next_state.flag_iterations_as_modified() @@ -290,6 +280,19 @@ def generate_iteration_convo_so_far(self, omit_last_cycle=False): return convo + def set_data_for_next_hunting_cycle(self, human_readable_instructions, new_status): + self.next_state.current_iteration["description"] = human_readable_instructions + self.next_state.current_iteration["bug_hunting_cycles"] += [ + { + "human_readable_instructions": human_readable_instructions, + "fix_attempted": any( + c["fix_attempted"] for c in self.current_state.current_iteration["bug_hunting_cycles"] + ), + } + ] + + self.next_state.current_iteration["status"] = new_status + async def continue_on(self, convo, button_value, user_response): llm = self.get_llm() convo = convo.template("continue_on") From 3fa9d83904d929da42d1efac10f689c8a8364cc4 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 8 Aug 2024 16:41:43 -0700 Subject: [PATCH 06/11] Renaming I'm stuck in a loop to Start pair programming + log rename --- core/agents/bug_hunter.py | 2 +- core/agents/developer.py | 2 +- core/agents/troubleshooter.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 8586c7c52..741006137 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -115,7 +115,7 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti last_iteration = self.current_state.iterations[-1] if len(self.current_state.iterations) >= 3 else None if last_iteration: - buttons["loop"] = "I'm stuck in a loop" + buttons["loop"] = "Start Pair Programming" if self.current_state.run_command: await self.ui.send_run_command(self.current_state.run_command) diff --git a/core/agents/developer.py b/core/agents/developer.py index 02f596c3d..ac73864b4 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -119,7 +119,7 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] source = "bug_hunt" n_tasks = len(self.next_state.iterations) log.debug(f"Breaking down the logging cycle {description}") - await self.send_message("Breaking down the current iteration logging cycle ...") + await self.send_message("Breaking down the current bug hunting cycle ...") else: iteration = self.current_state.current_iteration current_task["task_review_feedback"] = None diff --git a/core/agents/troubleshooter.py b/core/agents/troubleshooter.py index 497eb7805..fe8fce542 100644 --- a/core/agents/troubleshooter.py +++ b/core/agents/troubleshooter.py @@ -216,7 +216,7 @@ async def get_user_feedback( If "is_loop" is True, Pythagora is stuck in a loop and needs to consider alternative solutions. The last element in the tuple is the user feedback, which may be empty if the user provided no - feedback (eg. if they just clicked on "Continue" or "I'm stuck in a loop"). + feedback (eg. if they just clicked on "Continue" or "Start Pair Programming"). """ bug_report = None @@ -233,7 +233,7 @@ async def get_user_feedback( buttons = {"continue": "Everything works", "change": "I want to make a change", "bug": "There is an issue"} if last_iteration: - buttons["loop"] = "I'm stuck in a loop" + buttons["loop"] = "Start Pair Programming" user_response = await self.ask_question( test_message, buttons=buttons, default="continue", buttons_only=True, hint=hint From c7b2afe446c65e40361fa207746a356138afb846 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 8 Aug 2024 16:42:25 -0700 Subject: [PATCH 07/11] Finished the initial implementation for the human handoff --- core/agents/bug_hunter.py | 38 ++++++++++++++----- .../bug-hunter/problem_explanation.prompt | 9 +---- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 741006137..efe999178 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -178,7 +178,7 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti async def start_pair_programming(self): llm = self.get_llm() convo = self.generate_iteration_convo_so_far(True) - # TODO: structure this output better + convo.remove_last_x_messages(1) convo = convo.template("problem_explanation") initial_explanation = await llm(convo, temperature=0.5) @@ -205,15 +205,15 @@ async def start_pair_programming(self): while True: self.next_state.current_iteration["initial_explanation"] = initial_explanation next_step = await self.ask_question( - "How do you want to approach this?", + "What do you want to do?", buttons={ "question": "I have a question", "done": "I fixed the bug myself", "tell_me_more": "Tell me more about the bug", - "additional_user_info": "I have a hint for Pythagora", - "solution_tip": "I think I know where the problem is", + "solution_hint": "I think I know where the problem is", "other": "Other", }, + buttons_only=True, default="continue", hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], @@ -225,10 +225,11 @@ async def start_pair_programming(self): if len(convo.messages) > 10: convo.trim(1, 2) + # TODO: in the future improve with a separate conversation that parses the user info and goes into an appropriate if statement if next_step.button == "done": self.next_state.complete_iteration() + break elif next_step.button == "question": - # TODO: in the future improve with a separate conversation and autonomous parsing of user info user_response = await self.ask_question("Oh, cool, what would you like to know?") convo = convo.template("ask_a_question", question=user_response.text) llm_answer = await llm(convo, temperature=0.5) @@ -243,17 +244,34 @@ async def start_pair_programming(self): convo = convo.template("ask_a_question", question=user_response.text) llm_answer = await llm(convo, temperature=0.5) await self.send_message(llm_answer) - elif next_step.button in ["additional_user_info", "solution_tip"]: - user_response = await self.ask_question("Oh, cool, what would you like to know?") - await self.continue_on(convo, next_step.button, user_response) + elif next_step.button == "solution_hint": + human_hint_label = "Amazing!!! How do you think we can solve this bug?" + while True: + human_hint = await self.ask_question(human_hint_label) + convo = convo.template("instructions_from_human_hint", human_hint=human_hint.text) + llm = self.get_llm(CHECK_LOGS_AGENT_NAME) + human_readable_instructions = await llm(convo, temperature=0.5) + human_approval = await self.ask_question( + "Can I implement this solution?", buttons={"yes": "Yes", "no": "No"}, buttons_only=True + ) + llm = self.get_llm() + if human_approval.button == "yes": + self.set_data_for_next_hunting_cycle( + human_readable_instructions, IterationStatus.AWAITING_BUG_FIX + ) + self.next_state.flag_iterations_as_modified() + break + else: + human_hint_label = "Oh, my bad, what did I misunderstand?" + break elif next_step.button == "tell_me_more": convo.template("tell_me_more") response = await llm(convo, temperature=0.5) await self.send_message(response) continue - # TODO: send telemetry so we know what do users mostly click here! - return AgentResponse.done(self) + # TODO: send telemetry so we know what do users mostly click here! + return AgentResponse.done(self) def generate_iteration_convo_so_far(self, omit_last_cycle=False): convo = AgentConvo(self).template( diff --git a/core/prompts/bug-hunter/problem_explanation.prompt b/core/prompts/bug-hunter/problem_explanation.prompt index e327a5dbf..3064d1d00 100644 --- a/core/prompts/bug-hunter/problem_explanation.prompt +++ b/core/prompts/bug-hunter/problem_explanation.prompt @@ -1,11 +1,4 @@ This also didn't help to solve the issue so we can conclude that you are unable to solve this problem yourself so I got a human here who will help you out. -First, focus on the problem you're facing and explain it to the human. Explain what is the issue that you're working in, specify logs that are indicative of the problem and the logs should be different, how should they be different and how will that solve the problem. - - -Log that is indicative of the problem: - - how the logs look right now - - how the logs should look like - -What did you try to solve the problem? +First, focus on the problem you're facing and explain it to the human. Explain what is the issue that you're working in and what should the human try to do to solve this problem. Is there anything the human can look at that you don't have access to - a database, API response, etc.? If there is something for the human to look at, specify exactly how can the human obtain this information. Keep the answer short and to the point. From 46de1f04d3b37748a5cd23dbd592e529a42c3caf Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 8 Aug 2024 16:43:05 -0700 Subject: [PATCH 08/11] Adding instructions from human hint prompt --- core/prompts/bug-hunter/instructions_from_human_hint.prompt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 core/prompts/bug-hunter/instructions_from_human_hint.prompt diff --git a/core/prompts/bug-hunter/instructions_from_human_hint.prompt b/core/prompts/bug-hunter/instructions_from_human_hint.prompt new file mode 100644 index 000000000..a2d95cad3 --- /dev/null +++ b/core/prompts/bug-hunter/instructions_from_human_hint.prompt @@ -0,0 +1,6 @@ +The human is sending you a hint about how to solve this bug. Here is what human said: +``` +{{ human_hint }} +``` + +Now, based on this hint, break down exactly what the problem is, what is the solution to this problem and how can we implement this solution so that the bug is fixed. From 40d57203f00552ca8fb1e6f7c2fa4c7b61868c7e Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 8 Aug 2024 17:09:45 -0700 Subject: [PATCH 09/11] Renamed stuck in a loop to pair programming --- core/agents/bug_hunter.py | 6 +++--- core/agents/troubleshooter.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index efe999178..ab73f4784 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -115,7 +115,7 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti last_iteration = self.current_state.iterations[-1] if len(self.current_state.iterations) >= 3 else None if last_iteration: - buttons["loop"] = "Start Pair Programming" + buttons["start_pair_programming"] = "Start Pair Programming" if self.current_state.run_command: await self.ui.send_run_command(self.current_state.run_command) @@ -135,7 +135,7 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if user_feedback.button == "yes": self.next_state.complete_iteration() - elif user_feedback.button == "loop": + elif user_feedback.button == "start_pair_programming": self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING else: awaiting_bug_reproduction = True @@ -154,7 +154,7 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if backend_logs.button == "done": self.next_state.complete_iteration() - elif backend_logs.button == "loop": + elif backend_logs.button == "start_pair_programming": self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING else: frontend_logs = await self.ask_question( diff --git a/core/agents/troubleshooter.py b/core/agents/troubleshooter.py index fe8fce542..a960688d5 100644 --- a/core/agents/troubleshooter.py +++ b/core/agents/troubleshooter.py @@ -233,7 +233,7 @@ async def get_user_feedback( buttons = {"continue": "Everything works", "change": "I want to make a change", "bug": "There is an issue"} if last_iteration: - buttons["loop"] = "Start Pair Programming" + buttons["start_pair_programming"] = "Start Pair Programming" user_response = await self.ask_question( test_message, buttons=buttons, default="continue", buttons_only=True, hint=hint @@ -241,9 +241,9 @@ async def get_user_feedback( if user_response.button == "continue" or user_response.cancelled: should_iterate = False - elif user_response.button == "loop": + elif user_response.button == "start_pair_programming": await telemetry.trace_code_event( - "stuck-in-loop", + "pair-programming-started", { "clicked": True, "task_index": self.current_state.tasks.index(self.current_state.current_task) + 1, From 358776318da9b845f9c33c742b7bf3f24ec69636 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Fri, 9 Aug 2024 16:47:59 -0700 Subject: [PATCH 10/11] Fix --- core/agents/bug_hunter.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index ab73f4784..c41300484 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -137,6 +137,8 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti self.next_state.complete_iteration() elif user_feedback.button == "start_pair_programming": self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING + # TODO: Leon check if this is needed + self.next_state.flag_iterations_as_modified() else: awaiting_bug_reproduction = True @@ -156,6 +158,8 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti self.next_state.complete_iteration() elif backend_logs.button == "start_pair_programming": self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING + # TODO: Leon check if this is needed + self.next_state.flag_iterations_as_modified() else: frontend_logs = await self.ask_question( "Please paste **frontend** logs here and click CONTINUE.", @@ -291,9 +295,9 @@ def generate_iteration_convo_so_far(self, omit_last_cycle=False): for hunting_cycle in hunting_cycles: convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( "log_data", - backend_logs=hunting_cycle["backend_logs"], - frontend_logs=hunting_cycle["frontend_logs"], - fix_attempted=hunting_cycle["fix_attempted"], + backend_logs=hunting_cycle.get("backend_logs"), + frontend_logs=hunting_cycle.get("frontend_logs"), + fix_attempted=hunting_cycle.get("fix_attempted"), ) return convo From 0fab94716328148c30a326e2c94b3020b3d34e0b Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Fri, 9 Aug 2024 17:06:55 -0700 Subject: [PATCH 11/11] Added important_stream message to the core - this tells the extension that the next stream should be visible to the user (unfolded) and currently, render it as a markdown --- core/agents/bug_hunter.py | 6 ++++++ core/ui/base.py | 7 +++++++ core/ui/console.py | 3 +++ core/ui/ipc_client.py | 7 +++++++ core/ui/virtual.py | 3 +++ 5 files changed, 26 insertions(+) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index c41300484..2ce9b431a 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -184,6 +184,7 @@ async def start_pair_programming(self): convo = self.generate_iteration_convo_so_far(True) convo.remove_last_x_messages(1) convo = convo.template("problem_explanation") + await self.ui.start_important_stream() initial_explanation = await llm(convo, temperature=0.5) convo = convo.template("data_about_logs").require_schema(ImportantLogsForDebugging) @@ -236,16 +237,19 @@ async def start_pair_programming(self): elif next_step.button == "question": user_response = await self.ask_question("Oh, cool, what would you like to know?") convo = convo.template("ask_a_question", question=user_response.text) + await self.ui.start_important_stream() llm_answer = await llm(convo, temperature=0.5) await self.send_message(llm_answer) elif next_step.button == "tell_me_more": convo.template("tell_me_more") + await self.ui.start_important_stream() response = await llm(convo, temperature=0.5) await self.send_message(response) elif next_step.button == "other": # this is the same as "question" - we want to keep an option for users to click to understand if we're missing something with other options user_response = await self.ask_question("Let me know what you think...") convo = convo.template("ask_a_question", question=user_response.text) + await self.ui.start_important_stream() llm_answer = await llm(convo, temperature=0.5) await self.send_message(llm_answer) elif next_step.button == "solution_hint": @@ -253,6 +257,7 @@ async def start_pair_programming(self): while True: human_hint = await self.ask_question(human_hint_label) convo = convo.template("instructions_from_human_hint", human_hint=human_hint.text) + await self.ui.start_important_stream() llm = self.get_llm(CHECK_LOGS_AGENT_NAME) human_readable_instructions = await llm(convo, temperature=0.5) human_approval = await self.ask_question( @@ -270,6 +275,7 @@ async def start_pair_programming(self): break elif next_step.button == "tell_me_more": convo.template("tell_me_more") + await self.ui.start_important_stream() response = await llm(convo, temperature=0.5) await self.send_message(response) continue diff --git a/core/ui/base.py b/core/ui/base.py index bc94da40f..a16ef9308 100644 --- a/core/ui/base.py +++ b/core/ui/base.py @@ -268,6 +268,13 @@ async def send_project_root(self, path: str): """ raise NotImplementedError() + async def start_important_stream(self, path: str): + """ + Tell the extension that next stream should be visible and rendered as markdown + + """ + raise NotImplementedError() + async def send_project_stats(self, stats: dict): """ Send project statistics to the UI. diff --git a/core/ui/console.py b/core/ui/console.py index 1d8d442a7..7c381c15c 100644 --- a/core/ui/console.py +++ b/core/ui/console.py @@ -151,5 +151,8 @@ async def send_features_list(self, features: list[str]): async def import_project(self, project_dir: str): pass + async def start_important_stream(self): + pass + __all__ = ["PlainConsoleUI"] diff --git a/core/ui/ipc_client.py b/core/ui/ipc_client.py index e2e74eace..1decaffc3 100644 --- a/core/ui/ipc_client.py +++ b/core/ui/ipc_client.py @@ -45,6 +45,7 @@ class MessageType(str, Enum): FEATURE_FINISHED = "featureFinished" GENERATE_DIFF = "generateDiff" CLOSE_DIFF = "closeDiff" + IMPORTANT_STREAM = "importantStream" class Message(BaseModel): @@ -362,6 +363,12 @@ async def send_project_root(self, path: str): content=basename(path), ) + async def start_important_stream(self): + await self._send( + MessageType.IMPORTANT_STREAM, + content={}, + ) + async def send_project_stats(self, stats: dict): await self._send( MessageType.PROJECT_STATS, diff --git a/core/ui/virtual.py b/core/ui/virtual.py index 0ed3e2f29..c79f5da54 100644 --- a/core/ui/virtual.py +++ b/core/ui/virtual.py @@ -126,6 +126,9 @@ async def open_editor(self, file: str, line: Optional[int] = None): async def send_project_root(self, path: str): pass + async def start_important_stream(self): + pass + async def send_project_stats(self, stats: dict): pass