From f7f77d5f4a6252a5856948f695690fb383a1f602 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 17 Dec 2024 19:03:20 +0530 Subject: [PATCH] Hotfix: context issue in edit distance based editing --- gpt_instructions.txt | 2 +- pyproject.toml | 2 +- src/wcgw/client/anthropic_client.py | 2 +- src/wcgw/client/mcp_server/server.py | 2 +- src/wcgw/client/openai_client.py | 2 +- src/wcgw/client/tools.py | 27 ++++++++++++++++++--------- uv.lock | 2 +- 7 files changed, 24 insertions(+), 15 deletions(-) diff --git a/gpt_instructions.txt b/gpt_instructions.txt index 36cfd96..f9cf244 100644 --- a/gpt_instructions.txt +++ b/gpt_instructions.txt @@ -16,7 +16,7 @@ Instructions for `BashCommand`: - Do not use interactive commands like nano. Prefer writing simpler commands. - Status of the command and the current working directory will always be returned at the end. - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands. -- The first line might be `(...truncated)` if the output is too long. +- The first or the last line might be `(...truncated)` if the output is too long. - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished. - Run long running commands in background using screen instead of "&". diff --git a/pyproject.toml b/pyproject.toml index 5e1242f..f5d6063 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }] name = "wcgw" -version = "2.2.1" +version = "2.2.2" description = "Shell and coding agent on claude and chatgpt" readme = "README.md" requires-python = ">=3.11, <3.13" diff --git a/src/wcgw/client/anthropic_client.py b/src/wcgw/client/anthropic_client.py index 55fdd9d..6594fd2 100644 --- a/src/wcgw/client/anthropic_client.py +++ b/src/wcgw/client/anthropic_client.py @@ -169,7 +169,7 @@ def loop( - Do not use interactive commands like nano. Prefer writing simpler commands. - Status of the command and the current working directory will always be returned at the end. - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands. -- The first line might be `(...truncated)` if the output is too long. +- The first or the last line might be `(...truncated)` if the output is too long. - Always run `pwd` if you get any file or directory not found error to make sure you're not lost. - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished. - Run long running commands in background using screen instead of "&". diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index 4b59f5e..587656c 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -89,7 +89,7 @@ async def handle_list_tools() -> list[types.Tool]: - Do not use interactive commands like nano. Prefer writing simpler commands. - Status of the command and the current working directory will always be returned at the end. - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands. -- The first line might be `(...truncated)` if the output is too long. +- The first or the last line might be `(...truncated)` if the output is too long. - Always run `pwd` if you get any file or directory not found error to make sure you're not lost. - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished. - Run long running commands in background using screen instead of "&". diff --git a/src/wcgw/client/openai_client.py b/src/wcgw/client/openai_client.py index 9603a0e..e371fa8 100644 --- a/src/wcgw/client/openai_client.py +++ b/src/wcgw/client/openai_client.py @@ -172,7 +172,7 @@ def loop( - Do not use interactive commands like nano. Prefer writing simpler commands. - Status of the command and the current working directory will always be returned at the end. - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands. -- The first line might be `(...truncated)` if the output is too long. +- The first or the last line might be `(...truncated)` if the output is too long. - Always run `pwd` if you get any file or directory not found error to make sure you're not lost. - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished. - Run long running commands in background using screen instead of "&". diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 333fbaa..263fe92 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -13,7 +13,7 @@ import importlib.metadata import time import traceback -from tempfile import TemporaryDirectory +from tempfile import NamedTemporaryFile, TemporaryDirectory from typing import ( Callable, Literal, @@ -412,7 +412,7 @@ def execute_bash( tokens = enc.encode(text) if max_tokens and len(tokens) >= max_tokens: - text = "...(truncated)\n" + enc.decode(tokens[-(max_tokens - 1) :]) + text = "(...truncated)\n" + enc.decode(tokens[-(max_tokens - 1) :]) if is_interrupt: text = ( @@ -441,7 +441,7 @@ def execute_bash( tokens = enc.encode(output) if max_tokens and len(tokens) >= max_tokens: - output = "...(truncated)\n" + enc.decode(tokens[-(max_tokens - 1) :]) + output = "(...truncated)\n" + enc.decode(tokens[-(max_tokens - 1) :]) try: exit_status = get_status() @@ -592,7 +592,7 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str: def find_least_edit_distance_substring( content: str, find_str: str -) -> tuple[str, float]: +) -> tuple[str, str, float]: orig_content_lines = content.split("\n") content_lines = [ line.strip() for line in orig_content_lines @@ -612,6 +612,7 @@ def find_least_edit_distance_substring( # Slide window and find one with sum of edit distance least min_edit_distance = float("inf") min_edit_distance_lines = [] + context_lines = [] for i in range(max(1, len(content_lines) - len(find_lines) + 1)): edit_distance_sum = 0 for j in range(len(find_lines)): @@ -629,19 +630,27 @@ def find_least_edit_distance_substring( + 1 ) min_edit_distance_lines = orig_content_lines[ + orig_start_index:orig_end_index + ] + + context_lines = orig_content_lines[ max(0, orig_start_index - 10) : (orig_end_index + 10) ] - return "\n".join(min_edit_distance_lines), min_edit_distance + return ( + "\n".join(min_edit_distance_lines), + "\n".join(context_lines), + min_edit_distance, + ) def edit_content(content: str, find_lines: str, replace_with_lines: str) -> str: count = content.count(find_lines) if count == 0: - closest_match, min_edit_distance = find_least_edit_distance_substring( - content, find_lines + closest_match, context_lines, min_edit_distance = ( + find_least_edit_distance_substring(content, find_lines) ) if min_edit_distance == 0: - return edit_content(content, closest_match, replace_with_lines) + return content.replace(closest_match, replace_with_lines, 1) else: print( f"Exact match not found, found with whitespace removed edit distance: {min_edit_distance}" @@ -649,7 +658,7 @@ def edit_content(content: str, find_lines: str, replace_with_lines: str) -> str: raise Exception( f"""Error: no match found for the provided search block. Requested search block: \n```\n{find_lines}\n``` - Possible relevant section in the file:\n---\n```\n{closest_match}\n```\n---\nFile not edited + Possible relevant section in the file:\n---\n```\n{context_lines}\n```\n---\nFile not edited \nPlease retry with exact search. Re-read the file if unsure. """ ) diff --git a/uv.lock b/uv.lock index eb03e2e..00621e0 100644 --- a/uv.lock +++ b/uv.lock @@ -869,7 +869,7 @@ wheels = [ [[package]] name = "wcgw" -version = "2.2.0" +version = "2.2.2" source = { editable = "." } dependencies = [ { name = "anthropic" },