Merge pull request #846 from ATheorell/refactor

Refactor
gpt-engineer-org · Nov 12, 2023 · 132f428 · 132f428
2 parents 82d2bb1 + e0213af
commit 132f428
Show file tree

Hide file tree

Showing 20 changed files with 68 additions and 64 deletions.
diff --git a/gpt_engineer/applications/cli/cli_agent.py b/gpt_engineer/applications/cli/cli_agent.py
@@ -78,5 +78,5 @@ def init(self, prompt: str) -> Code:
         human_review(self.memory)
         return code
 
-    def improve(self, prompt: str) -> Code:
+    def improve(self, prompt: str, code) -> Code:
         pass
diff --git a/gpt_engineer/core/base_agent.py b/gpt_engineer/core/base_agent.py
@@ -50,5 +50,5 @@ def init(self, prompt: str) -> Code:
         pass
 
     @abstractmethod
-    def improve(self, prompt: str) -> Code:
+    def improve(self, prompt: str, code: Code) -> Code:
         pass
diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py
@@ -31,14 +31,15 @@
 import logging
 
 from dataclasses import dataclass
-from typing import List, Tuple
+from typing import List, Tuple, Union
+from pathlib import Path
 
 from gpt_engineer.core.default.on_disk_repository import (
     OnDiskRepository,
-    FileRepositories,
+    # FileRepositories,
 )
 from gpt_engineer.applications.cli.file_selector import FILE_LIST_NAME
-
+from gpt_engineer.core.code import Code
 
 logger = logging.getLogger(__name__)
 
@@ -90,21 +91,6 @@ def parse_chat(chat) -> List[Tuple[str, str]]:
     return files
 
 
-def to_files_and_memory(chat: str, dbs: FileRepositories):
-    """
-    Save chat to memory, and parse chat to extracted file and save them to the workspace.
-
-    Parameters
-    ----------
-    chat : str
-        The chat to parse.
-    dbs : DBs
-        The databases that include the memory and workspace database
-    """
-    dbs.memory["all_output.txt"] = chat
-    to_files(chat, dbs.workspace)
-
-
 def to_files(chat: str, workspace: OnDiskRepository):
     """
     Parse the chat and add all extracted files to the workspace.
@@ -186,9 +172,9 @@ def format_file_to_input(file_name: str, file_content: str) -> str:
     return file_str
 
 
-def overwrite_files_with_edits(chat: str, dbs: FileRepositories):
+def overwrite_files_with_edits(chat: str, code: Code):
     edits = parse_edits(chat)
-    apply_edits(edits, dbs.workspace)
+    apply_edits(edits, code)
 
 
 @dataclass
@@ -198,7 +184,7 @@ class Edit:
     after: str
 
 
-def parse_edits(llm_response):
+def parse_edits(chat: str):
     def parse_one_edit(lines):
         HEAD = "<<<<<<< HEAD"
         DIVIDER = "======="
@@ -216,62 +202,59 @@ def parse_one_edit(lines):
 
         return Edit(filename, before, after)
 
-    def parse_all_edits(txt):
-        edits = []
-        current_edit = []
-        in_fence = False
-
-        for line in txt.split("\n"):
-            if line.startswith("```") and in_fence:
-                edits.append(parse_one_edit(current_edit))
-                current_edit = []
-                in_fence = False
-                continue
-            elif line.startswith("```") and not in_fence:
-                in_fence = True
-                continue
+    edits = []
+    current_edit = []
+    in_fence = False
 
-            if in_fence:
-                current_edit.append(line)
+    for line in chat.split("\n"):
+        if line.startswith("```") and in_fence:
+            edits.append(parse_one_edit(current_edit))
+            current_edit = []
+            in_fence = False
+            continue
+        elif line.startswith("```") and not in_fence:
+            in_fence = True
+            continue
 
-        return edits
+        if in_fence:
+            current_edit.append(line)
 
-    return parse_all_edits(llm_response)
+    return edits
 
 
-def apply_edits(edits: List[Edit], workspace: OnDiskRepository):
+def apply_edits(edits: List[Edit], code: Code):
     for edit in edits:
         filename = edit.filename
         if edit.before == "":
-            if workspace.get(filename) is not None:
-                logger.warn(
+            if filename in code:
+                logger.warning(
                     f"The edit to be applied wants to create a new file `{filename}`, but that already exists. The file will be overwritten. See `.gpteng/memory` for previous version."
                 )
-            workspace[filename] = edit.after  # new file
+            code[filename] = edit.after  # new file
         else:
-            occurrences_cnt = workspace[filename].count(edit.before)
+            occurrences_cnt = code[filename].count(edit.before)
             if occurrences_cnt == 0:
-                logger.warn(
+                logger.warning(
                     f"While applying an edit to `{filename}`, the code block to be replaced was not found. No instances will be replaced."
                 )
             if occurrences_cnt > 1:
-                logger.warn(
+                logger.warning(
                     f"While applying an edit to `{filename}`, the code block to be replaced was found multiple times. All instances will be replaced."
                 )
-            workspace[filename] = workspace[filename].replace(
+            code[filename] = code[filename].replace(
                 edit.before, edit.after
             )  # existing file
 
 
-def _get_all_files_in_dir(directory):
+def _get_all_files_in_dir(directory: Union[str, Path]):
     for root, dirs, files in os.walk(directory):
         for file in files:
             yield os.path.join(root, file)
     for dir in dirs:
         yield from _get_all_files_in_dir(os.path.join(root, dir))
 
 
-def _open_file(file_path) -> str:
+def _open_file(file_path: Union[str, Path]) -> str:
     try:
         with open(file_path, "r", encoding="utf-8") as f:
             return f.read()

diff --git a/gpt_engineer/core/default/lean_agent.py b/gpt_engineer/core/default/lean_agent.py
@@ -76,5 +76,5 @@ def init(self, prompt: str) -> Code:
         execute_entrypoint(self.execution_env, code)
         return code
 
-    def improve(self, prompt: str) -> Code:
+    def improve(self, prompt: str, code: Code) -> Code:
         pass
diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py
@@ -1,6 +1,6 @@
 from gpt_engineer.core.code import Code
 from gpt_engineer.core.ai import AI
-from gpt_engineer.core.chat_to_files import parse_chat
+from gpt_engineer.core.chat_to_files import parse_chat, overwrite_files_with_edits, format_file_to_input
 from gpt_engineer.core.default.paths import (
     ENTRYPOINT_FILE,
     CODE_GEN_LOG_FILE,
@@ -205,7 +205,28 @@ def execute_entrypoint(execution_env: BaseExecutionEnv, code: Code) -> None:
     execution_env.execute_program(code)
 
 
-def improve(ai: AI, prompt: str) -> Code:
+def setup_sys_prompt_existing_code(db: OnDiskRepository) -> str:
+    """
+    Constructs a system prompt for the AI focused on improving an existing codebase.
+
+    This function sets up the system prompts for the AI, guiding it on how to
+    work with and improve an existing code base. The generated prompt consists
+    of the "improve" instruction (with dynamic format replacements) and the coding
+    "philosophy" taken from the preprompts of the given repository.
+
+    Parameters:
+    - db (OnDiskRepository): The repository whose preprompts hold the pre-defined prompts and instructions.
+
+    Returns:
+    - str: The constructed system prompt focused on existing code improvement for the AI.
+    """
+    return (
+        db.preprompts["improve"].replace("FILE_FORMAT", db.preprompts["file_format"])
+        + "\nUseful to know:\n"
+        + db.preprompts["philosophy"]
+    )
+
+def improve(ai: AI, prompt: str, code: Code) -> Code:
     """
     Process and improve the code from a specified set of existing files based on a user prompt.
 
@@ -236,21 +257,21 @@ def improve(ai: AI, prompt: str) -> Code:
     to send the formatted prompt to the LLM.
     """
 
-    files_info = get_code_strings(
-        dbs.workspace, dbs.project_metadata
-    )  # this has file names relative to the workspace path
-
+    # files_info = get_code_strings(
+    #     dbs.workspace, dbs.project_metadata
+    # )  # this has file names relative to the workspace path
+    db = OnDiskRepository(PREPROMPTS_PATH)
     messages = [
-        SystemMessage(content=setup_sys_prompt_existing_code(dbs)),
+        SystemMessage(content=setup_sys_prompt_existing_code(db)),
     ]
     # Add files as input
-    for file_name, file_str in files_info.items():
+    for file_name, file_str in code.items():
         code_input = format_file_to_input(file_name, file_str)
         messages.append(HumanMessage(content=f"{code_input}"))
 
-    messages.append(HumanMessage(content=f"Request: {dbs.input['prompt']}"))
+    messages.append(HumanMessage(content=f"Request: {prompt}"))
 
     messages = ai.next(messages, step_name=curr_fn())
 
-    overwrite_files_with_edits(messages[-1].content.strip(), dbs)
+    overwrite_files_with_edits(messages[-1].content.strip(), code)
     return messages
diff --git a/gpt_engineer/legacy/steps.py b/gpt_engineer/legacy/steps.py
@@ -63,7 +63,7 @@
     format_file_to_input,
     get_code_strings,
     overwrite_files_with_edits,
-    to_files_and_memory,
+    # to_files_and_memory,
 )
 from gpt_engineer.core.default.on_disk_repository import FileRepositories
 from gpt_engineer.applications.cli.file_selector import FILE_LIST_NAME, ask_for_files

diff --git a/projects/example-improve/run.sh b/projects/example-improve/run.sh
@@ -4,4 +4,4 @@ source venv/bin/activate
 pip install -r requirements.txt
 
 # b) Run all necessary parts of the codebase
-python src/main.py
+python main.py
diff --git a/tests/steps/__init__.py → tests/applications/__init__.py b/tests/steps/__init__.py → tests/applications/__init__.py
diff --git a/tests/applications/cli/__init__.py b/tests/applications/cli/__init__.py
diff --git a/tests/test_collect.py → tests/applications/cli/test_collect.py b/tests/test_collect.py → tests/applications/cli/test_collect.py
diff --git a/tests/test_collection_consent.py → ...plications/cli/test_collection_consent.py b/tests/test_collection_consent.py → ...plications/cli/test_collection_consent.py
diff --git a/tests/core/__init__.py b/tests/core/__init__.py
diff --git a/tests/test_ai.py → tests/core/test_ai.py b/tests/test_ai.py → tests/core/test_ai.py
diff --git a/tests/test_chat_to_files.py → tests/core/test_chat_to_files.py b/tests/test_chat_to_files.py → tests/core/test_chat_to_files.py
diff --git a/tests/test_token_usage.py → tests/core/test_token_usage.py b/tests/test_token_usage.py → tests/core/test_token_usage.py
diff --git a/tests/legacy_steps/__init__.py b/tests/legacy_steps/__init__.py
diff --git a/tests/steps/test_archive.py → tests/legacy_steps/test_archive.py b/tests/steps/test_archive.py → tests/legacy_steps/test_archive.py
diff --git a/tests/data/example_snake_files.py → tests/tools/example_snake_files.py b/tests/data/example_snake_files.py → tests/tools/example_snake_files.py
diff --git a/tests/data/test_code_vector_repository.py → tests/tools/test_code_vector_repository.py b/tests/data/test_code_vector_repository.py → tests/tools/test_code_vector_repository.py
diff --git a/tests/data/test_file_repository.py → tests/tools/test_file_repository.py b/tests/data/test_file_repository.py → tests/tools/test_file_repository.py