Skip to content

Commit

Permalink
Merge pull request #840 from ATheorell/refactor
Browse files Browse the repository at this point in the history
Merging to refactor branch so that we can work async on the missing functionality
  • Loading branch information
ATheorell authored Nov 10, 2023
2 parents 3be1c37 + f2aa1fa commit 82d2bb1
Show file tree
Hide file tree
Showing 42 changed files with 915 additions and 311 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ benchmark

.gpte_consent

# projects folder apart from default prompt

projects
!projects/example/prompt
!projects/example-improve

# docs

docs/_build
Expand Down
4 changes: 2 additions & 2 deletions evals/evals_existing_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)

from gpt_engineer.core.chat_to_files import parse_chat
from gpt_engineer.data.file_repository import FileRepository
from gpt_engineer.core.default.on_disk_repository import OnDiskRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -25,7 +25,7 @@ def single_evaluate(eval_ob: dict) -> list[bool]:
# load the known files into the project
# the files can be anywhere in the projects folder

workspace = FileRepository(eval_ob["project_root"])
workspace = OnDiskRepository(eval_ob["project_root"])
file_list_string = ""
code_base_abs = Path(os.getcwd()) / eval_ob["project_root"]

Expand Down
4 changes: 2 additions & 2 deletions evals/evals_new_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
load_evaluations_from_file,
)

from gpt_engineer.data.file_repository import FileRepository
from gpt_engineer.core.default.on_disk_repository import OnDiskRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -20,7 +20,7 @@ def single_evaluate(eval_ob: dict) -> list[bool]:
"""Evaluates a single prompt for creating a new project."""
print(f"running evaluation: {eval_ob['name']}")

workspace = FileRepository(eval_ob["project_root"])
workspace = OnDiskRepository(eval_ob["project_root"])
base_abs = Path(os.getcwd())
code_base_abs = base_abs / eval_ob["project_root"]

Expand Down
5 changes: 3 additions & 2 deletions gpt_engineer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
ai,
domain,
chat_to_files,
steps,
)
from gpt_engineer.data import file_repository, code_vector_repository
from gpt_engineer.legacy import steps
from gpt_engineer.tools import code_vector_repository
from gpt_engineer.core.default import on_disk_repository
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
Modules:
- main: The primary CLI module for GPT Engineer.
- collect: Collect send learning data for analysis and improvement.
- collect: Collect and send learning data for analysis and improvement.
- file_selector: Selecting files using GUI and terminal-based file explorer.
- learning: Tools and data structures for data collection.
- learning: Tools and data structures for data collection.
For more specific details, refer to the docstrings within each module.
"""
82 changes: 82 additions & 0 deletions gpt_engineer/applications/cli/cli_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from gpt_engineer.core.code import Code
from gpt_engineer.core.base_version_manager import BaseVersionManager
from gpt_engineer.core.ai import AI
from gpt_engineer.core.default.steps import gen_code, gen_entrypoint, execute_entrypoint
from gpt_engineer.core.base_repository import BaseRepository
from gpt_engineer.core.default.on_disk_repository import OnDiskRepository
from gpt_engineer.core.base_execution_env import BaseExecutionEnv
from gpt_engineer.core.default.on_disk_execution_env import OnDiskExecutionEnv
from gpt_engineer.core.default.paths import memory_path
from gpt_engineer.core.base_agent import BaseAgent
from gpt_engineer.applications.cli.learning import human_review


class CliAgent(BaseAgent):
    """
    Agent driving the CLI workflow: generate code from a prompt, generate an
    entrypoint, execute it, and collect a human review.

    Attributes:
        memory (BaseRepository): Repository used to persist intermediate
            artifacts (generated code, logs) during the run.
        execution_env (BaseExecutionEnv): Environment in which the generated
            entrypoint is executed.
        ai (AI): Manages calls to the LLM; defaults to a fresh `AI()` when
            none is supplied.

    Methods:
        with_default_config(path, ai=None):
            Alternate constructor wiring the default on-disk repository and
            on-disk execution environment rooted at `path`.
        init(prompt) -> Code:
            Generates code and an entrypoint from `prompt`, executes the
            entrypoint, collects a human review, and returns the code.
        improve(prompt) -> Code:
            Not implemented yet — currently a stub returning None.
    """

    def __init__(
        self,
        memory: BaseRepository,
        execution_env: BaseExecutionEnv,
        ai: AI = None,
    ):
        # `ai` is effectively Optional[AI]: a None default is replaced with a
        # fresh AI() instance below.
        self.memory = memory
        self.execution_env = execution_env
        self.ai = ai or AI()

    @classmethod
    def with_default_config(cls, path: str, ai: AI = None):
        """Build an agent backed by on-disk memory and execution env at `path`."""
        return cls(
            memory=OnDiskRepository(memory_path(path)),
            execution_env=OnDiskExecutionEnv(path),
            ai=ai,
        )

    def init(self, prompt: str) -> Code:
        """Generate, run, and human-review code for a new project from `prompt`."""
        code = gen_code(self.ai, prompt, self.memory)
        entrypoint = gen_entrypoint(self.ai, code, self.memory)
        # Merge the entrypoint files into the code mapping; Code appears to be
        # dict-like, so `|` is a mapping merge — TODO confirm against Code class.
        code = Code(code | entrypoint)
        execute_entrypoint(self.execution_env, code)
        human_review(self.memory)
        return code

    def improve(self, prompt: str) -> Code:
        """Improve existing code based on `prompt`. Not implemented yet."""
        pass
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""
This module provides functionalities to handle and send learning data to RudderStack
for the purpose of analysis and to improve the gpt-engineer system. The data is sent
This module provides functionalities to handle and send learning data to RudderStack
for the purpose of analysis and to improve the gpt-engineer system. The data is sent
only when the user gives consent to share.
The module provides the following main functions:
- `send_learning`: Directly send a learning data to RudderStack.
- `collect_learnings`: Extract, possibly adjust, and send the learning data based on
- `send_learning`: Directly send learning data to RudderStack.
- `collect_learnings`: Extract, possibly adjust, and send the learning data based on
provided input parameters.
- `steps_file_hash`: Computes the SHA-256 hash of the steps file, which might be used
for identifying the exact version or changes in the steps.
Expand All @@ -15,7 +15,7 @@
- hashlib: For generating SHA-256 hash.
- typing: For type annotations.
- gpt_engineer.core: Core functionalities of gpt-engineer.
- gpt_engineer.cli.learning: Handles the extraction of learning data.
- gpt_engineer.cli.learning: Handles the extraction of learning data.
Note:
Data sent to RudderStack is not shared with third parties and is used solely to
Expand All @@ -27,37 +27,26 @@

from typing import List

from gpt_engineer.core import steps
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.legacy import steps
from gpt_engineer.core.default.on_disk_repository import FileRepositories
from gpt_engineer.core.domain import Step
from gpt_engineer.cli.learning import Learning, extract_learning

...

import hashlib

from typing import List

from gpt_engineer.core import steps
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.core.domain import Step
from gpt_engineer.cli.learning import Learning, extract_learning
from gpt_engineer.applications.cli.learning import Learning, extract_learning


def send_learning(learning: Learning):
"""
Send the learning data to RudderStack for analysis.
Send the learning data to RudderStack for analysis.
Note:
This function is only called if consent is given to share data.
This function is only called if consent is given to share data.
Data is not shared to a third party. It is used with the sole purpose of
improving gpt-engineer, and letting it handle more use cases.
Consent logic is in gpt_engineer/learning.py
Parameters
----------
learning : Learning
The learning data to send.
The learning data to send.
"""
import rudderstack.analytics as rudder_analytics

Expand All @@ -75,7 +64,7 @@ def collect_learnings(
model: str, temperature: float, steps: List[Step], dbs: FileRepositories
):
"""
Collect the learning data and send it to RudderStack for analysis.
Collect the learning data and send it to RudderStack for analysis.
Parameters
----------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from pathlib import Path
from typing import List, Union

from gpt_engineer.data.file_repository import FileRepository
from gpt_engineer.core.default.on_disk_repository import OnDiskRepository

IGNORE_FOLDERS = {"site-packages", "node_modules", "venv"}
FILE_LIST_NAME = "file_list.txt"
Expand Down Expand Up @@ -321,7 +321,7 @@ def is_in_ignoring_extensions(path: Path) -> bool:
return is_hidden and is_pycache


def ask_for_files(metadata_db: FileRepository, workspace_db: FileRepository) -> None:
def ask_for_files(metadata_db: OnDiskRepository, workspace_db: OnDiskRepository) -> None:
"""
Ask user to select files to improve.
It can be done by terminal, gui, or using the old selection.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
This module provides tools and data structures for supporting a feedback loop in the GPT Engineer application.
This module provides tools and data structures for supporting a feedback loop in the GPT Engineer application.
The primary intent of this module is to gather feedback from the user on the output of the gpt-engineer tool,
with their consent, and to store this feedback for further analysis and improvement of the tool.
Expand All @@ -17,13 +17,13 @@
Interactively gathers feedback from the user regarding the performance of generated code.
check_consent() -> bool:
Checks if the user has previously given consent to store their data and if not, asks for it.
Checks if the user has previously given consent to store their data and if not, asks for it.
collect_consent() -> bool:
Verifies if the user has given consent to store their data or prompts for it.
Verifies if the user has given consent to store their data or prompts for it.
ask_if_can_store() -> bool:
Asks the user if it's permissible to store their data for gpt-engineer improvement.
Asks the user if it's permissible to store their data for gpt-engineer improvement.
logs_to_string(steps: List[Step], logs: DB) -> str:
Converts logs of steps into a readable string format.
Expand Down Expand Up @@ -52,7 +52,10 @@
from dataclasses_json import dataclass_json
from termcolor import colored

from gpt_engineer.data.file_repository import FileRepository, FileRepositories
from gpt_engineer.core.default.on_disk_repository import (
OnDiskRepository,
FileRepositories,
)
from gpt_engineer.core.domain import Step


Expand Down Expand Up @@ -148,7 +151,7 @@ def human_review_input() -> Review:

def check_collection_consent() -> bool:
"""
Check if the user has given consent to store their data.
Check if the user has given consent to store their data.
If not, ask for their consent.
"""
path = Path(".gpte_consent")
Expand All @@ -160,7 +163,7 @@ def check_collection_consent() -> bool:

def ask_collection_consent() -> bool:
"""
Ask the user for consent to store their data.
Ask the user for consent to store their tools.
"""
answer = input(
"Is it ok if we store your prompts to help improve GPT Engineer? (y/n)"
Expand All @@ -174,7 +177,7 @@ def ask_collection_consent() -> bool:
print(colored("Thank you️", "light_green"))
print()
print(
"(If you no longer wish to participate in data collection, delete the file .gpte_consent)"
"(If you no longer wish to participate in data collection, delete the file .gpte_consent)"
)
return True
else:
Expand All @@ -187,7 +190,7 @@ def ask_collection_consent() -> bool:
return False


def logs_to_string(steps: List[Step], logs: FileRepository) -> str:
def logs_to_string(steps: List[Step], logs: OnDiskRepository) -> str:
"""
Convert the logs of the steps to a string.
Expand Down Expand Up @@ -218,7 +221,7 @@ def extract_learning(
steps_file_hash,
) -> Learning:
"""
Extract the learning data from the steps and databases.
Extract the learning data from the steps and databases.
Parameters
----------
Expand All @@ -236,7 +239,7 @@ def extract_learning(
Returns
-------
Learning
The extracted learning data.
The extracted learning data.
"""
review = None
if "review" in dbs.memory:
Expand Down Expand Up @@ -277,3 +280,34 @@ def get_session() -> str:
return user_id
except IOError:
return "ephemeral_" + str(random.randint(0, 2**32))


def human_review(memory: OnDiskRepository):
    """
    Collect human feedback on the generated code and persist it.

    Prompts the user for a review of the generated or improved code via
    `human_review_input`. If a valid review is returned, it is serialized to
    JSON and stored in `memory` under the "review" key.

    Parameters
    ----------
    memory : OnDiskRepository
        Memory storage for the run; the serialized review is written to its
        "review" entry.

    Returns
    -------
    list
        Always an empty list, indicating there are no further messages to
        send to the LLM.

    Notes
    -----
    `human_review_input` handles all interaction with the user and returns
    either a Review object or None when no feedback was provided.
    """
    review = human_review_input()
    if review is not None:
        memory["review"] = review.to_json()  # type: ignore
    return []
Loading

0 comments on commit 82d2bb1

Please sign in to comment.