diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index b364ff90..91e17469 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -226,7 +226,6 @@ def _create_dbdata( stop_postgres(dbgym_workspace, pgbin_path, dbdata_path) # Create .tgz file. - # Note that you can't pass "[dbdata].tgz" as an arg to cur_task_runs_data_path() because that would create "[dbdata].tgz" as a dir. dbdata_tgz_real_path = dbgym_workspace.dbgym_this_run_path / linkname_to_name( expected_dbdata_tgz_symlink_path.name ) diff --git a/env/pg_conn.py b/env/pg_conn.py index 540e6134..7aa4e5d8 100644 --- a/env/pg_conn.py +++ b/env/pg_conn.py @@ -102,12 +102,9 @@ def disconnect(self) -> None: self._conn = None def move_log(self) -> None: - pglog_path = ( - self.dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True) - / f"pg{self.pgport}.log" - ) + pglog_path = self.dbgym_workspace.dbgym_this_run_path / f"pg{self.pgport}.log" pglog_this_step_path = ( - self.dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True) + self.dbgym_workspace.dbgym_this_run_path / f"pg{self.pgport}.log.{self.log_step}" ) if pglog_path.exists(): @@ -299,8 +296,7 @@ def restart_with_changes( "-l", # We log to pg{self.pgport}.log instead of pg.log so that different PostgresConn objects # don't all try to write to the same file. - self.dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True) - / f"pg{self.pgport}.log", + self.dbgym_workspace.dbgym_this_run_path / f"pg{self.pgport}.log", "start", ].run(retcode=None) diff --git a/env/tuning_artifacts.py b/env/tuning_artifacts.py index b5b617da..f2eb9178 100644 --- a/env/tuning_artifacts.py +++ b/env/tuning_artifacts.py @@ -81,9 +81,10 @@ def __init__( self, dbgym_workspace: DBGymWorkspace, metadata: TuningMetadata ) -> None: self.dbgym_workspace = dbgym_workspace - self.tuning_artifacts_path = self.dbgym_workspace.cur_task_runs_artifacts_path( - "tuning_artifacts", mkdir=True + self.tuning_artifacts_path = ( + self.dbgym_workspace.dbgym_this_run_path / "tuning_artifacts" ) + self.tuning_artifacts_path.mkdir(parents=False, exist_ok=False) assert is_fully_resolved(self.tuning_artifacts_path) self.next_step_num = 0 diff --git a/task.py b/task.py index 5579d9cf..f5a0e278 100644 --- a/task.py +++ b/task.py @@ -18,7 +18,7 @@ def task(ctx: click.Context) -> None: dbgym_workspace = make_standard_dbgym_workspace() ctx.obj = dbgym_workspace - log_path = dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True) + log_path = dbgym_workspace.dbgym_this_run_path set_up_loggers(log_path) set_up_warnings(log_path) diff --git a/util/workspace.py b/util/workspace.py index 28ed17fa..83f1a399 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -50,30 +50,6 @@ def get_latest_run_path_from_workspace_path(workspace_path: Path) -> Path: DEFAULT_BOOT_CONFIG_PATH = POSTGRES_PATH / "default_boot_config.yaml" -# Paths of dependencies in the workspace. These are named "*_path" because they will be an absolute path -# The reason these _cannot_ be relative paths is because relative paths are relative to the codebase root, not the workspace root -# Note that it's okay to hardcode the codebase paths (like dbgym_dbms_postgres) here. In the worst case, we'll just break an -# integration test. The "source of truth" of codebase paths is based on DBGymWorkspace.cur_source_path(), which will always -# reflect the actual codebase structure. As long as we automatically enforce getting the right codebase paths when writing, it's -# ok to have to hardcode them when reading. -# Details -# - If a name already has the workload_name, I omit scale factor. This is because the workload_name includes the scale factor -# - By convention, symlinks should end with ".link". The bug that motivated this decision involved replaying a tuning run. When -# replaying a tuning run, you read the tuning_steps/ folder of the tuning run. Earlier, I created a symlink to that tuning_steps/ -# folder called run_*/*/tuning_steps. However, replay itself generates an replay_info.log file, which goes in -# run_*/*/tuning_steps/. The bug was that my replay function was overwriting the replay_info.log file of the -# tuning run. By naming all symlinks "*.link", we avoid the possibility of subtle bugs like this happening. -def get_default_workload_path( - workspace_path: Path, benchmark_name: str, workload_name: str -) -> Path: - return ( - get_symlinks_path_from_workspace_path(workspace_path) - / f"dbgym_benchmark_{benchmark_name}" - / "data" - / (workload_name + ".link") - ) - - SCALE_FACTOR_PLACEHOLDER: str = "[scale_factor]" @@ -323,51 +299,6 @@ def open_and_save(self, open_path: Path, mode: str = "r") -> IO[Any]: def append_group(self, name: str) -> None: self.cur_path_list.append(name) - def cur_source_path(self, *dirs: str) -> Path: - cur_path = self.base_dbgym_repo_path - assert self.cur_path_list[0] == "dbgym" - for folder in self.cur_path_list[1:]: - cur_path = cur_path / folder - for dir in dirs: - cur_path = cur_path / dir - return cur_path - - def _cur_symlinks_path(self, *dirs: str, mkdir: bool = False) -> Path: - flattened_structure = "_".join(self.cur_path_list) - cur_path = self.dbgym_symlinks_path / flattened_structure - for dir in dirs: - cur_path = cur_path / dir - if mkdir: - cur_path.mkdir(parents=True, exist_ok=True) - return cur_path - - def cur_task_runs_path(self, *dirs: str, mkdir: bool = False) -> Path: - flattened_structure = "_".join(self.cur_path_list) - cur_path = self.dbgym_this_run_path / flattened_structure - for dir in dirs: - cur_path = cur_path / dir - if mkdir: - cur_path.mkdir(parents=True, exist_ok=True) - return cur_path - - def cur_symlinks_bin_path(self, *dirs: str, mkdir: bool = False) -> Path: - return self._cur_symlinks_path("bin", *dirs, mkdir=mkdir) - - def cur_symlinks_build_path(self, *dirs: str, mkdir: bool = False) -> Path: - return self._cur_symlinks_path("build", *dirs, mkdir=mkdir) - - def cur_symlinks_data_path(self, *dirs: str, mkdir: bool = False) -> Path: - return self._cur_symlinks_path("data", *dirs, mkdir=mkdir) - - def cur_task_runs_build_path(self, *dirs: str, mkdir: bool = False) -> Path: - return self.cur_task_runs_path("build", *dirs, mkdir=mkdir) - - def cur_task_runs_data_path(self, *dirs: str, mkdir: bool = False) -> Path: - return self.cur_task_runs_path("data", *dirs, mkdir=mkdir) - - def cur_task_runs_artifacts_path(self, *dirs: str, mkdir: bool = False) -> Path: - return self.cur_task_runs_path("artifacts", *dirs, mkdir=mkdir) - def get_workspace_path_from_config(dbgym_config_path: Path) -> Path: """