Skip to content

Commit

Permalink
deleted all the cur_* functions from workspace and fixed all uses of them
Browse files Browse the repository at this point in the history
  • Loading branch information
wangpatrick57 committed Dec 30, 2024
1 parent 2ffa8c7 commit 17f89ff
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 80 deletions.
1 change: 0 additions & 1 deletion dbms/postgres/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ def _create_dbdata(
stop_postgres(dbgym_workspace, pgbin_path, dbdata_path)

# Create .tgz file.
# Note that you can't pass "[dbdata].tgz" as an arg to cur_task_runs_data_path() because that would create "[dbdata].tgz" as a dir.
dbdata_tgz_real_path = dbgym_workspace.dbgym_this_run_path / linkname_to_name(
expected_dbdata_tgz_symlink_path.name
)
Expand Down
10 changes: 3 additions & 7 deletions env/pg_conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,9 @@ def disconnect(self) -> None:
self._conn = None

def move_log(self) -> None:
pglog_path = (
self.dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True)
/ f"pg{self.pgport}.log"
)
pglog_path = self.dbgym_workspace.dbgym_this_run_path / f"pg{self.pgport}.log"
pglog_this_step_path = (
self.dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True)
self.dbgym_workspace.dbgym_this_run_path
/ f"pg{self.pgport}.log.{self.log_step}"
)
if pglog_path.exists():
Expand Down Expand Up @@ -299,8 +296,7 @@ def restart_with_changes(
"-l",
# We log to pg{self.pgport}.log instead of pg.log so that different PostgresConn objects
# don't all try to write to the same file.
self.dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True)
/ f"pg{self.pgport}.log",
self.dbgym_workspace.dbgym_this_run_path / f"pg{self.pgport}.log",
"start",
].run(retcode=None)

Expand Down
5 changes: 3 additions & 2 deletions env/tuning_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,10 @@ def __init__(
self, dbgym_workspace: DBGymWorkspace, metadata: TuningMetadata
) -> None:
self.dbgym_workspace = dbgym_workspace
self.tuning_artifacts_path = self.dbgym_workspace.cur_task_runs_artifacts_path(
"tuning_artifacts", mkdir=True
self.tuning_artifacts_path = (
self.dbgym_workspace.dbgym_this_run_path / "tuning_artifacts"
)
self.tuning_artifacts_path.mkdir(parents=False, exist_ok=False)
assert is_fully_resolved(self.tuning_artifacts_path)
self.next_step_num = 0

Expand Down
2 changes: 1 addition & 1 deletion task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def task(ctx: click.Context) -> None:
dbgym_workspace = make_standard_dbgym_workspace()
ctx.obj = dbgym_workspace

log_path = dbgym_workspace.cur_task_runs_artifacts_path(mkdir=True)
log_path = dbgym_workspace.dbgym_this_run_path
set_up_loggers(log_path)
set_up_warnings(log_path)

Expand Down
69 changes: 0 additions & 69 deletions util/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,30 +50,6 @@ def get_latest_run_path_from_workspace_path(workspace_path: Path) -> Path:
DEFAULT_BOOT_CONFIG_PATH = POSTGRES_PATH / "default_boot_config.yaml"


# Paths of dependencies in the workspace. These are named "*_path" because they will be an absolute path
# The reason these _cannot_ be relative paths is because relative paths are relative to the codebase root, not the workspace root
# Note that it's okay to hardcode the codebase paths (like dbgym_dbms_postgres) here. In the worst case, we'll just break an
# integration test. The "source of truth" of codebase paths is based on DBGymWorkspace.cur_source_path(), which will always
# reflect the actual codebase structure. As long as we automatically enforce getting the right codebase paths when writing, it's
# ok to have to hardcode them when reading.
# Details
# - If a name already has the workload_name, I omit scale factor. This is because the workload_name includes the scale factor
# - By convention, symlinks should end with ".link". The bug that motivated this decision involved replaying a tuning run. When
# replaying a tuning run, you read the tuning_steps/ folder of the tuning run. Earlier, I created a symlink to that tuning_steps/
# folder called run_*/*/tuning_steps. However, replay itself generates a replay_info.log file, which goes in
# run_*/*/tuning_steps/. The bug was that my replay function was overwriting the replay_info.log file of the
# tuning run. By naming all symlinks "*.link", we avoid the possibility of subtle bugs like this happening.
def get_default_workload_path(
workspace_path: Path, benchmark_name: str, workload_name: str
) -> Path:
return (
get_symlinks_path_from_workspace_path(workspace_path)
/ f"dbgym_benchmark_{benchmark_name}"
/ "data"
/ (workload_name + ".link")
)


SCALE_FACTOR_PLACEHOLDER: str = "[scale_factor]"


Expand Down Expand Up @@ -323,51 +299,6 @@ def open_and_save(self, open_path: Path, mode: str = "r") -> IO[Any]:
def append_group(self, name: str) -> None:
self.cur_path_list.append(name)

def cur_source_path(self, *dirs: str) -> Path:
cur_path = self.base_dbgym_repo_path
assert self.cur_path_list[0] == "dbgym"
for folder in self.cur_path_list[1:]:
cur_path = cur_path / folder
for dir in dirs:
cur_path = cur_path / dir
return cur_path

def _cur_symlinks_path(self, *dirs: str, mkdir: bool = False) -> Path:
flattened_structure = "_".join(self.cur_path_list)
cur_path = self.dbgym_symlinks_path / flattened_structure
for dir in dirs:
cur_path = cur_path / dir
if mkdir:
cur_path.mkdir(parents=True, exist_ok=True)
return cur_path

def cur_task_runs_path(self, *dirs: str, mkdir: bool = False) -> Path:
flattened_structure = "_".join(self.cur_path_list)
cur_path = self.dbgym_this_run_path / flattened_structure
for dir in dirs:
cur_path = cur_path / dir
if mkdir:
cur_path.mkdir(parents=True, exist_ok=True)
return cur_path

def cur_symlinks_bin_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self._cur_symlinks_path("bin", *dirs, mkdir=mkdir)

def cur_symlinks_build_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self._cur_symlinks_path("build", *dirs, mkdir=mkdir)

def cur_symlinks_data_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self._cur_symlinks_path("data", *dirs, mkdir=mkdir)

def cur_task_runs_build_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self.cur_task_runs_path("build", *dirs, mkdir=mkdir)

def cur_task_runs_data_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self.cur_task_runs_path("data", *dirs, mkdir=mkdir)

def cur_task_runs_artifacts_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self.cur_task_runs_path("artifacts", *dirs, mkdir=mkdir)


def get_workspace_path_from_config(dbgym_config_path: Path) -> Path:
"""
Expand Down

0 comments on commit 17f89ff

Please sign in to comment.