diff --git a/README.md b/README.md
index c1bb8574..7989c40a 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,86 @@
-# Database Gym
\ No newline at end of file
+# 🛢️ Database Gym 🏋️
+[\[Slides\]](http://www.cidrdb.org/cidr2023/slides/p27-lim-slides.pdf) [\[Paper\]](https://www.cidrdb.org/cidr2023/papers/p27-lim.pdf)
+
+*An end-to-end research vehicle for the field of self-driving DBMSs.*
+
+## Quickstart
+
+These steps were tested on a fresh clone of this repository on Ubuntu 22.04.
+
+```
+# Set up dependencies.
+# You may want to create a Python virtual environment (e.g., with conda) before doing this.
+./dependency/install_dependencies.sh
+
+# Compile a custom fork of PostgreSQL, load TPC-H (SF 0.01), train the Proto-X agent, and tune.
+./scripts/quickstart.sh postgres tpch 0.01 protox
+```
+
+## Overview
+
+Autonomous DBMS research often involves more engineering than research.
+As new advances in state-of-the-art technology are made, it is common to find that researchers have
+reimplemented the database tuning pipeline from scratch: workload capture, database setup,
+training data collection, model creation, model deployment, and more.
+Moreover, these bespoke pipelines make it difficult to combine different techniques even when they
+should be independent (e.g., using a different operator latency model in a tuning algorithm).
+
+The database gym project is our attempt at standardizing the APIs between these disparate tasks,
+allowing researchers to mix and match the different pipeline components.
+It draws inspiration from the Farama Foundation's Gymnasium (formerly OpenAI Gym), which
+accelerates the development and comparison of reinforcement learning algorithms by providing a set
+of agents, environments, and a standardized API for communicating between them.
+Through the database gym, we hope to save others time and reimplementation effort by
+providing an extensible open-source platform for autonomous DBMS research.
+
+This project is under active development.
+Currently, we decompose the database tuning pipeline into the following components:
+
+1. Workload: collection, forecasting, synthesis
+2. Database: database loading, instrumentation, orchestrating workload execution
+3. Agent: identifying tuning actions, suggesting an action
+
+## Repository Structure
+
+`task.py` is the entrypoint for all tasks.
+The tasks are grouped into categories that correspond to the top-level directories of the repository:
+
+- `benchmark` - tasks to generate data and queries for different benchmarks (e.g., TPC-H, JOB)
+- `dbms` - tasks to build and start DBMSs (e.g., PostgreSQL)
+- `tune` - tasks to train autonomous database tuning agents
+
+## Credits
+
+The Database Gym project rose from the ashes of the [NoisePage](https://db.cs.cmu.edu/projects/noisepage/) self-driving DBMS project.
+
+The first prototype was written by [Patrick Wang](https://github.com/wangpatrick57), integrating [Boot (VLDB 2024)](https://github.com/lmwnshn/boot) and [Proto-X (VLDB 2024)](https://github.com/17zhangw/protox) into a cohesive system.
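To make the Gymnasium analogy in the Overview concrete, here is a minimal editorial sketch of the standardized reset/step loop that the gym-style API refers to. It is not part of this repository or of the diff above: `ToyTuningEnv`, its single knob, and the latency-based reward are hypothetical stand-ins, and the sketch assumes the `gymnasium` and `numpy` packages are installed.

```python
# A minimal, self-contained sketch of a Gymnasium-style tuning loop.
# Hypothetical illustration only; this is not the dbgym or Proto-X API.
import gymnasium as gym
import numpy as np


class ToyTuningEnv(gym.Env):
    """Toy environment: the 'DBMS' is a quadratic bowl over one knob."""

    def __init__(self) -> None:
        self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-10.0, high=10.0, shape=(1,), dtype=np.float32)
        self._knob = np.zeros(1, dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self._knob = np.zeros(1, dtype=np.float32)
        return self._knob.copy(), {}

    def step(self, action):
        # Apply the knob delta and report a pretend latency (lower is better).
        self._knob = np.clip(self._knob + action, -10.0, 10.0)
        latency = float(self._knob[0] ** 2)
        reward = -latency
        return self._knob.copy(), reward, False, False, {}


env = ToyTuningEnv()
obs, info = env.reset(seed=0)
for _ in range(5):
    action = env.action_space.sample()  # a real agent would choose the action
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"knob={obs[0]:+.2f} reward={reward:+.3f}")
```

The point of the sketch is only the interface: any agent that speaks `reset()`/`step()` can be paired with any environment that exposes them, which is the mix-and-match property the Overview describes.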
+ +## Citing This Repository + +If you use this repository in an academic paper, please cite: + +``` +@inproceedings{lim23, + author = {Lim, Wan Shen and Butrovich, Matthew and Zhang, William and Crotty, Andrew and Ma, Lin and Xu, Peijing and Gehrke, Johannes and Pavlo, Andrew}, + title = {Database Gyms}, + booktitle = {{CIDR} 2023, Conference on Innovative Data Systems Research}, + year = {2023}, + url = {https://db.cs.cmu.edu/papers/2023/p27-lim.pdf}, + } +``` + +Additionally, please cite any module-specific paper that is relevant to your use. + +**Accelerating Training Data Generation** + +``` +(citation pending) +Boot, appearing at VLDB 2024. +``` + +**Simultaneously Tuning Multiple Configuration Spaces with Proto Actions** + +``` +(citation pending) +Proto-X, appearing at VLDB 2024. +``` diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py index d5c8c407..82adeff5 100644 --- a/benchmark/tpch/cli.py +++ b/benchmark/tpch/cli.py @@ -21,8 +21,8 @@ def tpch_group(dbgym_cfg: DBGymConfig): @tpch_group.command(name="data") @click.argument("scale-factor", type=float) @click.pass_obj -# The reason generate-data is separate from create-pgdata is because generate-data is generic -# to all DBMSs while create-pgdata is specific to Postgres. +# The reason generate data is separate from create dbdata is because generate-data is generic +# to all DBMSs while create dbdata is specific to a single DBMS. def tpch_data(dbgym_cfg: DBGymConfig, scale_factor: float): _clone(dbgym_cfg) _generate_data(dbgym_cfg, scale_factor) diff --git a/dbms/postgres/build_repo.sh b/dbms/postgres/build_repo.sh index 16774edd..271f7056 100755 --- a/dbms/postgres/build_repo.sh +++ b/dbms/postgres/build_repo.sh @@ -4,34 +4,34 @@ set -euxo pipefail REPO_REAL_PARENT_DPATH="$1" -# download and make postgres from the boot repository +# Download and make postgres from the boot repository. mkdir -p "${REPO_REAL_PARENT_DPATH}" cd "${REPO_REAL_PARENT_DPATH}" -git clone git@github.com:lmwnshn/boot.git --single-branch --branch boot --depth 1 +git clone git@github.com:lmwnshn/boot.git --single-branch --branch vldb_2024 --depth 1 cd ./boot ./cmudb/build/configure.sh release "${REPO_REAL_PARENT_DPATH}/boot/build/postgres" make clean make install-world-bin -j4 -# download and make bytejack -cd ./cmudb/extension/bytejack_rs/ +# Download and make boot. +cd ./cmudb/extension/boot_rs/ cargo build --release -cbindgen . -o target/bytejack_rs.h --lang c +cbindgen . -o target/boot_rs.h --lang c cd "${REPO_REAL_PARENT_DPATH}/boot" -cd ./cmudb/extension/bytejack/ +cd ./cmudb/extension/boot/ make clean make install -j cd "${REPO_REAL_PARENT_DPATH}/boot" -# download and make hypopg +# Download and make hypopg. git clone git@github.com:HypoPG/hypopg.git cd ./hypopg PG_CONFIG="${REPO_REAL_PARENT_DPATH}/boot/build/postgres/bin/pg_config" make install cd "${REPO_REAL_PARENT_DPATH}/boot" -# download and make pg_hint_plan -# we need -L to follow links +# Download and make pg_hint_plan. +# We need -L to follow links. curl -L https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL15_1_5_1.tar.gz -o REL15_1_5_1.tar.gz tar -xzf REL15_1_5_1.tar.gz rm REL15_1_5_1.tar.gz diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 75b03650..f81a877f 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -1,5 +1,5 @@ """ -At a high level, this file's goal is to (1) install+build postgres and (2) create pgdata. +At a high level, this file's goal is to (1) build postgres and (2) create dbdata (aka pgdata). 
On the other hand, the goal of tune.protox.env.util.postgres is to provide helpers to manage a Postgres instance during agent tuning. util.pg provides helpers used by *both* of the above files (as well as other files). @@ -10,11 +10,10 @@ import subprocess from pathlib import Path import click -import ssd_checker from benchmark.tpch.load_info import TpchLoadInfo from dbms.load_info_base_class import LoadInfoBaseClass -from misc.utils import DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, save_file, get_pgdata_tgz_name, default_pgbin_path, WORKSPACE_PATH_PLACEHOLDER, default_pgdata_parent_dpath +from misc.utils import DBGymConfig, conv_inputpath_to_realabspath, link_result, open_and_save, save_file, get_dbdata_tgz_name, default_pgbin_path, WORKSPACE_PATH_PLACEHOLDER, default_dbdata_parent_dpath, is_ssd from util.shell import subprocess_run from sqlalchemy import Connection from util.pg import SHARED_PRELOAD_LIBRARIES, conn_execute, sql_file_execute, DBGYM_POSTGRES_DBNAME, create_conn, DEFAULT_POSTGRES_PORT, DBGYM_POSTGRES_USER, DBGYM_POSTGRES_PASS, DEFAULT_POSTGRES_DBNAME @@ -32,7 +31,7 @@ def postgres_group(dbgym_cfg: DBGymConfig): @postgres_group.command( name="build", - help="Download and build the Postgres repository and all necessary extensions/shared libraries. Does not create pgdata.", + help="Download and build the Postgres repository and all necessary extensions/shared libraries. Does not create dbdata.", ) @click.pass_obj @click.option("--rebuild", is_flag=True, help="Include this flag to rebuild Postgres even if it already exists.") @@ -41,46 +40,46 @@ def postgres_build(dbgym_cfg: DBGymConfig, rebuild: bool): @postgres_group.command( - name="pgdata", - help="Build a .tgz file of pgdata with various specifications for its contents.", + name="dbdata", + help="Build a .tgz file of dbdata with various specifications for its contents.", ) @click.pass_obj @click.argument("benchmark_name", type=str) @click.option("--scale-factor", type=float, default=1) @click.option("--pgbin-path", type=Path, default=None, help=f"The path to the bin containing Postgres executables. The default is {default_pgbin_path(WORKSPACE_PATH_PLACEHOLDER)}.") @click.option( - "--intended-pgdata-hardware", + "--intended-dbdata-hardware", type=click.Choice(["hdd", "ssd"]), default="hdd", - help=f"The intended hardware pgdata should be on. Used as a sanity check for --pgdata-parent-dpath.", + help=f"The intended hardware dbdata should be on. Used as a sanity check for --dbdata-parent-dpath.", ) @click.option( - "--pgdata-parent-dpath", + "--dbdata-parent-dpath", default=None, type=Path, - help=f"The path to the parent directory of the pgdata which will be actively tuned. The default is {default_pgdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.", + help=f"The path to the parent directory of the dbdata which will be actively tuned. 
The default is {default_dbdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.", ) -def postgres_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: float, pgbin_path: Path, intended_pgdata_hardware: str, pgdata_parent_dpath: Path): +def postgres_dbdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: float, pgbin_path: Path, intended_dbdata_hardware: str, dbdata_parent_dpath: Path): # Set args to defaults programmatically (do this before doing anything else in the function) if pgbin_path == None: pgbin_path = default_pgbin_path(dbgym_cfg.dbgym_workspace_path) - if pgdata_parent_dpath == None: - pgdata_parent_dpath = default_pgdata_parent_dpath(dbgym_cfg.dbgym_workspace_path) + if dbdata_parent_dpath == None: + dbdata_parent_dpath = default_dbdata_parent_dpath(dbgym_cfg.dbgym_workspace_path) # Convert all input paths to absolute paths pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) - pgdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, pgdata_parent_dpath) + dbdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, dbdata_parent_dpath) # Check assertions on args - if intended_pgdata_hardware == "hdd": - assert not ssd_checker.is_ssd(pgdata_parent_dpath), f"Intended hardware is HDD but pgdata_parent_dpath ({pgdata_parent_dpath}) is an SSD" - elif intended_pgdata_hardware == "ssd": - assert ssd_checker.is_ssd(pgdata_parent_dpath), f"Intended hardware is SSD but pgdata_parent_dpath ({pgdata_parent_dpath}) is an HDD" + if intended_dbdata_hardware == "hdd": + assert not is_ssd(dbdata_parent_dpath), f"Intended hardware is HDD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an SSD" + elif intended_dbdata_hardware == "ssd": + assert is_ssd(dbdata_parent_dpath), f"Intended hardware is SSD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an HDD" else: assert False - # Create pgdata - _create_pgdata(dbgym_cfg, benchmark_name, scale_factor, pgbin_path, pgdata_parent_dpath) + # Create dbdata + _create_dbdata(dbgym_cfg, benchmark_name, scale_factor, pgbin_path, dbdata_parent_dpath) def _get_pgbin_symlink_path(dbgym_cfg: DBGymConfig) -> Path: @@ -109,52 +108,52 @@ def _build_repo(dbgym_cfg: DBGymConfig, rebuild): dbms_postgres_logger.info(f"Set up repo in {expected_repo_symlink_dpath}") -def _create_pgdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: float, pgbin_path: Path, pgdata_parent_dpath: Path) -> None: +def _create_dbdata(dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: float, pgbin_path: Path, dbdata_parent_dpath: Path) -> None: """ - I chose *not* for this function to skip by default if pgdata_tgz_symlink_path already exists. This + I chose *not* for this function to skip by default if dbdata_tgz_symlink_path already exists. This is because, while the generated data is deterministic given benchmark_name and scale_factor, any - change in the _create_pgdata() function would result in a different pgdata. Since _create_pgdata() + change in the _create_dbdata() function would result in a different dbdata. Since _create_dbdata() may change somewhat frequently, I decided to get rid of the footgun of having changes to - _create_pgdata() not propagate to [pgdata].tgz by default. + _create_dbdata() not propagate to [dbdata].tgz by default. """ - # It's ok for the pgdata/ directory to be temporary. It just matters that the .tgz is saved in a safe place. 
- pgdata_dpath = pgdata_parent_dpath / "pgdata_being_created" - # We might be reusing the same pgdata_parent_dpath, so delete pgdata_dpath if it already exists - if pgdata_dpath.exists(): - shutil.rmtree(pgdata_dpath) + # It's ok for the dbdata/ directory to be temporary. It just matters that the .tgz is saved in a safe place. + dbdata_dpath = dbdata_parent_dpath / "dbdata_being_created" + # We might be reusing the same dbdata_parent_dpath, so delete dbdata_dpath if it already exists + if dbdata_dpath.exists(): + shutil.rmtree(dbdata_dpath) # Call initdb. # Save any script we call from pgbin_symlink_dpath because they are dependencies generated from another task run. save_file(dbgym_cfg, pgbin_path / "initdb") - subprocess_run(f'./initdb -D "{pgdata_dpath}"', cwd=pgbin_path) + subprocess_run(f'./initdb -D "{dbdata_dpath}"', cwd=pgbin_path) - # Start Postgres (all other pgdata setup requires postgres to be started). + # Start Postgres (all other dbdata setup requires postgres to be started). # Note that subprocess_run() never returns when running "pg_ctl start", so I'm using subprocess.run() instead. - start_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) + start_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) # Set up Postgres. - _generic_pgdata_setup(dbgym_cfg) - _load_benchmark_into_pgdata(dbgym_cfg, benchmark_name, scale_factor) + _generic_dbdata_setup(dbgym_cfg) + _load_benchmark_into_dbdata(dbgym_cfg, benchmark_name, scale_factor) # Stop Postgres so that we don't "leak" processes. - stop_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) + stop_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) # Create .tgz file. - # Note that you can't pass "[pgdata].tgz" as an arg to cur_task_runs_data_path() because that would create "[pgdata].tgz" as a dir. - pgdata_tgz_real_fpath = dbgym_cfg.cur_task_runs_data_path( + # Note that you can't pass "[dbdata].tgz" as an arg to cur_task_runs_data_path() because that would create "[dbdata].tgz" as a dir. + dbdata_tgz_real_fpath = dbgym_cfg.cur_task_runs_data_path( mkdir=True - ) / get_pgdata_tgz_name(benchmark_name, scale_factor) - # We need to cd into pgdata_dpath so that the tar file does not contain folders for the whole path of pgdata_dpath. - subprocess_run(f"tar -czf {pgdata_tgz_real_fpath} .", cwd=pgdata_dpath) + ) / get_dbdata_tgz_name(benchmark_name, scale_factor) + # We need to cd into dbdata_dpath so that the tar file does not contain folders for the whole path of dbdata_dpath. + subprocess_run(f"tar -czf {dbdata_tgz_real_fpath} .", cwd=dbdata_dpath) # Create symlink. - # Only link at the end so that the link only ever points to a complete pgdata. - pgdata_tgz_symlink_path = link_result(dbgym_cfg, pgdata_tgz_real_fpath) - dbms_postgres_logger.info(f"Created pgdata in {pgdata_tgz_symlink_path}") + # Only link at the end so that the link only ever points to a complete dbdata. + dbdata_tgz_symlink_path = link_result(dbgym_cfg, dbdata_tgz_real_fpath) + dbms_postgres_logger.info(f"Created dbdata in {dbdata_tgz_symlink_path}") -def _generic_pgdata_setup(dbgym_cfg: DBGymConfig): +def _generic_dbdata_setup(dbgym_cfg: DBGymConfig): # get necessary vars pgbin_real_dpath = _get_pgbin_symlink_path(dbgym_cfg).resolve() assert pgbin_real_dpath.exists() @@ -182,15 +181,15 @@ def _generic_pgdata_setup(dbgym_cfg: DBGymConfig): cwd=pgbin_real_dpath, ) - # Create the dbgym database. since one pgdata dir maps to one benchmark, all benchmarks will use the same database - # as opposed to using databases named after the benchmark + # Create the dbgym database. 
Since one dbdata dir maps to one benchmark, all benchmarks will use the same database + # as opposed to using databases named after the benchmark. subprocess_run( f"./psql -c \"create database {DBGYM_POSTGRES_DBNAME} with owner = '{dbgym_pguser}'\" {DEFAULT_POSTGRES_DBNAME} -p {pgport} -h localhost", cwd=pgbin_real_dpath, ) -def _load_benchmark_into_pgdata( +def _load_benchmark_into_dbdata( dbgym_cfg: DBGymConfig, benchmark_name: str, scale_factor: float ): with create_conn(use_psycopg=False) as conn: @@ -198,13 +197,13 @@ def _load_benchmark_into_pgdata( load_info = TpchLoadInfo(dbgym_cfg, scale_factor) else: raise AssertionError( - f"_load_benchmark_into_pgdata(): the benchmark of name {benchmark_name} is not implemented" + f"_load_benchmark_into_dbdata(): the benchmark of name {benchmark_name} is not implemented" ) - _load_into_pgdata(dbgym_cfg, conn, load_info) + _load_into_dbdata(dbgym_cfg, conn, load_info) -def _load_into_pgdata(dbgym_cfg: DBGymConfig, conn: Connection, load_info: LoadInfoBaseClass): +def _load_into_dbdata(dbgym_cfg: DBGymConfig, conn: Connection, load_info: LoadInfoBaseClass): sql_file_execute(dbgym_cfg, conn, load_info.get_schema_fpath()) # truncate all tables first before even loading a single one @@ -223,21 +222,21 @@ def _load_into_pgdata(dbgym_cfg: DBGymConfig, conn: Connection, load_info: LoadI sql_file_execute(dbgym_cfg, conn, constraints_fpath) -def start_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, pgdata_dpath: Path) -> None: - _start_or_stop_postgres(dbgym_cfg, pgbin_path, pgdata_dpath, True) +def start_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, dbdata_dpath: Path) -> None: + _start_or_stop_postgres(dbgym_cfg, pgbin_path, dbdata_dpath, True) -def stop_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, pgdata_dpath: Path) -> None: - _start_or_stop_postgres(dbgym_cfg, pgbin_path, pgdata_dpath, False) +def stop_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, dbdata_dpath: Path) -> None: + _start_or_stop_postgres(dbgym_cfg, pgbin_path, dbdata_dpath, False) -def _start_or_stop_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, pgdata_dpath: Path, is_start: bool) -> None: +def _start_or_stop_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, dbdata_dpath: Path, is_start: bool) -> None: # They should be absolute paths and should exist assert pgbin_path.is_absolute() and pgbin_path.exists() - assert pgdata_dpath.is_absolute() and pgdata_dpath.exists() + assert dbdata_dpath.is_absolute() and dbdata_dpath.exists() # The inputs may be symlinks so we need to resolve them first pgbin_real_dpath = pgbin_path.resolve() - pgdata_dpath = pgdata_dpath.resolve() + dbdata_dpath = dbdata_dpath.resolve() pgport = DEFAULT_POSTGRES_PORT save_file(dbgym_cfg, pgbin_real_dpath / "pg_ctl") @@ -245,7 +244,7 @@ def _start_or_stop_postgres(dbgym_cfg: DBGymConfig, pgbin_path: Path, pgdata_dpa # We use subprocess.run() because subprocess_run() never returns when running "pg_ctl start". # The reason subprocess_run() never returns is because pg_ctl spawns a postgres process so .poll() always returns None. # On the other hand, subprocess.run() does return normally, like calling `./pg_ctl` on the command line would do. 
- result = subprocess.run(f"./pg_ctl -D \"{pgdata_dpath}\" -o '-p {pgport}' start", cwd=pgbin_real_dpath, shell=True) + result = subprocess.run(f"./pg_ctl -D \"{dbdata_dpath}\" -o '-p {pgport}' start", cwd=pgbin_real_dpath, shell=True) result.check_returncode() else: - subprocess_run(f"./pg_ctl -D \"{pgdata_dpath}\" -o '-p {pgport}' stop", cwd=pgbin_real_dpath) \ No newline at end of file + subprocess_run(f"./pg_ctl -D \"{dbdata_dpath}\" -o '-p {pgport}' stop", cwd=pgbin_real_dpath) \ No newline at end of file diff --git a/dependency/install_dependencies.sh b/dependency/install_dependencies.sh new file mode 100755 index 00000000..8a516880 --- /dev/null +++ b/dependency/install_dependencies.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# You may want to create a conda environment before doing this +pip install -r dependency/requirements.txt +cat dependency/apt_requirements.txt | xargs sudo apt-get install -y +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh \ No newline at end of file diff --git a/dependency/requirements.txt b/dependency/requirements.txt index 1b58b1c9..ba32594c 100644 --- a/dependency/requirements.txt +++ b/dependency/requirements.txt @@ -122,5 +122,4 @@ virtualenv==20.25.0 Werkzeug==3.0.1 wrapt==1.14.1 zipp==3.17.0 -ssd_checker==1.0.3 redis==5.0.3 diff --git a/dependency/rust.sh b/dependency/rust.sh deleted file mode 100755 index 9af316fc..00000000 --- a/dependency/rust.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh \ No newline at end of file diff --git a/experiments/load_per_machine_envvars.sh b/experiments/load_per_machine_envvars.sh index 905c6c01..b9772d3c 100644 --- a/experiments/load_per_machine_envvars.sh +++ b/experiments/load_per_machine_envvars.sh @@ -2,9 +2,9 @@ host=$(hostname) if [ "$host" == "dev4" ]; then - export PGDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ + export DBDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ elif [ "$host" == "dev6" ]; then - export PGDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ + export DBDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ else echo "Did not recognize host \"$host\"" exit 1 diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh index 5a111a4f..480f28ca 100755 --- a/experiments/protox_tpch_sf0point1/main.sh +++ b/experiments/protox_tpch_sf0point1/main.sh @@ -3,31 +3,31 @@ set -euxo pipefail SCALE_FACTOR=0.1 -INTENDED_PGDATA_HARDWARE=ssd +INTENDED_DBDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh -echo $PGDATA_PARENT_DPATH +echo $DBDATA_PARENT_DPATH # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2 -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +# python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH +python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2 +python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark -python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR -python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR +python3 task.py benchmark tpch data $SCALE_FACTOR +python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR # postgres -python3 task.py --no-startup-check dbms postgres build -python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py dbms postgres build +python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH exit 0 # embedding -python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash -python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 +python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH # long datagen so that train doesn't crash +python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 
--query-timeout 15 --tune-duration-during-hpo 1 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot +python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh index 2627c942..62814340 100755 --- a/experiments/protox_tpch_sf10/main.sh +++ b/experiments/protox_tpch_sf10/main.sh @@ -3,29 +3,29 @@ set -euxo pipefail SCALE_FACTOR=10 -INTENDED_PGDATA_HARDWARE=ssd +INTENDED_DBDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 -# python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4 -# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR -# python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune +python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot +# python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 +# python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4 +# python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR +# python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune exit 0 # benchmark -python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR -python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR +python3 task.py benchmark tpch data $SCALE_FACTOR +python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR # postgres -python3 task.py --no-startup-check dbms postgres build -python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py dbms postgres build +python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH # embedding -python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH -python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 +python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE 
--dbdata-parent-dpath $DBDATA_PARENT_DPATH +python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot +python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/misc/utils.py b/misc/utils.py index bec81d97..fb1dbde4 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -48,8 +48,8 @@ def get_scale_factor_string(scale_factor: float | str) -> str: else: return str(scale_factor).replace(".", "point") -def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: - return f"{benchmark_name}_sf{get_scale_factor_string(scale_factor)}_pristine_pgdata.tgz" +def get_dbdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: + return f"{benchmark_name}_sf{get_scale_factor_string(scale_factor)}_pristine_dbdata.tgz" # Other parameters @@ -134,15 +134,15 @@ def get_pgdata_tgz_name(benchmark_name: str, scale_factor: float) -> str: / "data" / (workload_name + ".link") ) -default_pristine_pgdata_snapshot_path = ( +default_pristine_dbdata_snapshot_path = ( lambda workspace_path, benchmark_name, scale_factor: get_symlinks_path_from_workspace_path( workspace_path ) / "dbgym_dbms_postgres" / "data" - / (get_pgdata_tgz_name(benchmark_name, scale_factor) + ".link") + / (get_dbdata_tgz_name(benchmark_name, scale_factor) + ".link") ) -default_pgdata_parent_dpath = ( +default_dbdata_parent_dpath = ( lambda workspace_path: get_tmp_path_from_workspace_path( workspace_path ) @@ -166,13 +166,11 @@ class DBGymConfig: Global configurations that apply to all parts of DB-Gym """ - def __init__(self, config_path, startup_check=False): + def __init__(self, config_path): """ Parameters ---------- config_path : Path - startup_check : bool - True if startup_check shoul """ assert is_base_git_dir( os.getcwd() @@ -188,18 +186,6 @@ def __init__(self, config_path, startup_check=False): Path(yaml_config["dbgym_workspace_path"]).resolve().absolute() ) - # Quickly display options. - if startup_check: - msg = ( - "💩💩💩 CMU-DB Database Gym: github.com/cmu-db/dbgym 💩💩💩\n" - f"\tdbgym_workspace_path: {dbgym_workspace_path}\n" - "\n" - "Proceed?" 
- ) - if not click.confirm(msg): - print("Goodbye.") - sys.exit(0) - self.path: Path = config_path self.cur_path_list: list[str] = ["dbgym"] self.root_yaml: dict = yaml_config @@ -216,8 +202,8 @@ def __init__(self, config_path, startup_check=False): ) self.dbgym_symlinks_path.mkdir(parents=True, exist_ok=True) # tmp is a workspace for this run only - # one use for it is to place the unzipped pgdata - # there's no need to save the actual pgdata dir in run_*/ because we just save a symlink to + # one use for it is to place the unzipped dbdata + # there's no need to save the actual dbdata dir in run_*/ because we just save a symlink to # the .tgz file we unzipped self.dbgym_tmp_path = get_tmp_path_from_workspace_path(self.dbgym_workspace_path) if self.dbgym_tmp_path.exists(): @@ -589,3 +575,19 @@ def make_redis_started(port: int): # When you start Redis in daemon mode, it won't let you know if it's started, so we ping again to check r = redis.Redis(port=port) r.ping() + + +def is_ssd(path: Path) -> bool: + try: + device = subprocess.check_output(['df', path]).decode().split('\n')[1].split()[0] + device_basename = os.path.basename(device) + lsblk_output = subprocess.check_output(['lsblk', '-d', '-o', 'name,rota']).decode() + for line in lsblk_output.split('\n')[1:]: + parts = line.split() + if parts and parts[0] == device_basename: + is_ssd = int(parts[1]) == 0 + return is_ssd + return False + except Exception as e: + print(f"An error occurred: {e}") + return False \ No newline at end of file diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index afab9108..4353a5dc 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -3,31 +3,31 @@ set -euxo pipefail SCALE_FACTOR=0.01 -INTENDED_PGDATA_HARDWARE=ssd +INTENDED_DBDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02 -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot +python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.02 +python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR exit 0 # benchmark -python3 task.py --no-startup-check benchmark tpch data $SCALE_FACTOR -python3 task.py --no-startup-check benchmark tpch workload --scale-factor $SCALE_FACTOR +python3 task.py benchmark tpch data $SCALE_FACTOR +python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR # postgres -python3 task.py --no-startup-check dbms postgres build -python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor $SCALE_FACTOR --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH +python3 task.py dbms postgres build +python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH exit 0 # embedding -# python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --default-sample-limit 64 --file-limit 64 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # short datagen for testing -python3 task.py --no-startup-check tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH # long datagen so that train doesn't crash -python3 task.py --no-startup-check tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 +# python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --default-sample-limit 64 --file-limit 64 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH # short datagen for testing +python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH # long datagen so that train doesn't crash +python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 # agent -python3 task.py --no-startup-check tune protox agent hpo tpch --scale-factor 
$SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-pgdata-hardware $INTENDED_PGDATA_HARDWARE --pgdata-parent-dpath $PGDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py --no-startup-check tune protox agent tune tpch --scale-factor $SCALE_FACTOR -python3 task.py --no-startup-check tune protox agent replay tpch --scale-factor $SCALE_FACTOR +python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot +python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/scripts/quickstart.sh b/scripts/quickstart.sh new file mode 100755 index 00000000..7d082726 --- /dev/null +++ b/scripts/quickstart.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -euxo pipefail + +DBMS=$1 +BENCHMARK=$2 +SCALE_FACTOR=$3 +AGENT=$4 + +# Benchmark +python3 task.py benchmark $BENCHMARK data $SCALE_FACTOR +python3 task.py benchmark $BENCHMARK workload --scale-factor $SCALE_FACTOR + +# DBMS +python3 task.py dbms $DBMS build +python3 task.py dbms $DBMS dbdata tpch --scale-factor $SCALE_FACTOR + +# Tune +python3 task.py tune $AGENT embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" # long datagen so that train doesn't crash +python3 task.py tune $AGENT embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 +python3 task.py tune $AGENT agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --build-space-good-for-boot +python3 task.py tune $AGENT agent tune tpch --scale-factor $SCALE_FACTOR +python3 task.py tune $AGENT agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/scripts/wan_test.sh b/scripts/wan_test.sh index fb39184f..a700dd31 100755 --- a/scripts/wan_test.sh +++ b/scripts/wan_test.sh @@ -3,17 +3,17 @@ set -euxo pipefail # Build Postgres -python3 task.py --no-startup-check dbms postgres repo +python3 task.py dbms postgres repo # Generate TPC-H -python3 task.py --no-startup-check benchmark tpch generate-data 1 -python3 task.py --no-startup-check benchmark tpch generate-workload queries_15721_15723 15721 15723 +python3 task.py benchmark tpch generate-data 1 +python3 task.py benchmark tpch generate-workload queries_15721_15723 15721 15723 # Create tpch_sf1.tgz -python3 task.py --no-startup-check dbms postgres pgdata tpch --scale-factor 1 +python3 task.py dbms postgres dbdata tpch --scale-factor 1 # Run Proto-X -python3 task.py --no-startup-check dbms postgres start -python3 task.py --no-startup-check tune protox embedding datagen tpch queries_15721_15723 --connection-str "host=localhost port=15721 dbname=tpch_sf1 user=noisepage_user password=noisepage_pass" --override-sample-limits "lineitem,32768" -python3 task.py --no-startup-check tune protox embedding train tpch queries_15721_15723 --iterations-per-epoch 1 --num-samples 4 --train-max-concurrent 4 --num-points-to-sample 32 --max-segments 3 -python3 task.py --no-startup-check dbms postgres stop +python3 task.py dbms postgres start +python3 task.py tune protox 
embedding datagen tpch queries_15721_15723 --connection-str "host=localhost port=15721 dbname=tpch_sf1 user=noisepage_user password=noisepage_pass" --override-sample-limits "lineitem,32768" +python3 task.py tune protox embedding train tpch queries_15721_15723 --iterations-per-epoch 1 --num-samples 4 --train-max-concurrent 4 --num-points-to-sample 32 --max-segments 3 +python3 task.py dbms postgres stop diff --git a/task.py b/task.py index 6f952656..c20cdf62 100644 --- a/task.py +++ b/task.py @@ -19,11 +19,10 @@ @click.group() @click.option("--config-path", default="config.yaml") -@click.option("--no-startup-check", is_flag=True) @click.pass_context -def task(ctx, config_path, no_startup_check): +def task(ctx, config_path): """💩💩💩 CMU-DB Database Gym: github.com/cmu-db/dbgym 💩💩💩""" - ctx.obj = DBGymConfig(config_path, startup_check=not no_startup_check) + ctx.obj = DBGymConfig(config_path) @click.group(name="config") diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py index 58e1aeb7..53e782a5 100644 --- a/tune/protox/agent/build_trial.py +++ b/tune/protox/agent/build_trial.py @@ -158,8 +158,8 @@ def _build_utilities( pg_conn = PostgresConn( dbgym_cfg=dbgym_cfg, pgport=pgport, - pristine_pgdata_snapshot_fpath=Path(hpo_params["pgconn_info"]["pristine_pgdata_snapshot_path"]), - pgdata_parent_dpath=Path(hpo_params["pgconn_info"]["pgdata_parent_dpath"]), + pristine_dbdata_snapshot_fpath=Path(hpo_params["pgconn_info"]["pristine_dbdata_snapshot_path"]), + dbdata_parent_dpath=Path(hpo_params["pgconn_info"]["dbdata_parent_dpath"]), pgbin_path=Path(hpo_params["pgconn_info"]["pgbin_path"]), enable_boot=enable_boot, boot_config_fpath=hpo_params["boot_config_fpath"][str(tuning_mode)], diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py index 3c19900c..db8f06eb 100644 --- a/tune/protox/agent/coerce_config.py +++ b/tune/protox/agent/coerce_config.py @@ -35,8 +35,8 @@ def coerce_config(dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dic "pgport": 5432, "pguser": "admin", "pgpass": "", - "pristine_pgdata_snapshot_path": "/mnt/nvme0n1/wz2/noisepage/pgdata", - "pgdata_parent_dpath": "/mnt/nvme0n1/wz2/noisepage/", + "pristine_dbdata_snapshot_path": "/mnt/nvme0n1/wz2/noisepage/pgdata", + "dbdata_parent_dpath": "/mnt/nvme0n1/wz2/noisepage/", "pgbin_path": "/mnt/nvme0n1/wz2/noisepage/", }, "benchmark_config": benchmark_config, diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py index 60498514..bc3d8432 100644 --- a/tune/protox/agent/hpo.py +++ b/tune/protox/agent/hpo.py @@ -13,7 +13,6 @@ from typing import Any, Optional, Union import random import click -import ssd_checker import ray from ray.tune import Trainable from ray.tune.schedulers import FIFOScheduler @@ -23,22 +22,22 @@ from ray.train import SyncConfig from tune.protox.agent.build_trial import build_trial -from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, TuningMode, link_result, open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_pgdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_PATH, default_pgbin_path, workload_name_fn, default_pgdata_parent_dpath, default_hpoed_agent_params_fname +from misc.utils import DEFAULT_BOOT_CONFIG_FPATH, DEFAULT_WORKLOAD_TIMEOUT, DBGymConfig, TuningMode, link_result, 
open_and_save, restart_ray, conv_inputpath_to_realabspath, default_pristine_dbdata_snapshot_path, default_workload_path, default_embedder_path, default_benchmark_config_path, default_benchbase_config_path, WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER, DEFAULT_SYSKNOBS_PATH, default_pgbin_path, workload_name_fn, default_dbdata_parent_dpath, default_hpoed_agent_params_fname, is_ssd METRIC_NAME = "Best Metric" class AgentHPOArgs: - def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot): + def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_dbdata_snapshot_path, dbdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot): self.benchmark_name = benchmark_name self.workload_name = workload_name self.embedder_path = embedder_path self.benchmark_config_path = benchmark_config_path self.benchbase_config_path = benchbase_config_path self.sysknobs_path = sysknobs_path - self.pristine_pgdata_snapshot_path = pristine_pgdata_snapshot_path - self.pgdata_parent_dpath = pgdata_parent_dpath + self.pristine_dbdata_snapshot_path = pristine_dbdata_snapshot_path + self.dbdata_parent_dpath = dbdata_parent_dpath self.pgbin_path = pgbin_path self.workload_path = workload_path self.seed = seed @@ -91,28 +90,22 @@ def __init__(self, benchmark_name, workload_name, embedder_path, benchmark_confi help=f"The path to the file configuring the space of system knobs the tuner can tune.", ) @click.option( - "--pristine-pgdata-snapshot-path", + "--pristine-dbdata-snapshot-path", default=None, type=Path, - help=f"The path to the .tgz snapshot of the pgdata directory to use as a starting point for tuning. The default is {default_pristine_pgdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", + help=f"The path to the .tgz snapshot of the dbdata directory to use as a starting point for tuning. The default is {default_pristine_dbdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", ) @click.option( - "--pristine-pgdata-snapshot-path", - default=None, - type=Path, - help=f"The path to the .tgz snapshot of the pgdata directory to use as a starting point for tuning. The default is {default_pristine_pgdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", -) -@click.option( - "--intended-pgdata-hardware", + "--intended-dbdata-hardware", type=click.Choice(["hdd", "ssd"]), default="hdd", - help=f"The intended hardware pgdata should be on. Used as a sanity check for --pgdata-parent-dpath.", + help=f"The intended hardware dbdata should be on. Used as a sanity check for --dbdata-parent-dpath.", ) @click.option( - "--pgdata-parent-dpath", + "--dbdata-parent-dpath", default=None, type=Path, - help=f"The path to the parent directory of the pgdata which will be actively tuned. 
The default is {default_pgdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.", + help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {default_dbdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.", ) @click.option( "--pgbin-path", @@ -199,9 +192,9 @@ def hpo( benchmark_config_path, benchbase_config_path, sysknobs_path, - pristine_pgdata_snapshot_path, - intended_pgdata_hardware, - pgdata_parent_dpath, + pristine_dbdata_snapshot_path, + intended_dbdata_hardware, + dbdata_parent_dpath, pgbin_path, workload_path, seed, @@ -223,10 +216,10 @@ def hpo( benchmark_config_path = default_benchmark_config_path(benchmark_name) if benchbase_config_path == None: benchbase_config_path = default_benchbase_config_path(benchmark_name) - if pristine_pgdata_snapshot_path == None: - pristine_pgdata_snapshot_path = default_pristine_pgdata_snapshot_path(dbgym_cfg.dbgym_workspace_path, benchmark_name, scale_factor) - if pgdata_parent_dpath == None: - pgdata_parent_dpath = default_pgdata_parent_dpath(dbgym_cfg.dbgym_workspace_path) + if pristine_dbdata_snapshot_path == None: + pristine_dbdata_snapshot_path = default_pristine_dbdata_snapshot_path(dbgym_cfg.dbgym_workspace_path, benchmark_name, scale_factor) + if dbdata_parent_dpath == None: + dbdata_parent_dpath = default_dbdata_parent_dpath(dbgym_cfg.dbgym_workspace_path) if pgbin_path == None: pgbin_path = default_pgbin_path(dbgym_cfg.dbgym_workspace_path) if workload_path == None: @@ -239,22 +232,22 @@ def hpo( benchmark_config_path = conv_inputpath_to_realabspath(dbgym_cfg, benchmark_config_path) benchbase_config_path = conv_inputpath_to_realabspath(dbgym_cfg, benchbase_config_path) sysknobs_path = conv_inputpath_to_realabspath(dbgym_cfg, sysknobs_path) - pristine_pgdata_snapshot_path = conv_inputpath_to_realabspath(dbgym_cfg, pristine_pgdata_snapshot_path) - pgdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, pgdata_parent_dpath) + pristine_dbdata_snapshot_path = conv_inputpath_to_realabspath(dbgym_cfg, pristine_dbdata_snapshot_path) + dbdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, dbdata_parent_dpath) pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path) boot_config_fpath_during_hpo = conv_inputpath_to_realabspath(dbgym_cfg, boot_config_fpath_during_hpo) # Check assertions on args - if intended_pgdata_hardware == "hdd": - assert not ssd_checker.is_ssd(pgdata_parent_dpath), f"Intended hardware is HDD but pgdata_parent_dpath ({pgdata_parent_dpath}) is an SSD" - elif intended_pgdata_hardware == "ssd": - assert ssd_checker.is_ssd(pgdata_parent_dpath), f"Intended hardware is SSD but pgdata_parent_dpath ({pgdata_parent_dpath}) is an HDD" + if intended_dbdata_hardware == "hdd": + assert not is_ssd(dbdata_parent_dpath), f"Intended hardware is HDD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an SSD" + elif intended_dbdata_hardware == "ssd": + assert is_ssd(dbdata_parent_dpath), f"Intended hardware is SSD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an HDD" else: assert False # Create args object - hpo_args = AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot) + hpo_args = 
AgentHPOArgs(benchmark_name, workload_name, embedder_path, benchmark_config_path, benchbase_config_path, sysknobs_path, pristine_dbdata_snapshot_path, dbdata_parent_dpath, pgbin_path, workload_path, seed, agent, max_concurrent, num_samples, tune_duration_during_hpo, workload_timeout, query_timeout, enable_boot_during_hpo, boot_config_fpath_during_hpo, build_space_good_for_boot) _tune_hpo(dbgym_cfg, hpo_args) @@ -607,8 +600,8 @@ def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: hpo_args.workload_path, embedder_path, pgconn_info={ - "pristine_pgdata_snapshot_path": hpo_args.pristine_pgdata_snapshot_path, - "pgdata_parent_dpath": hpo_args.pgdata_parent_dpath, + "pristine_dbdata_snapshot_path": hpo_args.pristine_dbdata_snapshot_path, + "dbdata_parent_dpath": hpo_args.dbdata_parent_dpath, "pgbin_path": hpo_args.pgbin_path, }, benchbase_config=benchbase_config, diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py index 940a3dfd..3e4889c8 100644 --- a/tune/protox/embedding/datagen.py +++ b/tune/protox/embedding/datagen.py @@ -13,7 +13,6 @@ import yaml from sklearn.preprocessing import quantile_transform import shutil -import ssd_checker from misc.utils import ( BENCHMARK_NAME_PLACEHOLDER, @@ -24,14 +23,15 @@ conv_inputpath_to_realabspath, default_benchmark_config_path, default_workload_path, - default_pristine_pgdata_snapshot_path, + default_pristine_dbdata_snapshot_path, default_pgbin_path, traindata_fname, link_result, open_and_save, save_file, workload_name_fn, - default_pgdata_parent_dpath, + default_dbdata_parent_dpath, + is_ssd, ) from tune.protox.embedding.loss import COST_COLUMNS from tune.protox.env.space.primitive_space.index_space import IndexSpace @@ -69,22 +69,22 @@ @click.option("--pgbin-path", type=Path, default=None, help=f"The path to the bin containing Postgres executables. The default is {default_pgbin_path(WORKSPACE_PATH_PLACEHOLDER)}.") # TODO(phw2): need to run pgtune before gathering data @click.option( - "--pristine-pgdata-snapshot-path", + "--pristine-dbdata-snapshot-path", default=None, type=Path, - help=f"The path to the .tgz snapshot of the pgdata directory to build an embedding space over. The default is {default_pristine_pgdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", + help=f"The path to the .tgz snapshot of the dbdata directory to build an embedding space over. The default is {default_pristine_dbdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", ) @click.option( - "--intended-pgdata-hardware", + "--intended-dbdata-hardware", type=click.Choice(["hdd", "ssd"]), default="hdd", - help=f"The intended hardware pgdata should be on. Used as a sanity check for --pgdata-parent-dpath.", + help=f"The intended hardware dbdata should be on. Used as a sanity check for --dbdata-parent-dpath.", ) @click.option( - "--pgdata-parent-dpath", + "--dbdata-parent-dpath", default=None, type=Path, - help=f"The path to the parent directory of the pgdata which will be actively tuned. The default is {default_pristine_pgdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", + help=f"The path to the parent directory of the dbdata which will be actively tuned. 
The default is {default_pristine_dbdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", ) @click.option( "--benchmark-config-path", @@ -154,9 +154,9 @@ def datagen( query_subset, scale_factor, pgbin_path, - pristine_pgdata_snapshot_path, - intended_pgdata_hardware, - pgdata_parent_dpath, + pristine_dbdata_snapshot_path, + intended_dbdata_hardware, + dbdata_parent_dpath, benchmark_config_path, workload_path, seed, @@ -191,12 +191,12 @@ def datagen( ) if pgbin_path == None: pgbin_path = default_pgbin_path(dbgym_cfg.dbgym_workspace_path) - if pristine_pgdata_snapshot_path == None: - pristine_pgdata_snapshot_path = default_pristine_pgdata_snapshot_path( + if pristine_dbdata_snapshot_path == None: + pristine_dbdata_snapshot_path = default_pristine_dbdata_snapshot_path( dbgym_cfg.dbgym_workspace_path, benchmark_name, scale_factor ) - if pgdata_parent_dpath == None: - pgdata_parent_dpath = default_pgdata_parent_dpath(dbgym_cfg.dbgym_workspace_path) + if dbdata_parent_dpath == None: + dbdata_parent_dpath = default_dbdata_parent_dpath(dbgym_cfg.dbgym_workspace_path) if max_concurrent == None: max_concurrent = os.cpu_count() if seed == None: @@ -206,14 +206,14 @@ def datagen( workload_path = conv_inputpath_to_realabspath(dbgym_cfg, workload_path) benchmark_config_path = conv_inputpath_to_realabspath(dbgym_cfg, benchmark_config_path) pgbin_path = conv_inputpath_to_realabspath(dbgym_cfg, pgbin_path) - pristine_pgdata_snapshot_path = conv_inputpath_to_realabspath(dbgym_cfg, pristine_pgdata_snapshot_path) - pgdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, pgdata_parent_dpath) + pristine_dbdata_snapshot_path = conv_inputpath_to_realabspath(dbgym_cfg, pristine_dbdata_snapshot_path) + dbdata_parent_dpath = conv_inputpath_to_realabspath(dbgym_cfg, dbdata_parent_dpath) # Check assertions on args - if intended_pgdata_hardware == "hdd": - assert not ssd_checker.is_ssd(pgdata_parent_dpath), f"Intended hardware is HDD but pgdata_parent_dpath ({pgdata_parent_dpath}) is an SSD" - elif intended_pgdata_hardware == "ssd": - assert ssd_checker.is_ssd(pgdata_parent_dpath), f"Intended hardware is SSD but pgdata_parent_dpath ({pgdata_parent_dpath}) is an HDD" + if intended_dbdata_hardware == "hdd": + assert not is_ssd(dbdata_parent_dpath), f"Intended hardware is HDD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an SSD" + elif intended_dbdata_hardware == "ssd": + assert is_ssd(dbdata_parent_dpath), f"Intended hardware is SSD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an HDD" else: assert False @@ -238,7 +238,7 @@ def datagen( # Group args together to reduce the # of parameters we pass into functions # I chose to group them into separate objects instead because it felt hacky to pass a giant args object into every function generic_args = EmbeddingDatagenGenericArgs( - benchmark_name, workload_name, scale_factor, benchmark_config_path, seed, workload_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath + benchmark_name, workload_name, scale_factor, benchmark_config_path, seed, workload_path, pristine_dbdata_snapshot_path, dbdata_parent_dpath ) dir_gen_args = EmbeddingDirGenArgs( leading_col_tbls, @@ -252,31 +252,31 @@ def datagen( # run all steps start_time = time.time() - pgdata_dpath = untar_snapshot(dbgym_cfg, generic_args.pristine_pgdata_snapshot_path, generic_args.pgdata_parent_dpath) + dbdata_dpath = untar_snapshot(dbgym_cfg, generic_args.pristine_dbdata_snapshot_path, generic_args.dbdata_parent_dpath) pgbin_path = 
default_pgbin_path(dbgym_cfg.dbgym_workspace_path) - start_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) + start_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) _gen_traindata_dir(dbgym_cfg, generic_args, dir_gen_args) _combine_traindata_dir_into_parquet(dbgym_cfg, generic_args, file_gen_args) datagen_duration = time.time() - start_time with open(f"{dbgym_cfg.dbgym_this_run_path}/datagen_time.txt", "w") as f: f.write(f"{datagen_duration}") - stop_postgres(dbgym_cfg, pgbin_path, pgdata_dpath) + stop_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) -def untar_snapshot(dbgym_cfg: DBGymConfig, pgdata_snapshot_fpath: Path, pgdata_parent_dpath: Path) -> Path: +def untar_snapshot(dbgym_cfg: DBGymConfig, dbdata_snapshot_fpath: Path, dbdata_parent_dpath: Path) -> Path: # It should be an absolute path and it should exist - assert pgdata_snapshot_fpath.is_absolute() and pgdata_snapshot_fpath.exists(), f"untar_snapshot(): pgdata_snapshot_fpath ({pgdata_snapshot_fpath}) either doesn't exist or is not absolute" + assert dbdata_snapshot_fpath.is_absolute() and dbdata_snapshot_fpath.exists(), f"untar_snapshot(): dbdata_snapshot_fpath ({dbdata_snapshot_fpath}) either doesn't exist or is not absolute" # It may be a symlink, so we need to resolve it first - pgdata_snapshot_real_fpath = pgdata_snapshot_fpath.resolve() - save_file(dbgym_cfg, pgdata_snapshot_real_fpath) - pgdata_dpath = pgdata_parent_dpath / "pgdata" - # Make the parent dir and the pgdata dir. Note how we require that pgdata_dpath does not exist while it's ok if the parent does. - pgdata_parent_dpath.mkdir(parents=True, exist_ok=True) - if pgdata_dpath.exists(): - shutil.rmtree(pgdata_dpath) - pgdata_dpath.mkdir(parents=False, exist_ok=False) - subprocess_run(f"tar -xzf {pgdata_snapshot_real_fpath} -C {pgdata_dpath}") - return pgdata_dpath + dbdata_snapshot_real_fpath = dbdata_snapshot_fpath.resolve() + save_file(dbgym_cfg, dbdata_snapshot_real_fpath) + dbdata_dpath = dbdata_parent_dpath / "dbdata" + # Make the parent dir and the dbdata dir. Note how we require that dbdata_dpath does not exist while it's ok if the parent does.
+ dbdata_parent_dpath.mkdir(parents=True, exist_ok=True) + if dbdata_dpath.exists(): + shutil.rmtree(dbdata_dpath) + dbdata_dpath.mkdir(parents=False, exist_ok=False) + subprocess_run(f"tar -xzf {dbdata_snapshot_real_fpath} -C {dbdata_dpath}") + return dbdata_dpath class EmbeddingDatagenGenericArgs: @@ -286,15 +286,15 @@ class EmbeddingDatagenGenericArgs: I wanted to make multiple classes instead of just one to conceptually separate the different args """ - def __init__(self, benchmark_name, workload_name, scale_factor, benchmark_config_path, seed, workload_path, pristine_pgdata_snapshot_path, pgdata_parent_dpath): + def __init__(self, benchmark_name, workload_name, scale_factor, benchmark_config_path, seed, workload_path, pristine_dbdata_snapshot_path, dbdata_parent_dpath): self.benchmark_name = benchmark_name self.workload_name = workload_name self.scale_factor = scale_factor self.benchmark_config_path = benchmark_config_path self.seed = seed self.workload_path = workload_path - self.pristine_pgdata_snapshot_path = pristine_pgdata_snapshot_path - self.pgdata_parent_dpath = pgdata_parent_dpath + self.pristine_dbdata_snapshot_path = pristine_dbdata_snapshot_path + self.dbdata_parent_dpath = dbdata_parent_dpath class EmbeddingDirGenArgs: diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py index 62fa92b8..92236519 100644 --- a/tune/protox/env/pg_env.py +++ b/tune/protox/env/pg_env.py @@ -220,8 +220,8 @@ def step_before_execution(self, action: HolonAction) -> Tuple[bool, EnvInfoDict] # Get the prior state. prior_state = copy.deepcopy(self.state_container) # Save the old configuration file. - old_conf_path = f"{self.pg_conn.pgdata_dpath}/postgresql.auto.conf" - conf_path = f"{self.pg_conn.pgdata_dpath}/postgresql.auto.old" + old_conf_path = f"{self.pg_conn.dbdata_dpath}/postgresql.auto.conf" + conf_path = f"{self.pg_conn.dbdata_dpath}/postgresql.auto.old" local["cp"][old_conf_path, conf_path].run() # Figure out what we have to change to get to the new configuration. @@ -421,8 +421,8 @@ def attempt_checkpoint(conn_str: str) -> None: def close(self) -> None: self.pg_conn.shutdown_postgres() # This file may not be in in [workspace]/tmp/, so it's important to delete it - local["rm"]["-rf", self.pg_conn.pgdata_dpath].run() + local["rm"]["-rf", self.pg_conn.dbdata_dpath].run() # Even though these files get deleted because [workspace]/tmp/ gets deleted, # we'll just delete them here anyways because why not - local["rm"]["-f", self.pg_conn.checkpoint_pgdata_snapshot_fpath].run() - local["rm"]["-f", f"{self.pg_conn.checkpoint_pgdata_snapshot_fpath}.tmp"].run() + local["rm"]["-f", self.pg_conn.checkpoint_dbdata_snapshot_fpath].run() + local["rm"]["-f", f"{self.pg_conn.checkpoint_dbdata_snapshot_fpath}.tmp"].run() diff --git a/tune/protox/env/util/pg_conn.py b/tune/protox/env/util/pg_conn.py index 3a4f0207..69b2c701 100644 --- a/tune/protox/env/util/pg_conn.py +++ b/tune/protox/env/util/pg_conn.py @@ -2,7 +2,7 @@ At a high level, this file's goal is to provide helpers to manage a Postgres instance during agent tuning. On the other hand, the goal of dbms.postgres.cli is to (1) install+build postgres and (2) - create pgdata. + create dbdata. util.pg provides helpers used by *both* of the above files (as well as other files). 
""" import os @@ -28,8 +28,8 @@ def __init__( self, dbgym_cfg: DBGymConfig, pgport: int, - pristine_pgdata_snapshot_fpath: Path, - pgdata_parent_dpath: Path, + pristine_dbdata_snapshot_fpath: Path, + dbdata_parent_dpath: Path, pgbin_path: Union[str, Path], connect_timeout: int, enable_boot: bool, @@ -46,20 +46,20 @@ def __init__( self.log_step = 0 self.logger = logger - # All the paths related to pgdata - # pristine_pgdata_snapshot_fpath is the .tgz snapshot that represents the starting state + # All the paths related to dbdata + # pristine_dbdata_snapshot_fpath is the .tgz snapshot that represents the starting state # of the database (with the default configuration). It is generated by a call to # `python tune.py dbms postgres ...` and should not be overwritten. - self.pristine_pgdata_snapshot_fpath = pristine_pgdata_snapshot_fpath - # checkpoint_pgdata_snapshot_fpath is the .tgz snapshot that represents the current + self.pristine_dbdata_snapshot_fpath = pristine_dbdata_snapshot_fpath + # checkpoint_dbdata_snapshot_fpath is the .tgz snapshot that represents the current # state of the database as it is being tuned. It is generated while tuning and is # discarded once tuning is completed. - self.checkpoint_pgdata_snapshot_fpath = dbgym_cfg.dbgym_tmp_path / "checkpoint_pgdata.tgz" - # pgdata_parent_dpath is the parent directory of the pgdata that is *actively being tuned*. - # Setting this lets us control the hardware device pgdata is built on (e.g. HDD vs. SSD). - self.pgdata_parent_dpath = pgdata_parent_dpath - # pgdata_dpath is the pgdata that is *actively being tuned* - self.pgdata_dpath = self.pgdata_parent_dpath / f"pgdata{self.pgport}" + self.checkpoint_dbdata_snapshot_fpath = dbgym_cfg.dbgym_tmp_path / "checkpoint_dbdata.tgz" + # dbdata_parent_dpath is the parent directory of the dbdata that is *actively being tuned*. + # Setting this lets us control the hardware device dbdata is built on (e.g. HDD vs. SSD). + self.dbdata_parent_dpath = dbdata_parent_dpath + # dbdata_dpath is the dbdata that is *actively being tuned* + self.dbdata_dpath = self.dbdata_parent_dpath / f"dbdata{self.pgport}" self._conn: Optional[psycopg.Connection[Any]] = None @@ -92,13 +92,13 @@ def move_log(self) -> None: def shutdown_postgres(self) -> None: """Shuts down postgres.""" self.disconnect() - if not Path(self.pgdata_dpath).exists(): + if not Path(self.dbdata_dpath).exists(): return while True: self.logger.get_logger(__name__).debug("Shutting down postgres...") _, stdout, stderr = local[f"{self.pgbin_path}/pg_ctl"][ - "stop", "--wait", "-t", "180", "-D", self.pgdata_dpath + "stop", "--wait", "-t", "180", "-D", self.dbdata_dpath ].run(retcode=None) time.sleep(1) self.logger.get_logger(__name__).debug( @@ -115,7 +115,7 @@ def shutdown_postgres(self) -> None: DBGYM_POSTGRES_DBNAME, ].run(retcode=None) - exists = (Path(self.pgdata_dpath) / "postmaster.pid").exists() + exists = (Path(self.dbdata_dpath) / "postmaster.pid").exists() if not exists and retcode != 0: break @@ -127,7 +127,7 @@ def start_with_changes( save_checkpoint: bool = False, ) -> bool: """ - This function assumes that some snapshot has already been untarred into self.pgdata_dpath + This function assumes that some snapshot has already been untarred into self.dbdata_dpath """ # Install the new configuration changes. if conf_changes is not None: @@ -135,11 +135,11 @@ def start_with_changes( # This way of doing it works for both single or multiple libraries. 
An example of a way # that *doesn't* work is `f"shared_preload_libraries='"{SHARED_PRELOAD_LIBRARIES}"'"` conf_changes.append(f"shared_preload_libraries='{SHARED_PRELOAD_LIBRARIES}'") - pgdata_auto_conf_path = self.pgdata_dpath / "postgresql.auto.conf" - with open(pgdata_auto_conf_path, "w") as f: + dbdata_auto_conf_path = self.dbdata_dpath / "postgresql.auto.conf" + with open(dbdata_auto_conf_path, "w") as f: f.write("\n".join(conf_changes)) save_auto_conf_path = self.dbgym_cfg.cur_task_runs_data_path(".", mkdir=True) / "postgresql.auto.conf" - local["cp"][pgdata_auto_conf_path, save_auto_conf_path].run() + local["cp"][dbdata_auto_conf_path, save_auto_conf_path].run() link_result(self.dbgym_cfg, save_auto_conf_path) # Start postgres instance. @@ -151,14 +151,14 @@ def start_with_changes( "cf", # We append .tmp so that if we fail in the *middle* of running tar, we # still have the previous checkpoint available to us - f"{self.checkpoint_pgdata_snapshot_fpath}.tmp", + f"{self.checkpoint_dbdata_snapshot_fpath}.tmp", "-C", - parent_dir(self.pgdata_dpath), - self.pgdata_dpath, + parent_dir(self.dbdata_dpath), + self.dbdata_dpath, ].run() # Make sure the PID lock file doesn't exist. - pid_lock = Path(f"{self.pgdata_dpath}/postmaster.pid") + pid_lock = Path(f"{self.dbdata_dpath}/postmaster.pid") assert not pid_lock.exists() if dump_page_cache: @@ -170,7 +170,7 @@ def start_with_changes( # Try starting up. retcode, stdout, stderr = local[f"{self.pgbin_path}/pg_ctl"][ "-D", - self.pgdata_dpath, + self.dbdata_dpath, "--wait", "-t", "180", @@ -241,7 +241,7 @@ def start_with_changes( # Move the temporary over since we now know the temporary can load. if save_checkpoint: - shutil.move(f"{self.pgdata_dpath}.tgz.tmp", f"{self.pgdata_dpath}.tgz") + shutil.move(f"{self.dbdata_dpath}.tgz.tmp", f"{self.dbdata_dpath}.tgz") return True @@ -258,20 +258,20 @@ def _set_up_boot(self, intelligent_cache: bool, early_stop: bool, seq_sample: bo # If any of these commands fail, they'll throw a Python exception # Thus, if none of them throw an exception, we know they passed self.logger.get_logger(__name__).debug("Setting up boot") - self.conn().execute("DROP EXTENSION IF EXISTS bytejack") - self.conn().execute("CREATE EXTENSION IF NOT EXISTS bytejack") - self.conn().execute("SELECT bytejack_connect()") - self.conn().execute("SELECT bytejack_cache_clear()") - self.conn().execute("SET bytejack.enable=true") - self.conn().execute("SET bytejack.intercept_explain_analyze=true") - self.conn().execute(f"SET bytejack.intelligent_cache={intelligent_cache}") - self.conn().execute(f"SET bytejack.early_stop={early_stop}") - self.conn().execute(f"SET bytejack.seq_sample={seq_sample}") - self.conn().execute(f"SET bytejack.seq_sample_pct={seq_sample_pct}") - self.conn().execute(f"SET bytejack.seq_sample_seed={seq_sample_seed}") - self.conn().execute(f"SET bytejack.mu_hyp_opt={mu_hyp_opt}") - self.conn().execute(f"SET bytejack.mu_hyp_time={mu_hyp_time}") - self.conn().execute(f"SET bytejack.mu_hyp_stdev={mu_hyp_stdev}") + self.conn().execute("DROP EXTENSION IF EXISTS boot") + self.conn().execute("CREATE EXTENSION IF NOT EXISTS boot") + self.conn().execute("SELECT boot_connect()") + self.conn().execute("SELECT boot_cache_clear()") + self.conn().execute("SET boot.enable=true") + self.conn().execute("SET boot.intercept_explain_analyze=true") + self.conn().execute(f"SET boot.intelligent_cache={intelligent_cache}") + self.conn().execute(f"SET boot.early_stop={early_stop}") + self.conn().execute(f"SET boot.seq_sample={seq_sample}") + 
self.conn().execute(f"SET boot.seq_sample_pct={seq_sample_pct}") + self.conn().execute(f"SET boot.seq_sample_seed={seq_sample_seed}") + self.conn().execute(f"SET boot.mu_hyp_opt={mu_hyp_opt}") + self.conn().execute(f"SET boot.mu_hyp_time={mu_hyp_time}") + self.conn().execute(f"SET boot.mu_hyp_stdev={mu_hyp_stdev}") self.logger.get_logger(__name__).debug("Set up boot") @time_record("psql") @@ -332,29 +332,29 @@ def cancel_fn(conn_str: str) -> None: return 0, None def restore_pristine_snapshot(self): - self._restore_snapshot(self.pristine_pgdata_snapshot_fpath) + self._restore_snapshot(self.pristine_dbdata_snapshot_fpath) def restore_checkpointed_snapshot(self): - self._restore_snapshot(self.checkpoint_pgdata_snapshot_fpath) + self._restore_snapshot(self.checkpoint_dbdata_snapshot_fpath) @time_record("restore") def _restore_snapshot( - self, pgdata_snapshot_path: Path, + self, dbdata_snapshot_path: Path, ) -> bool: self.shutdown_postgres() - local["rm"]["-rf", self.pgdata_dpath].run() - local["mkdir"]["-m", "0700", "-p", self.pgdata_dpath].run() + local["rm"]["-rf", self.dbdata_dpath].run() + local["mkdir"]["-m", "0700", "-p", self.dbdata_dpath].run() - # Strip the "pgdata" so we can implant directly into the target pgdata_dpath. - assert pgdata_snapshot_path.exists() + # Strip the "dbdata" so we can implant directly into the target dbdata_dpath. + assert dbdata_snapshot_path.exists() local["tar"][ - "xf", pgdata_snapshot_path, "-C", self.pgdata_dpath, "--strip-components", "1" + "xf", dbdata_snapshot_path, "-C", self.dbdata_dpath, "--strip-components", "1" ].run() # Imprint the required port. ( (local["echo"][f"port={self.pgport}"]) - >> f"{self.pgdata_dpath}/postgresql.conf" + >> f"{self.dbdata_dpath}/postgresql.conf" )() return self.start_with_changes(conf_changes=None) diff --git a/util/pg.py b/util/pg.py index 469f5660..ee45772d 100644 --- a/util/pg.py +++ b/util/pg.py @@ -12,7 +12,7 @@ DBGYM_POSTGRES_DBNAME = "dbgym" DEFAULT_POSTGRES_DBNAME = "postgres" DEFAULT_POSTGRES_PORT = 5432 -SHARED_PRELOAD_LIBRARIES = "bytejack,pg_hint_plan,pg_prewarm" +SHARED_PRELOAD_LIBRARIES = "boot,pg_hint_plan,pg_prewarm" def conn_execute(conn: Connection, sql: str) -> CursorResult: