From 72e9808a87adc1fe3adf141d7075b15c1f1ccc48 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 30 Oct 2024 18:14:44 -0400 Subject: [PATCH] Postgres start/stop demo (#45) **Summary**: Basic demo letting you start and stop Postgres. **Demo**: (Yes, this is a demo of the demo). The demo uses `pgrep postgres` to show that no Postgres instance is running. Then the user is able to start/stop Postgres on the web UI. The results of that are verified using `pgrep postgres`. https://github.com/user-attachments/assets/e407df5d-62b6-4c14-90c8-7602d2d8dce1 **Details**: * Uses `streamlit` for the frontend. * Uses the recently factored-out `PostgresConn` in the backend. * Right now it is only deployable locally. In the future, we'll want to deploy it on a CMU-DB machine so people can play around with it. * It currently relies on the same workspace used in development. In the future, we may want to give the demo its own workspace. The setup process of this workspace would then require more scripting. 
--- .../workflows/{tests_ci.yaml => tests.yaml} | 11 ++-- dependencies/requirements.txt | 1 + experiments/load_per_machine_envvars.sh | 5 ++ scripts/pat_test.sh | 4 +- scripts/run_demo.sh | 2 + task.py | 5 +- tune/demo/__init__.py | 0 tune/demo/main.py | 64 +++++++++++++++++++ tune/env/integtest_pg_conn.py | 16 +++-- util/workspace.py | 10 +++ 10 files changed, 102 insertions(+), 16 deletions(-) rename .github/workflows/{tests_ci.yaml => tests.yaml} (90%) create mode 100755 scripts/run_demo.sh create mode 100644 tune/demo/__init__.py create mode 100644 tune/demo/main.py diff --git a/.github/workflows/tests_ci.yaml b/.github/workflows/tests.yaml similarity index 90% rename from .github/workflows/tests_ci.yaml rename to .github/workflows/tests.yaml index 60f46015..25593ea6 100644 --- a/.github/workflows/tests_ci.yaml +++ b/.github/workflows/tests.yaml @@ -1,4 +1,4 @@ -name: Unit and Integration Tests +name: Static, Unit, Integration, and End-to-End Tests on: push: {} @@ -6,7 +6,7 @@ on: branches: [main] jobs: - ci: + tests: # The code for the self-hosted runners is at https://github.com/wangpatrick57/dbgym-runners. runs-on: self-hosted @@ -50,11 +50,12 @@ jobs: - name: Run integration tests # Integration tests do require external systems to be running (most commonly a database instance). # Unlike end-to-end tests though, they test a specific module in a detailed manner, much like a unit test does. - # - # We set `INTENDED_DBDATA_HARDWARE` so that it's seen when `integtest_pg_conn.py` executes `./tune/env/set_up_env_integtests.sh`. + env: + # We set `INTENDED_DBDATA_HARDWARE` so that it's seen when `integtest_pg_conn.py` executes `./tune/env/set_up_env_integtests.sh`. + INTENDED_DBDATA_HARDWARE: ssd run: | . 
"$HOME/.cargo/env" - export INTENDED_DBDATA_HARDWARE=ssd + export ./scripts/run_integration_tests.sh - name: Run end-to-end tests diff --git a/dependencies/requirements.txt b/dependencies/requirements.txt index 216c159a..6c0cb4b7 100644 --- a/dependencies/requirements.txt +++ b/dependencies/requirements.txt @@ -134,3 +134,4 @@ virtualenv==20.25.0 Werkzeug==3.0.1 wrapt==1.14.1 zipp==3.17.0 +streamlit==1.39.0 diff --git a/experiments/load_per_machine_envvars.sh b/experiments/load_per_machine_envvars.sh index b9772d3c..22b220c8 100644 --- a/experiments/load_per_machine_envvars.sh +++ b/experiments/load_per_machine_envvars.sh @@ -3,8 +3,13 @@ host=$(hostname) if [ "$host" == "dev4" ]; then export DBDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/ + export INTENDED_DBDATA_HARDWARE=ssd elif [ "$host" == "dev6" ]; then export DBDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/ + export INTENDED_DBDATA_HARDWARE=ssd +elif [ "$host" == "patnuc" ]; then + export DBDATA_PARENT_DPATH=../dbgym_workspace/tmp/ + export INTENDED_DBDATA_HARDWARE=hdd else echo "Did not recognize host \"$host\"" exit 1 diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index fc2ae203..f858c76d 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -3,11 +3,11 @@ set -euxo pipefail SCALE_FACTOR=0.01 -INTENDED_DBDATA_HARDWARE=ssd . ./experiments/load_per_machine_envvars.sh # space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 +python3 task.py dbms postgres build +python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH exit 0 # benchmark diff --git a/scripts/run_demo.sh b/scripts/run_demo.sh new file mode 100755 index 00000000..1e434a7d --- /dev/null +++ b/scripts/run_demo.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python -m streamlit run tune/demo/main.py \ No newline at end of file diff --git a/task.py b/task.py index 46adb114..ff30f9c3 100644 --- a/task.py +++ b/task.py @@ -17,7 +17,7 @@ from dbms.cli import dbms_group from manage.cli import manage_group from tune.cli import tune_group -from util.workspace import DBGymConfig +from util.workspace import make_standard_dbgym_cfg # TODO(phw2): Save commit, git diff, and run command. # TODO(phw2): Remove write permissions on old run_*/ dirs to enforce that they are immutable. 
@@ -28,8 +28,7 @@ @click.pass_context def task(ctx: click.Context) -> None: """🛢️ CMU-DB Database Gym: github.com/cmu-db/dbgym 🏋️""" - dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml")) - dbgym_cfg = DBGymConfig(dbgym_config_path) + dbgym_cfg = make_standard_dbgym_cfg() ctx.obj = dbgym_cfg log_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) diff --git a/tune/demo/__init__.py b/tune/demo/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tune/demo/main.py b/tune/demo/main.py new file mode 100644 index 00000000..9520e611 --- /dev/null +++ b/tune/demo/main.py @@ -0,0 +1,64 @@ +import streamlit as st + +from tune.env.pg_conn import PostgresConn +from util.pg import DEFAULT_POSTGRES_PORT, get_is_postgres_running +from util.workspace import ( + DEFAULT_BOOT_CONFIG_FPATH, + DBGymConfig, + default_dbdata_parent_dpath, + default_pgbin_path, + default_pristine_dbdata_snapshot_path, + make_standard_dbgym_cfg, +) + + +# This ensures that DBGymConfig is only created once. Check DBGymConfig.__init__() for why we must do this. 
+@st.cache_resource +def make_dbgym_cfg() -> DBGymConfig: + return make_standard_dbgym_cfg() + + +class Demo: + BENCHMARK = "tpch" + SCALE_FACTOR = 0.01 + + def __init__(self) -> None: + self.dbgym_cfg = make_dbgym_cfg() + self.pristine_dbdata_snapshot_path = default_pristine_dbdata_snapshot_path( + self.dbgym_cfg.dbgym_workspace_path, Demo.BENCHMARK, Demo.SCALE_FACTOR + ) + self.dbdata_parent_dpath = default_dbdata_parent_dpath( + self.dbgym_cfg.dbgym_workspace_path + ) + self.pgbin_dpath = default_pgbin_path(self.dbgym_cfg.dbgym_workspace_path) + self.pg_conn = PostgresConn( + self.dbgym_cfg, + DEFAULT_POSTGRES_PORT, + self.pristine_dbdata_snapshot_path, + self.dbdata_parent_dpath, + self.pgbin_dpath, + False, + DEFAULT_BOOT_CONFIG_FPATH, + ) + + def main(self) -> None: + is_postgres_running = get_is_postgres_running() + + if is_postgres_running: + st.write("Postgres is running") + + if st.button("Stop Postgres"): + self.pg_conn.shutdown_postgres() + st.rerun() + else: + st.write("Postgres is not running") + + if st.button("Start Postgres"): + self.pg_conn.restore_pristine_snapshot() + self.pg_conn.start_with_changes() + st.rerun() + + +if __name__ == "__main__": + demo = Demo() + demo.main() diff --git a/tune/env/integtest_pg_conn.py b/tune/env/integtest_pg_conn.py index e4f356af..a2470571 100644 --- a/tune/env/integtest_pg_conn.py +++ b/tune/env/integtest_pg_conn.py @@ -5,7 +5,11 @@ import yaml from tune.env.pg_conn import PostgresConn -from util.pg import get_is_postgres_running, get_running_postgres_ports +from util.pg import ( + DEFAULT_POSTGRES_PORT, + get_is_postgres_running, + get_running_postgres_ports, +) from util.workspace import ( DEFAULT_BOOT_CONFIG_FPATH, DBGymConfig, @@ -17,7 +21,6 @@ ENV_INTEGTESTS_DBGYM_CONFIG_FPATH = Path("tune/env/env_integtests_dbgym_config.yaml") BENCHMARK = "tpch" SCALE_FACTOR = 0.01 -BASE_PGPORT = 5432 def get_unittest_workspace_path() -> Path: @@ -54,7 +57,7 @@ def setUp(self) -> None: def tearDown(self) -> None: 
self.assertFalse(get_is_postgres_running()) - def create_pg_conn(self, pgport: int = BASE_PGPORT) -> PostgresConn: + def create_pg_conn(self, pgport: int = DEFAULT_POSTGRES_PORT) -> PostgresConn: return PostgresConn( PostgresConnTests.dbgym_cfg, pgport, @@ -79,12 +82,13 @@ def test_start_on_multiple_ports(self) -> None: pg_conn0 = self.create_pg_conn() pg_conn0.restore_pristine_snapshot() pg_conn0.start_with_changes() - self.assertEqual(set(get_running_postgres_ports()), {BASE_PGPORT}) - pg_conn1 = self.create_pg_conn(BASE_PGPORT + 1) + self.assertEqual(set(get_running_postgres_ports()), {DEFAULT_POSTGRES_PORT}) + pg_conn1 = self.create_pg_conn(DEFAULT_POSTGRES_PORT + 1) pg_conn1.restore_pristine_snapshot() pg_conn1.start_with_changes() self.assertEqual( - set(get_running_postgres_ports()), {BASE_PGPORT, BASE_PGPORT + 1} + set(get_running_postgres_ports()), + {DEFAULT_POSTGRES_PORT, DEFAULT_POSTGRES_PORT + 1}, ) # Clean up diff --git a/util/workspace.py b/util/workspace.py index 7ee7c91b..f94d70ef 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -342,6 +342,16 @@ def cur_task_runs_artifacts_path(self, *dirs: str, mkdir: bool = False) -> Path: return self.cur_task_runs_path("artifacts", *dirs, mkdir=mkdir) +def make_standard_dbgym_cfg() -> DBGymConfig: + """ + The "standard" way to make a DBGymConfig using the DBGYM_CONFIG_PATH envvar and the + default path of dbgym_config.yaml. + """ + dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml")) + dbgym_cfg = DBGymConfig(dbgym_config_path) + return dbgym_cfg + + def conv_inputpath_to_realabspath( dbgym_cfg: DBGymConfig, inputpath: os.PathLike[str] ) -> Path: