Skip to content

Commit

Permalink
Postgres start/stop demo (#45)
Browse files Browse the repository at this point in the history
**Summary**: Basic demo letting you start and stop Postgres.

**Demo**:
(Yes, this is a demo of the demo). The demo uses `pgrep postgres` to
show that no Postgres instance is running. Then the user is able to
start/stop Postgres on the web UI. The results of that are verified
using `pgrep postgres`.


https://github.com/user-attachments/assets/e407df5d-62b6-4c14-90c8-7602d2d8dce1

**Details**:
* Uses `streamlit` for the frontend.
* Uses the recently factored out `PostgresConn` in the backend.
* Right now it is only deployable locally. In the future we'll want to
deploy it on a CMU-DB machine so people can play around with it.
* It currently relies on the same workspace used in development. In the
future, we may want to give the demo its own workspace. The setup
process of this workspace would then require more scripting.
  • Loading branch information
wangpatrick57 authored Oct 30, 2024
1 parent 2faecff commit 72e9808
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 16 deletions.
11 changes: 6 additions & 5 deletions .github/workflows/tests_ci.yaml → .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: Unit and Integration Tests
name: Static, Unit, Integration, and End-to-End Tests

on:
push: {}
pull_request:
branches: [main]

jobs:
ci:
tests:
# The code for the self-hosted runners is at https://github.com/wangpatrick57/dbgym-runners.
runs-on: self-hosted

Expand Down Expand Up @@ -50,11 +50,12 @@ jobs:
- name: Run integration tests
# Integration tests do require external systems to be running (most commonly a database instance).
# Unlike end-to-end tests though, they test a specific module in a detailed manner, much like a unit test does.
#
# We set `INTENDED_DBDATA_HARDWARE` so that it's seen when `integtest_pg_conn.py` executes `./tune/env/set_up_env_integtests.sh`.
env:
# We set `INTENDED_DBDATA_HARDWARE` so that it's seen when `integtest_pg_conn.py` executes `./tune/env/set_up_env_integtests.sh`.
INTENDED_DBDATA_HARDWARE: ssd
run: |
. "$HOME/.cargo/env"
export INTENDED_DBDATA_HARDWARE=ssd
export
./scripts/run_integration_tests.sh
- name: Run end-to-end tests
Expand Down
1 change: 1 addition & 0 deletions dependencies/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,4 @@ virtualenv==20.25.0
Werkzeug==3.0.1
wrapt==1.14.1
zipp==3.17.0
streamlit==1.39.0
5 changes: 5 additions & 0 deletions experiments/load_per_machine_envvars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@ host=$(hostname)

if [ "$host" == "dev4" ]; then
export DBDATA_PARENT_DPATH=/mnt/nvme1n1/phw2/dbgym_tmp/
export INTENDED_DBDATA_HARDWARE=ssd
elif [ "$host" == "dev6" ]; then
export DBDATA_PARENT_DPATH=/mnt/nvme0n1/phw2/dbgym_tmp/
export INTENDED_DBDATA_HARDWARE=ssd
elif [ "$host" == "patnuc" ]; then
export DBDATA_PARENT_DPATH=../dbgym_workspace/tmp/
export INTENDED_DBDATA_HARDWARE=hdd
else
echo "Did not recognize host \"$host\""
exit 1
Expand Down
4 changes: 2 additions & 2 deletions scripts/pat_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
set -euxo pipefail

SCALE_FACTOR=0.01
INTENDED_DBDATA_HARDWARE=ssd
. ./experiments/load_per_machine_envvars.sh

# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars)
python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
python3 task.py dbms postgres build
python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH
exit 0

# benchmark
Expand Down
2 changes: 2 additions & 0 deletions scripts/run_demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Launches the Streamlit demo UI.
# The script path below is relative, so run this from the repository root.
python -m streamlit run tune/demo/main.py
5 changes: 2 additions & 3 deletions task.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from dbms.cli import dbms_group
from manage.cli import manage_group
from tune.cli import tune_group
from util.workspace import DBGymConfig
from util.workspace import make_standard_dbgym_cfg

# TODO(phw2): Save commit, git diff, and run command.
# TODO(phw2): Remove write permissions on old run_*/ dirs to enforce that they are immutable.
Expand All @@ -28,8 +28,7 @@
@click.pass_context
def task(ctx: click.Context) -> None:
"""🛢️ CMU-DB Database Gym: github.com/cmu-db/dbgym 🏋️"""
dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml"))
dbgym_cfg = DBGymConfig(dbgym_config_path)
dbgym_cfg = make_standard_dbgym_cfg()
ctx.obj = dbgym_cfg

log_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True)
Expand Down
Empty file added tune/demo/__init__.py
Empty file.
64 changes: 64 additions & 0 deletions tune/demo/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import streamlit as st

from tune.env.pg_conn import PostgresConn
from util.pg import DEFAULT_POSTGRES_PORT, get_is_postgres_running
from util.workspace import (
DEFAULT_BOOT_CONFIG_FPATH,
DBGymConfig,
default_dbdata_parent_dpath,
default_pgbin_path,
default_pristine_dbdata_snapshot_path,
make_standard_dbgym_cfg,
)


# Caching the result as a Streamlit resource guarantees DBGymConfig is only constructed once.
# See DBGymConfig.__init__() for why a single instance is required.
@st.cache_resource
def make_dbgym_cfg() -> DBGymConfig:
    """Return the standard DBGymConfig, reusing the cached instance after the first call."""
    cfg = make_standard_dbgym_cfg()
    return cfg


class Demo:
    """Streamlit page that shows whether Postgres is running and lets the user start or stop it."""

    # Benchmark name and scale factor used to locate the pristine dbdata snapshot.
    BENCHMARK = "tpch"
    SCALE_FACTOR = 0.01

    def __init__(self) -> None:
        """Resolve the default workspace paths and build the PostgresConn the page drives."""
        self.dbgym_cfg = make_dbgym_cfg()
        workspace_path = self.dbgym_cfg.dbgym_workspace_path

        # Every path below is the workspace's default location for that artifact.
        self.pristine_dbdata_snapshot_path = default_pristine_dbdata_snapshot_path(
            workspace_path, Demo.BENCHMARK, Demo.SCALE_FACTOR
        )
        self.dbdata_parent_dpath = default_dbdata_parent_dpath(workspace_path)
        self.pgbin_dpath = default_pgbin_path(workspace_path)

        self.pg_conn = PostgresConn(
            self.dbgym_cfg,
            DEFAULT_POSTGRES_PORT,
            self.pristine_dbdata_snapshot_path,
            self.dbdata_parent_dpath,
            self.pgbin_dpath,
            # NOTE(review): presumably a flag disabling an optional feature tied to the
            # boot config path passed next -- confirm against PostgresConn's signature.
            False,
            DEFAULT_BOOT_CONFIG_FPATH,
        )

    def main(self) -> None:
        """Render the current Postgres status together with the matching start/stop button."""
        if get_is_postgres_running():
            st.write("Postgres is running")

            if st.button("Stop Postgres"):
                self.pg_conn.shutdown_postgres()
                # Rerun so the page reflects the new state.
                st.rerun()
            return

        st.write("Postgres is not running")

        if st.button("Start Postgres"):
            # Restore the pristine snapshot first, then start Postgres.
            self.pg_conn.restore_pristine_snapshot()
            self.pg_conn.start_with_changes()
            # Rerun so the page reflects the new state.
            st.rerun()


# Script entry point: build the demo and render the page.
# NOTE(review): scripts/run_demo.sh launches this file via `streamlit run`; presumably
# Streamlit executes it with __name__ == "__main__" -- confirm.
if __name__ == "__main__":
    demo = Demo()
    demo.main()
16 changes: 10 additions & 6 deletions tune/env/integtest_pg_conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import yaml

from tune.env.pg_conn import PostgresConn
from util.pg import get_is_postgres_running, get_running_postgres_ports
from util.pg import (
DEFAULT_POSTGRES_PORT,
get_is_postgres_running,
get_running_postgres_ports,
)
from util.workspace import (
DEFAULT_BOOT_CONFIG_FPATH,
DBGymConfig,
Expand All @@ -17,7 +21,6 @@
ENV_INTEGTESTS_DBGYM_CONFIG_FPATH = Path("tune/env/env_integtests_dbgym_config.yaml")
BENCHMARK = "tpch"
SCALE_FACTOR = 0.01
BASE_PGPORT = 5432


def get_unittest_workspace_path() -> Path:
Expand Down Expand Up @@ -54,7 +57,7 @@ def setUp(self) -> None:
def tearDown(self) -> None:
self.assertFalse(get_is_postgres_running())

def create_pg_conn(self, pgport: int = BASE_PGPORT) -> PostgresConn:
def create_pg_conn(self, pgport: int = DEFAULT_POSTGRES_PORT) -> PostgresConn:
return PostgresConn(
PostgresConnTests.dbgym_cfg,
pgport,
Expand All @@ -79,12 +82,13 @@ def test_start_on_multiple_ports(self) -> None:
pg_conn0 = self.create_pg_conn()
pg_conn0.restore_pristine_snapshot()
pg_conn0.start_with_changes()
self.assertEqual(set(get_running_postgres_ports()), {BASE_PGPORT})
pg_conn1 = self.create_pg_conn(BASE_PGPORT + 1)
self.assertEqual(set(get_running_postgres_ports()), {DEFAULT_POSTGRES_PORT})
pg_conn1 = self.create_pg_conn(DEFAULT_POSTGRES_PORT + 1)
pg_conn1.restore_pristine_snapshot()
pg_conn1.start_with_changes()
self.assertEqual(
set(get_running_postgres_ports()), {BASE_PGPORT, BASE_PGPORT + 1}
set(get_running_postgres_ports()),
{DEFAULT_POSTGRES_PORT, DEFAULT_POSTGRES_PORT + 1},
)

# Clean up
Expand Down
10 changes: 10 additions & 0 deletions util/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,16 @@ def cur_task_runs_artifacts_path(self, *dirs: str, mkdir: bool = False) -> Path:
return self.cur_task_runs_path("artifacts", *dirs, mkdir=mkdir)


def make_standard_dbgym_cfg() -> DBGymConfig:
    """
    Build a DBGymConfig the "standard" way.

    The config file path is taken from the DBGYM_CONFIG_PATH envvar, falling back to
    dbgym_config.yaml when the envvar is not set.
    """
    config_fpath = Path(os.environ.get("DBGYM_CONFIG_PATH", "dbgym_config.yaml"))
    return DBGymConfig(config_fpath)


def conv_inputpath_to_realabspath(
dbgym_cfg: DBGymConfig, inputpath: os.PathLike[str]
) -> Path:
Expand Down

0 comments on commit 72e9808

Please sign in to comment.