Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
DelphianCalamity committed Aug 20, 2024
2 parents 6b1f720 + cd05687 commit dbb7880
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 26 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Cookie Monster is an on-device budgeting component that can be integrated into differentially private ad-measurement systems.
Powered by a robust theoretical framework known as Individual Differential Privacy (IDP), a variant of traditional differential privacy, Cookie Monster allows advertisers to conserve significantly more privacy budget compared to existing alternatives.
A description of this project can be found on our paper, titled Cookie Monster: Effective On-Device DP Budgeting for Private Attribution Measurement Systems and published as SOSP '24.
A description of this project can be found on our paper, titled Cookie Monster: Effective On-Device DP Budgeting for Private Attribution Measurement Systems and published at SOSP '24.

## Repo Structure

Expand Down Expand Up @@ -44,7 +44,7 @@ cd cookiemonster

Build the docker image for CookieMonster. This will automatically install all dependencies required for the CookieMonster system as well as the datasets used in the evaluation section of the paper. This step takes several minutes to finish (roughly 60 minutes) due to the processing and generation of the datasets.
``` bash
sudo docker build -t cookiemonster -f Dockerfile .
sudo docker build --network host -t cookiemonster -f Dockerfile .
```

## 3. Reproduce experiments
Expand All @@ -55,7 +55,7 @@ The script [experiments/run_all.sh](https://github.com/columbia/cookiemonster/bl

### 3.1. Run all experiments

Reproduce all Cookie Monster experiments by running the cookiemonster docker with the following command:
Reproduce all Cookie Monster experiments by running the cookiemonster docker with the following command, from any directory (e.g., the `cookiemonster` repository root):

``` bash
sudo docker run -v $PWD/logs:/cookiemonster/logs -v $PWD/figures:/cookiemonster/figures -v $PWD/cookiemonster/config:/cookiemonster/cookiemonster/config -v $PWD/temp:/tmp --network=host --name cookiemonster --shm-size=204.89gb --rm cookiemonster experiments/run_all.sh
Expand All @@ -73,7 +73,7 @@ With the `-v` flag we mount directories `cookiemonster/logs`, `cookiemonster/fig

### 3.2. Analyze results

The [experiments/runner.cli.py](https://github.com/columbia/cookiemonster/blob/artifact-sosp/experiments/runner.cli.py) script will automatically analyze the execution logs and create plots corresponding to the figures presented in the paper.
The previous `experiments/run_all.sh` command will automatically analyze the execution logs and create plots corresponding to the figures presented in the paper, using the [experiments/runner.cli.py](https://github.com/columbia/cookiemonster/blob/artifact-sosp/experiments/runner.cli.py) script.

Check the `figures` directory for all the outputs.
Because noise addition is not deterministic, results might not be identical across runs, but the relative differences between baselines should be the same.
10 changes: 4 additions & 6 deletions experiments/run_all.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
export LOGURU_LEVEL=ERROR

echo "Running Figures 4.a and 4.b.."
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob1
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob1 --loguru-level ERROR

echo "Running Figures 4.c and 4.d.."
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob2
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob2 --loguru-level ERROR

echo "Running Figures 5.a, 5.b and 5.c.."
python3 experiments/runner.cli.py --exp patcg_varying_epoch_granularity
python3 experiments/runner.cli.py --exp patcg_varying_epoch_granularity --loguru-level ERROR

echo "Running Figures 6.a, 6.b and 6.c.."
python3 experiments/runner.cli.py --exp criteo_run
python3 experiments/runner.cli.py --exp criteo_run --loguru-level ERROR
45 changes: 29 additions & 16 deletions experiments/runner.cli.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
import multiprocessing
import os
import time
from omegaconf import OmegaConf
import typer
import multiprocessing
from copy import deepcopy
from data.criteo.creators.query_pool_creator import QueryPoolDatasetCreator as CriteoQueries

import typer
from omegaconf import OmegaConf
from ray_runner import grid_run
from cookiemonster.utils import BUDGET, BIAS, LOGS_PATH

from cookiemonster.utils import BIAS, BUDGET, LOGS_PATH
from data.criteo.creators.query_pool_creator import (
QueryPoolDatasetCreator as CriteoQueries,
)
from notebooks.utils import save_data
from plotting.criteo_plot import criteo_plot_experiments_side_by_side
from plotting.microbenchmark_plot import microbenchmark_plot_budget_consumption_bars
from plotting.patcg_plot import patcg_plot_experiments_side_by_side
from plotting.criteo_plot import criteo_plot_experiments_side_by_side

app = typer.Typer()

Expand Down Expand Up @@ -72,8 +76,9 @@ def microbenchmark_varying_knob1(ray_session_dir):
path = "ray/microbenchmark/varying_knob1"
save_data(path, type="budget")
microbenchmark_plot_budget_consumption_bars(
"knob1", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_a_b.png")

"knob1", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_a_b.png"
)


def microbenchmark_varying_knob2(ray_session_dir):
dataset = "microbenchmark"
Expand Down Expand Up @@ -117,7 +122,8 @@ def microbenchmark_varying_knob2(ray_session_dir):
path = "ray/microbenchmark/varying_knob2"
save_data(path, type="budget")
microbenchmark_plot_budget_consumption_bars(
"knob2", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_c_d.png")
"knob2", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_c_d.png"
)


def microbenchmark_varying_epoch_granularity(ray_session_dir):
Expand Down Expand Up @@ -209,26 +215,33 @@ def criteo_run(ray_session_dir):
criteo_plot_experiments_side_by_side(
f"{LOGS_PATH.joinpath(path1)}",
f"{LOGS_PATH.joinpath(path2)}",
"figures/fig6_a_b_c_d.png")
"figures/fig6_a_b_c_d.png",
)


def criteo_impressions_run(ray_session_dir):
dataset = "criteo"
conversions_path = f"{dataset}/{dataset}_query_pool_conversions.csv"
workload_generation = OmegaConf.load("data/criteo/config.json")
impression_augment_rates = CriteoQueries(workload_generation).get_impression_augment_rates()
impression_augment_rates = CriteoQueries(
workload_generation
).get_impression_augment_rates()
ray_init = True

for rate in impression_augment_rates:
impressions_path = f"{dataset}/{dataset}_query_pool_impressions_augment_{rate}.csv"
impressions_path = (
f"{dataset}/{dataset}_query_pool_impressions_augment_{rate}.csv"
)
logs_dir = f"{dataset}/augment_impressions_{rate}"
config = {
"baseline": ["ipa", "cookiemonster_base", "cookiemonster"],
"dataset_name": dataset,
"impressions_path": impressions_path,
"conversions_path": conversions_path,
"num_days_attribution_window": [30],
"workload_size": [1_000], # force a high number so that we run on all queries
"workload_size": [
1_000
], # force a high number so that we run on all queries
"max_scheduling_batch_size_per_query": workload_generation.max_batch_size,
"min_scheduling_batch_size_per_query": workload_generation.min_batch_size,
"initial_budget": [1],
Expand Down Expand Up @@ -273,7 +286,7 @@ def patcg_varying_epoch_granularity(ray_session_dir):
grid_run(**config)
config["num_days_per_epoch"] = [1, 60]
grid_run(**config)

config["num_days_per_epoch"] = [14, 7]
grid_run(**config)

Expand All @@ -282,9 +295,9 @@ def patcg_varying_epoch_granularity(ray_session_dir):
save_data(path, type="bias")
os.makedirs("figures", exist_ok=True)
patcg_plot_experiments_side_by_side(
f"{LOGS_PATH.joinpath(path)}", "figures/fig5_a_b_c.png")
f"{LOGS_PATH.joinpath(path)}", "figures/fig5_a_b_c.png"
)



def patcg_varying_initial_budget(ray_session_dir):
dataset = "patcg"
Expand Down

0 comments on commit dbb7880

Please sign in to comment.