diff --git a/examples/benchmark.py b/examples/benchmark.py
new file mode 100644
index 000000000..967746a32
--- /dev/null
+++ b/examples/benchmark.py
@@ -0,0 +1,149 @@
+# Benchmark script
+
+import glob
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass, field
+
+import GPUtil
+
+
+@dataclass
+class BenchmarkConfig:
+    """Baseline benchmark config"""
+
+    # trainer to run
+    trainer: str = "simple_trainer.py"
+    # path to data
+    data_dir: str = "data/360_v2"
+    # scenes to run (order must match factors below)
+    scenes: tuple = (
+        "bicycle",
+        "bonsai",
+        "counter",
+        "garden",
+        "stump",
+        "kitchen",
+        "room",
+    )
+    # downscale factor for each scene
+    factors: tuple = (4, 2, 2, 4, 4, 2, 2)
+    # gpus to exclude
+    excluded_gpus: set = field(default_factory=set)
+    # result directory
+    result_dir: str = "results/baseline"
+    # dry run, useful for debugging
+    dry_run: bool = False
+    # extra model-specific configs
+    model_configs: dict = field(default_factory=dict)
+
+
+# Configurations for different gsplat options
+baseline_config = BenchmarkConfig()
+absgrad_config = BenchmarkConfig(
+    result_dir="results/absgrad",
+    model_configs={"--absgrad": "", "--grow_grad2d": 0.0006},
+)
+antialiased_config = BenchmarkConfig(
+    result_dir="results/antialiased", model_configs={"--antialiased": ""}
+)
+mcmc_config = BenchmarkConfig(
+    trainer="simple_trainer_mcmc.py",
+    result_dir="results/mcmc",
+)
+
+# Configs to run
+configs_to_run = [
+    baseline_config,
+    mcmc_config,
+    absgrad_config,
+    antialiased_config,
+]
+
+
+def train_scene(gpu, scene, factor, config):
+    """Train a single scene with the given config on the assigned GPU."""
+    # additional user-set model configs
+    model_config_args = " ".join(f"{k} {v}" for k, v in config.model_configs.items())
+
+    # train without eval
+    cmd = f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python {config.trainer} --eval_steps -1 --disable_viewer --data_factor {factor} --data_dir {config.data_dir}/{scene} --result_dir {config.result_dir}/{scene} {model_config_args}"
+
+    print(cmd)
+    if not config.dry_run:
+        os.system(cmd)
+
+    # eval and render for all the ckpts
+    ckpts = glob.glob(f"{config.result_dir}/{scene}/ckpts/*.pt")
+    for ckpt in ckpts:
+        cmd = f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python {config.trainer} --disable_viewer --data_factor {factor} --data_dir {config.data_dir}/{scene} --result_dir {config.result_dir}/{scene} --ckpt {ckpt} {model_config_args}"
+        print(cmd)
+        if not config.dry_run:
+            os.system(cmd)
+
+    return True
+
+
+def worker(gpu, scene, factor, config):
+    """This worker function starts a job and returns when it's done."""
+    print(f"Starting {config.trainer} job on GPU {gpu} with scene {scene}\n")
+    train_scene(gpu, scene, factor, config)
+    print(f"Finished {config.trainer} job on GPU {gpu} with scene {scene}\n")
+
+
+def dispatch_jobs(jobs, executor, config):
+    future_to_job = {}
+    reserved_gpus = set()  # GPUs that are slated for work but may not be active yet
+
+    while jobs or future_to_job:
+        # Get the list of available GPUs, not including those that are reserved.
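+        # NOTE: maxMemory/maxLoad are utilization fractions, so 0.1 means a
+        # GPU must be under 10% memory and compute load to count as available.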
+        all_available_gpus = set(
+            GPUtil.getAvailable(order="first", limit=10, maxMemory=0.1, maxLoad=0.1)
+        )
+        available_gpus = list(all_available_gpus - reserved_gpus - config.excluded_gpus)
+
+        # Launch new jobs on available GPUs
+        while available_gpus and jobs:
+            gpu = available_gpus.pop(0)
+            job = jobs.pop(0)
+            future = executor.submit(
+                worker, gpu, *job, config
+            )  # Unpacking job as arguments to worker
+            future_to_job[future] = (gpu, job)
+            reserved_gpus.add(gpu)  # Reserve this GPU until the job starts processing
+
+        # Check for completed jobs and remove them from the list of running jobs.
+        # Also, release the GPUs they were using.
+        done_futures = [future for future in future_to_job if future.done()]
+        for future in done_futures:
+            gpu, job = future_to_job.pop(
+                future
+            )  # Remove the entry associated with the completed future
+            # (each entry is the (gpu, job) tuple stored at submission time)
+            reserved_gpus.discard(gpu)  # Release this GPU
+            print(f"Job {job} has finished, releasing GPU {gpu}")
+        # Sleep briefly so this polling loop does not spin at full speed
+        # when no GPUs are available.
+        time.sleep(5)
+
+    print("All jobs have been processed.")
+
+
+def main():
+    """Run each config in configs_to_run serially, processing its scenes in parallel (multi gpu)."""
+
+    for config in configs_to_run:
+        # num jobs = num scenes to run for current config
+        jobs = list(zip(config.scenes, config.factors))
+
+        # Run multiple gpu train scripts
+        # Using ThreadPoolExecutor to manage the thread pool
+        with ThreadPoolExecutor(max_workers=8) as executor:
+            dispatch_jobs(jobs, executor, config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/requirements.txt b/examples/requirements.txt
index f5cf24dfe..4ee545fad 100644
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
@@ -16,3 +16,4 @@ opencv-python
 tyro
 Pillow
 tensorboard
+GPUtil
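Extra trainer flags are passed through model_configs verbatim, so a new
benchmark variant only needs another BenchmarkConfig appended to
configs_to_run. A minimal sketch, assuming the trainer exposes an
--sh_degree option (hypothetical here, for illustration only):

    # Hypothetical variant: each model_configs key/value pair is appended
    # verbatim to the training command built in train_scene
    low_sh_config = BenchmarkConfig(
        result_dir="results/low_sh",
        model_configs={"--sh_degree": 2},
    )
    configs_to_run.append(low_sh_config)

Boolean flags take an empty-string value (as in absgrad_config above), since
train_scene joins each key and value with a single space.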