From 87fee1237b673c3bc7610641f0163c420fac6184 Mon Sep 17 00:00:00 2001
From: anwesha-saha <anwesha.saha284@gmail.com>
Date: Sat, 24 Feb 2024 21:18:09 -0500
Subject: [PATCH] support for yzlsm and qlsm for bo (#11)

* added support for yzlsm and qlsm for bo

* qlsm solver update

* remove redundant call
---
 endure.toml                      |  30 +++--
 endure/lsm/solver/qlsm_solver.py |   5 +-
 endure/util/db_log.py            |  23 +++-
 jobs/bayesian_pipeline.py        | 224 ++++++++++++++++++++-----------
 jobs/bo_job_runs.py              |  80 +++++++++++
 5 files changed, 263 insertions(+), 99 deletions(-)
 create mode 100644 jobs/bo_job_runs.py

diff --git a/endure.toml b/endure.toml
index 498d92e..2b6ec24 100644
--- a/endure.toml
+++ b/endure.toml
@@ -153,29 +153,33 @@ drop_last = true
 [job.BayesianOptimization]
 # -----------------------------------------------------------------------------
 num_iterations = 15
-num_restarts = 200
-raw_samples = 512
-initial_samples = 30
-max_levels = 16
-# This will take value 0 and 1 where 1 means write each cost and run details into the MySqlLite database
-# and 0 means run details are not stored in the database
-write_to_db = 1
-#by default the databases directory will be created inside the data director. To change this, you need to change ["io"]["data_dir"]
-db_path = "databases"
-# This must be a .db file for code to function. It will create a sqllite database
-db_name = "db_cost.db"
-
+num_restarts = 20
+# value of raw_samples determines how many initial random samples are taken from the search space before starting the optimization process
+raw_samples = 30
+initial_samples = 20
 
 # This is the q value used in BoTorch Acquisition functions.
 # if it is set to a value above 1 sequential processing will stop in acquisition function and batch processing will start
 # note that for batch processing tensor shape will change and will require modification of code.
 # TODO: Add code to handle batch
 batch_size = 1
+max_levels = 16
 
 # Acquisition function options
-#   [ExpectedImprovement, UpperConfidenceBound, qExpectedImprovement]
+# [ExpectedImprovement, UpperConfidenceBound, qExpectedImprovement]
 acquisition_function = "ExpectedImprovement"
 beta_value = 0.3
+# model_type can take the values "Classic", "QHybrid", "YZHybrid", or "KHybrid" (KHybrid support in the pipeline is still TODO)
+model_type = "YZHybrid"
+
+[job.BayesianOptimization.database]
+# write_to_db takes the value 0 or 1: 1 means each cost and the run details are written to the SQLite database,
+# and 0 means run details are not stored in the database
+write_to_db = 1
+# By default the databases directory will be created inside the data directory. To change this, you need to change ["io"]["data_dir"]
+db_path = "databases"
+# This must be a .db file for the code to function. It will create a SQLite database
+db_name = "db_cost.db"
 
 [job.BayesianOptimization.system]
 E = 1024
diff --git a/endure/lsm/solver/qlsm_solver.py b/endure/lsm/solver/qlsm_solver.py
index 7237245..3641179 100644
--- a/endure/lsm/solver/qlsm_solver.py
+++ b/endure/lsm/solver/qlsm_solver.py
@@ -43,8 +43,8 @@ def nominal_objective(
         q: float,
         w: float,
     ) -> float:
-        h, t, q = x
-        design = LSMDesign(h=h, T=t, Q=q, policy=Policy.QFixed)
+        h, t, q_val = x
+        design = LSMDesign(h=h, T=t, Q=q_val, policy=Policy.QFixed)
         cost = self.cf.calc_cost(design, system, z0, z1, q, w)
 
         return cost
@@ -80,6 +80,7 @@ def get_nominal_design(
             "method": "SLSQP",
             "bounds": get_bounds(
                 config=self.config,
+                policy=Policy.QFixed,
                 system=system,
                 robust=False,
             ),
diff --git a/endure/util/db_log.py b/endure/util/db_log.py
index 7317a4e..16eb652 100644
--- a/endure/util/db_log.py
+++ b/endure/util/db_log.py
@@ -12,13 +12,15 @@ def initialize_database(db_path='cost_log.db'):
             non_empty_reads REAL,
             range_queries REAL,
             writes REAL,
-            
             max_bits_per_element REAL,
             physical_entries_per_page INT,
             range_selectivity REAL,
             entries_per_page INT,
             total_elements INT,
-            read_write_asymmetry REAL
+            read_write_asymmetry REAL,
+            iterations INT,
+            sample_size INT,
+            acquisition_function TEXT
         );''')
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS design_costs (
@@ -27,6 +29,9 @@ def initialize_database(db_path='cost_log.db'):
             bits_per_element REAL,
             size_ratio INTEGER,
             policy INTEGER,
+            Q INTEGER,
+            Y INTEGER,
+            Z INTEGER,
             cost REAL,
             FOREIGN KEY (run_id) REFERENCES runs(run_id)
         );''')
@@ -34,12 +39,15 @@ def initialize_database(db_path='cost_log.db'):
     return connector
 
 
-def log_new_run(connector, system, workload):
+def log_new_run(connector, system, workload, iterations, sample, acqf):
     cursor = connector.cursor()
     cursor.execute('INSERT INTO runs (empty_reads, non_empty_reads, range_queries, writes, '
                    'max_bits_per_element, physical_entries_per_page, range_selectivity, '
-                   'entries_per_page, total_elements, read_write_asymmetry) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
-                   (workload.z0,  workload.z1, workload.q, workload.w, system.H, system.E, system.s, system.B, system.N, system.phi))
+                   'entries_per_page, total_elements, read_write_asymmetry, iterations, sample_size, '
+                   'acquisition_function) '
+                   'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
+                   (workload.z0,  workload.z1, workload.q, workload.w, system.H, system.E, system.s, system.B,
+                    system.N, system.phi, iterations, sample, acqf))
     connector.commit()
     return cursor.lastrowid
 
@@ -48,8 +56,9 @@ def log_design_cost(connector, run_id, design, cost):
     cursor = connector.cursor()
 
     policy_value = design.policy.value if hasattr(design.policy, 'value') else design.policy
-    cursor.execute('INSERT INTO design_costs (run_id, bits_per_element, size_ratio, policy, cost) '
-                   'VALUES (?, ?, ?, ?, ?)',(run_id, design.h, design.T, policy_value, cost))
+    cursor.execute('INSERT INTO design_costs (run_id, bits_per_element, size_ratio, policy, Q, Y, Z, cost) '
+                   'VALUES (?, ?, ?, ?, ?, ?, ?, ?)', (run_id, design.h, design.T, policy_value, design.Q, design.Y,
+                                                       design.Z, cost))
     connector.commit()
 
 
diff --git a/jobs/bayesian_pipeline.py b/jobs/bayesian_pipeline.py
index 2e79bf6..9fb17a9 100644
--- a/jobs/bayesian_pipeline.py
+++ b/jobs/bayesian_pipeline.py
@@ -4,6 +4,7 @@
 import logging
 import csv
 import os
+import time
 
 from botorch.models import MixedSingleTaskGP
 from botorch.fit import fit_gpytorch_model
@@ -16,16 +17,21 @@
 from endure.lsm.cost import EndureCost
 from endure.data.io import Reader
 from endure.lsm.types import LSMDesign, System, Policy, Workload
-from endure.lcm.data.generator import LCMDataGenerator
+from endure.lcm.data.generator import ClassicGenerator, QCostGenerator, YZCostGenerator, KHybridGenerator
 from endure.lsm.solver.classic_solver import ClassicSolver
+from endure.lsm.solver.qlsm_solver import QLSMSolver
+from endure.lsm.solver.yzlsm_solver import YZLSMSolver
+from endure.lsm.solver.klsm_solver import KLSMSolver
 from endure.util.db_log import initialize_database, log_new_run, log_design_cost
 
 
 class BayesianPipeline:
     def __init__(self, conf: dict) -> None:
+        self.end_time = None
         self.config: dict = conf
         self.bayesian_setting: dict = self.config["job"]["BayesianOptimization"]
-        self.cf: EndureCost = EndureCost(self.bayesian_setting["max_levels"])
+        max_levels = self.config['job']['BayesianOptimization']['max_levels']
+        self.cf: EndureCost = EndureCost(max_levels)
         self.log: logging.Logger = logging.getLogger(self.config["log"]["name"])
 
         self.system: System = System(**self.bayesian_setting["system"])
@@ -45,19 +51,22 @@ def __init__(self, conf: dict) -> None:
         self.beta_value: float = self.bayesian_setting["beta_value"]
         self.conn = None
         self.run_id: int = None
-        self.write_to_db = self.bayesian_setting["write_to_db"]
+        self.write_to_db = self.bayesian_setting["database"]["write_to_db"]
         self.output_dir = os.path.join(
-            self.config["io"]["data_dir"], self.bayesian_setting["db_path"]
+            self.config["io"]["data_dir"], self.bayesian_setting["database"]["db_path"]
         )
-        self.db_path = os.path.join(self.output_dir, self.bayesian_setting["db_name"])
+        self.db_path = os.path.join(self.output_dir, self.bayesian_setting["database"]["db_name"])
+        self.model_type = self.bayesian_setting['model_type']
 
     def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: Optional[float] = None,
             q: Optional[float] = None, w: Optional[float] = None, num_iterations: Optional[int] = None,
             sample_size: Optional[int] = None, acqf: Optional[str] = None) -> Tuple[Optional[LSMDesign],
                                                                                     Optional[float]]:
+        start_time = time.time()
         os.makedirs(self.output_dir, exist_ok=True)
         self.conn = initialize_database(self.db_path)
         system = system if system is not None else self.system
+        # print(system.E, "E - system")
         sample_size = sample_size if sample_size is not None else self.initial_samples
         z0 = z0 if z0 is not None else self.workload.z0
         z1 = z1 if z1 is not None else self.workload.z1
@@ -65,21 +74,33 @@ def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: O
         w = w if w is not None else self.workload.w
         w = w if w is not None else self.workload.w
         acqf = acqf if acqf is not None else self.acquisition_function
-        workload = Workload(z0, z1, q, w)
-        print("path", self.db_path)
-        self.run_id = log_new_run(self.conn, system, workload)
         iterations = num_iterations if num_iterations is not None else self.num_iterations
+        workload = Workload(z0, z1, q, w)
+        self.run_id = log_new_run(self.conn, system, workload, iterations, sample_size, acqf)
         train_x, train_y, best_y = self._generate_initial_data(z0, z1, q, w, system, sample_size)
         bounds = self.generate_initial_bounds(system)
         best_designs = []
-
         for i in range(iterations):
             new_candidates = self.get_next_points(train_x, train_y, best_y, bounds, acqf, 1)
             for cand in new_candidates:
-                h, size_ratio, policy_val = cand[0].item(), cand[1].item(), cand[2].item()
-                policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering
-                new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)]
+                h = cand[0].item()
+                if h == system.H:
+                    h = h - 0.01
+                if self.model_type == "QHybrid":
+                    size_ratio, q_val = cand[1].item(), cand[2].item()
+                    policy = Policy.QFixed
+                    new_designs = [LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, Q=int(q_val))]
+                elif self.model_type == "YZHybrid":
+                    size_ratio, y_val, z_val = cand[1].item(), cand[2].item(), cand[3].item()
+                    policy = Policy.YZHybrid
+                    new_designs = [LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, Y=int(y_val), Z=int(z_val))]
+                # TODO: Add KHybrid here
+                else:
+                    size_ratio, policy_val = cand[1].item(), cand[2].item()
+                    policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering
+                    new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)]
 
+            # This will solve the division by 0 error
             for design in new_designs:
                 try:
                     self.cf.calc_cost(design, system, z0, z1, q, w)
@@ -102,12 +123,15 @@ def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: O
             self.log.debug(f"Iteration {i + 1}/{iterations} complete")
         self.log.debug("Bayesian Optimization completed")
         self._print_best_designs(best_designs)
-        self._find_analytical_results(system, z0, z1, q, w)
+        self._find_analytical_results(system, z0, z1, q, w) # Uncomment this if running this file through endure.py
         sorted_designs = sorted(best_designs, key=lambda x: x[1])
         self.conn.close()
+        end_time = time.time()
+        elapsed_time = end_time - start_time
+        print("elapsed time", elapsed_time)
         if sorted_designs:
             best_design, best_cost = sorted_designs[0]
-            return best_design, best_cost
+            return best_design, best_cost, elapsed_time
         else:
             return None, None
 
@@ -116,13 +140,28 @@ def generate_initial_bounds(self, system: System) -> torch.Tensor:
         t_bounds = torch.tensor([int(self.bayesian_setting["bounds"]["T_min"]),
                                  int(self.bayesian_setting["bounds"]["T_max"])])
         policy_bounds = torch.tensor([0, 1])
-        bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1)
+        if self.model_type == "QHybrid":
+            q_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"]-1])
+            bounds = torch.stack([h_bounds, t_bounds, q_bounds], dim=-1)
+        elif self.model_type == "YZHybrid":
+            y_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"]-1])
+            z_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"]-1])
+            bounds = torch.stack([h_bounds, t_bounds, y_bounds, z_bounds], dim=-1)
+        # elif self.model_type == "KHybrid": # TODO add support for KHybrid model
+        else:
+            bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1)
         return bounds
 
     def get_next_points(self, x: torch.Tensor, y: torch.Tensor, best_y: float, bounds: torch.Tensor,
                         acquisition_function: str = "ExpectedImprovement", n_points: int = 1) -> torch.Tensor:
-        single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2], input_transform=Normalize(d=x.shape[1], bounds=bounds),
-                                         outcome_transform=Standardize(m=1))
+        if self.model_type == "QHybrid" or self.model_type == "Classic":
+            single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2], input_transform=Normalize(d=x.shape[1],
+                                                                                              bounds=bounds),
+                                             outcome_transform=Standardize(m=1))
+        elif self.model_type == "YZHybrid":
+            single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2, 3], input_transform=Normalize(d=x.shape[1],
+                                                                                                 bounds=bounds),
+                                             outcome_transform=Standardize(m=1))
         mll = ExactMarginalLogLikelihood(single_model.likelihood, single_model)
         fit_gpytorch_model(mll)
         if acquisition_function == "ExpectedImprovement":
@@ -134,11 +173,23 @@ def get_next_points(self, x: torch.Tensor, y: torch.Tensor, best_y: float, bound
             acqf = qExpectedImprovement(model=single_model, best_f=best_y)
         else:
             raise ValueError(f"Unknown acquisition function: {acquisition_function}")
+        t_bounds = bounds[:, 1]
+        lower_t_bound = int(np.floor(t_bounds[0].item()))
+        upper_t_bound = int(np.ceil(t_bounds[1].item()))
         fixed_features_list = []
-        for size_ratio in range(2, 33):
-            for pol in range(2):
-                fixed_features_list.append({1: size_ratio, 2: pol})
-
+        if self.model_type == "Classic":
+            for size_ratio in range(lower_t_bound, upper_t_bound):
+                for pol in range(2):
+                    fixed_features_list.append({1: size_ratio, 2: pol})
+        elif self.model_type == "QHybrid":
+            for size_ratio in range(lower_t_bound, upper_t_bound):
+                for q in range(1, size_ratio-1):
+                    fixed_features_list.append({1: size_ratio, 2: q})
+        elif self.model_type == "YZHybrid":
+            for size_ratio in range(lower_t_bound, upper_t_bound, 2):
+                for y in range(1, size_ratio-1):
+                    for z in range(1, size_ratio-1):
+                        fixed_features_list.append({1: size_ratio, 2: y, 3: z})
         candidates, _ = optimize_acqf_mixed(
             acq_function=acqf,
             bounds=bounds,
@@ -153,18 +204,33 @@ def _generate_initial_data(self, z0, z1, q, w, system: System, n: int = 30, run_
             Tuple[torch.Tensor, torch.Tensor]:
         train_x = []
         train_y = []
-        policy = 0
         run_id = run_id if run_id is not None else self.run_id
-        lcm_data_generator = LCMDataGenerator()
+        if self.model_type == "QHybrid":
+            generator = QCostGenerator()
+        elif self.model_type == "YZHybrid":
+            generator = YZCostGenerator()
+        elif self.model_type == "KHybrid":
+            generator = KHybridGenerator()
+        else:
+            generator = ClassicGenerator()
         for _ in range(n):
-            design = lcm_data_generator._sample_design(system)
-            if design.policy == Policy.Leveling:
-                policy = 0
-            elif design.policy == Policy.Tiering:
-                policy = 1
-            x_values = np.array([design.h, int(design.T), int(policy)])
+            design = generator._sample_design(system)
+            if self.model_type == "Classic":
+                if design.policy == Policy.Leveling:
+                    policy = 0
+                elif design.policy == Policy.Tiering:
+                    policy = 1
+                x_values = np.array([design.h, design.T, policy])
+                # log_design = LSMDesign(design.h, policy="", design.T, policy)
+            elif self.model_type == "QHybrid":
+                x_values = np.array([design.h, design.T, design.Q])
+                # log_design = LSMDesign(design.h, design.T, policy=Policy.QFixed, Q=design.Q)
+            elif self.model_type == "YZHybrid":
+                x_values = np.array([design.h, design.T, design.Y, design.Z])
+                # log_design = LSMDesign(design.h, design.T, design.Y, design.Z)
+            # TODO: add logic for KHybrid
             cost = self.cf.calc_cost(design, system, z0, z1, q, w)
-            log_design_cost(self.conn, run_id, LSMDesign(design.h, design.T, policy), cost)
+            log_design_cost(self.conn, run_id, design, cost)
             train_x.append(x_values)
             train_y.append(cost)
         train_x = np.array(train_x)
@@ -173,52 +239,44 @@ def _generate_initial_data(self, z0, z1, q, w, system: System, n: int = 30, run_
         best_y = train_y.min().item()
         return train_x, train_y, best_y
 
-    def _scale_and_standardize(self, train_x: torch.Tensor, train_y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-        scaled_train_x = self._min_max_scale(train_x)
-        standardized_train_y = self._standardize_mean_std(train_y)
-        return scaled_train_x, standardized_train_y
-
-    def _min_max_scale(self, x: torch.Tensor, bounds) -> torch.Tensor:
-        continuous_data = x[:, :2]
-        categorical_data = x[:, 2:]
-        scaled_continuous_data = (continuous_data - bounds[:, :2][0]) / (bounds[:, :2][1] - bounds[:, :2][0])
-        scaled_data = torch.cat([scaled_continuous_data, categorical_data], dim=-1)
-        return scaled_data
-
-    def _standardize_mean_std(self, x: torch.Tensor) -> torch.Tensor:
-        stddim = -1 if x.dim() < 2 else -2
-        x_std = x.std(dim=stddim, keepdim=True)
-        x_std = x_std.where(x_std >= 1e-9, torch.full_like(x_std, 1.0))
-        return (x - x.mean(dim=stddim, keepdim=True)) / x_std
-
-    def _initialize_model(self, train_x: torch.Tensor, train_y: torch.Tensor, state_dict: dict = None) \
-            -> Tuple[MixedSingleTaskGP, ExactMarginalLogLikelihood]:
-        print("Initial train_x", train_x)
-        gp_model = MixedSingleTaskGP(train_x, train_y, cat_dims=[-1])
-        if state_dict is not None:
-            gp_model.load_state_dict(state_dict)
-
-        mll = ExactMarginalLogLikelihood(gp_model.likelihood, gp_model)
-        return mll, gp_model
-
-    def _update_best_designs(self, best_designs: List[Tuple[LSMDesign, float]], new_x: torch.Tensor, new_y: torch.Tensor) -> List[Tuple[LSMDesign, float]]:
+    def _update_best_designs(self, best_designs: List[Tuple[LSMDesign, float]], new_x: torch.Tensor,
+                             new_y: torch.Tensor) -> List[Tuple[LSMDesign, float]]:
         for x, y in zip(new_x, new_y):
-            h, size_ratio, policy_continuous = x[0], x[1], x[2]
-            policy = Policy.Leveling if policy_continuous < 0.5 else Policy.Tiering
-            best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), policy), y.item()))
+            if self.model_type == "QHybrid":
+                h, size_ratio, qvalue = x[0], x[1], x[2]
+                best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), qvalue.item()), y.item()))
+            elif self.model_type == "YZHybrid":
+                h, size_ratio, yvalue, zvalue = x[0], x[1], x[2], x[3]
+                best_designs.append(
+                    (LSMDesign(h.item(), np.ceil(size_ratio.item()), yvalue.item(), zvalue.item()), y.item()))
+            # TODO: code for KHybrid to be added
+            else:
+                h, size_ratio, policy = x[0], x[1], x[2]
+                # policy = Policy.Leveling if policy_continuous < 0.5 else Policy.Tiering
+                best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), policy.item()), y.item()))
         return best_designs
 
     def _print_best_designs(self, best_designs: List[Tuple[LSMDesign, float]]) -> None:
         sorted_designs = sorted(best_designs, key=lambda x: x[1])
         print("Best Design Found:")
-        for design, cost in sorted_designs[:1]:
-            print(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}")
-        with open('best_designs.txt', 'w') as file:
-            file.write("All Best Designs Found:\n")
-            for design, cost in best_designs:
-                file.write(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}\n")
+        if self.model_type == "Classic":
+            for design, cost in sorted_designs[:1]:
+                print(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}")
+            with open('best_designs.txt', 'w') as file:
+                file.write("All Best Designs Found:\n")
+                for design, cost in best_designs:
+                    file.write(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}\n")
+        if self.model_type == "QHybrid":
+            for design, cost in sorted_designs[:1]:
+                print(f"Design: h={design.h}, T={design.T}, Q={design.Q}, Cost={cost}")
+            with open('best_designs.txt', 'w') as file:
+                file.write("All Best Designs Found:\n")
+                for design, cost in best_designs:
+                    file.write(f"Design: h={design.h}, T={design.T}, Q={design.Q}, Cost={cost}\n")
 
-    def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Optional[System] = None, filename: str = 'best_designs.csv') -> None:
+
+    def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Optional[System] = None,
+                      filename: str = 'best_designs.csv') -> None:
         sorted_designs = sorted(best_designs, key=lambda x: x[1])[:1]
         with open(filename, mode='w', newline='') as file:
             writer = csv.writer(file)
@@ -235,16 +293,28 @@ def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Opt
     def _find_analytical_results(self, system: System, z0: float, z1: float, q: float, w: float,
                                  conf: Optional[dict] = None) -> Tuple[LSMDesign, float]:
         conf = conf if conf is not None else self.config
-        solver = ClassicSolver(conf)
+        if self.model_type == "Classic":
+            solver = ClassicSolver(conf)
+        elif self.model_type == "QHybrid":
+            solver = QLSMSolver(conf)
+        elif self.model_type == "YZHybrid":
+            solver = YZLSMSolver(conf)
         nominal_design, nominal_solution = solver.get_nominal_design(system, z0, z1, q, w)
-        x = np.array([[nominal_design.h, nominal_design.T]])
-        train_x = torch.tensor(x)
-        policy = nominal_design.policy
-        cost = solver.nominal_objective(x[0], policy, system, z0, z1, q, w)
-        train_y = torch.tensor(cost, dtype=torch.float64).unsqueeze(-1)
+
+        # train_x = torch.tensor(x)
+        if self.model_type == "Classic":
+            x = np.array([[nominal_design.h, nominal_design.T]])
+            policy = nominal_design.policy
+            cost = solver.nominal_objective(x[0], policy, system, z0, z1, q, w)
+        elif self.model_type == "QHybrid":
+            x = np.array([[nominal_design.h, nominal_design.T, nominal_design.Q]])
+            cost = solver.nominal_objective(x[0], system, z0, z1, q, w)
+        elif self.model_type == "YZHybrid":
+            x = np.array([[nominal_design.h, nominal_design.T, nominal_design.Y, nominal_design.Z]])
+            cost = solver.nominal_objective(x[0], system, z0, z1, q, w)
+        # train_y = torch.tensor(cost, dtype=torch.float64).unsqueeze(-1)
         print("Cost for the nominal design using analytical solver: ", cost)
         print("Nominal Design suggested by analytical solver: ", nominal_design)
-
         return nominal_design, cost
 
 
@@ -255,4 +325,4 @@ def _find_analytical_results(self, system: System, z0: float, z1: float, q: floa
     log.info("Initializing Bayesian Optimization Job")
 
     bayesian_optimizer = BayesianPipeline(config)
-    bayesian_optimizer.run()
\ No newline at end of file
+    bayesian_optimizer.run()
diff --git a/jobs/bo_job_runs.py b/jobs/bo_job_runs.py
new file mode 100644
index 0000000..dbbe456
--- /dev/null
+++ b/jobs/bo_job_runs.py
@@ -0,0 +1,80 @@
+import sys
+import os
+import csv
+import toml
+import numpy as np
+import torch
+
+sys.path.append(os.path.join(sys.path[0], '../'))
+
+from endure.lcm.data.generator import LCMDataGenerator
+from endure.data.io import Reader
+from jobs.bayesian_pipeline import BayesianPipeline
+from endure.lsm.solver.classic_solver import ClassicSolver
+from endure.lsm.cost import EndureCost
+
+
+def to_cuda(obj, seen=None):
+    """Recursively move tensors to CUDA if available, avoiding infinite recursion."""
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if seen is None:
+            seen = set()
+
+        obj_id = id(obj)
+        if obj_id in seen:
+            return
+        seen.add(obj_id)
+
+        for attr_name in dir(obj):
+            if attr_name.startswith('__'):
+                continue
+
+            try:
+                attr_value = getattr(obj, attr_name)
+                if isinstance(attr_value, torch.Tensor):
+                    setattr(obj, attr_name, attr_value.to(device))
+                elif hasattr(attr_value, '__dict__') or isinstance(attr_value, (list, dict)):
+                    to_cuda(attr_value, seen)
+            except Exception as e:
+                pass
+    else:
+        print("CUDA not available")
+
+
+def compare_designs(n_runs=3, csv_filename='design_comparison.csv'):
+    """Compare Bayesian and analytical designs."""
+    with open(csv_filename, mode='w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(['Entries per page(E)', 'Physical Entries per page(B)', 'Selectivity(s)',
+                         'Max bits per element(H)', 'Total elements (N)', 'Empty Reads', 'Non-Empty Reads',
+                         'Range Queries', 'Writes', 'BO Design', 'Analytical Design', 'BO Cost',
+                         'Analytical Cost', 'Diff(Analytical-Bayesian)', "Elapsed Time"])
+
+        for i in range(n_runs):
+            print(f"Iteration {i + 1}/{n_runs} running")
+            system = generator._sample_system()
+            z0, z1, q, w = generator._sample_workload(4)
+            print(np.floor(system.H))
+            bo_design, bo_cost, time = bayesian_optimizer.run(system, z0, z1, q, w)
+            analytical_design, analytical_cost = bayesian_optimizer._find_analytical_results(system, z0, z1, q, w)
+            writer.writerow([system.E, system.B, system.s, system.H, system.N, z0, z1, q, w,
+                             bo_design, analytical_design, bo_cost, analytical_cost, analytical_cost - bo_cost, time])
+
+
+if __name__ == "__main__":
+    file_dir = os.path.dirname(__file__)
+    config_path = os.path.join(file_dir, "../endure.toml")
+    with open(config_path) as fid:
+        config = toml.load(fid)
+    bayesian_optimizer = BayesianPipeline(config)
+    generator = LCMDataGenerator()
+    solver = ClassicSolver(config)
+    cf = EndureCost(config)
+
+    to_cuda(bayesian_optimizer)
+    to_cuda(generator)
+    to_cuda(solver)
+    to_cuda(cf)
+
+    compare_designs()
\ No newline at end of file