From 87fee1237b673c3bc7610641f0163c420fac6184 Mon Sep 17 00:00:00 2001 From: anwesha-saha Date: Sat, 24 Feb 2024 21:18:09 -0500 Subject: [PATCH] support for yzlsm and qlsm for bo (#11) * added support for yzlsm and qlsm for bo * qlsm solver update * remove redundant call --- endure.toml | 30 +++-- endure/lsm/solver/qlsm_solver.py | 5 +- endure/util/db_log.py | 23 +++- jobs/bayesian_pipeline.py | 224 ++++++++++++++++++++----------- jobs/bo_job_runs.py | 80 +++++++++++ 5 files changed, 263 insertions(+), 99 deletions(-) create mode 100644 jobs/bo_job_runs.py diff --git a/endure.toml b/endure.toml index 498d92e..2b6ec24 100644 --- a/endure.toml +++ b/endure.toml @@ -153,29 +153,33 @@ drop_last = true [job.BayesianOptimization] # ----------------------------------------------------------------------------- num_iterations = 15 -num_restarts = 200 -raw_samples = 512 -initial_samples = 30 -max_levels = 16 -# This will take value 0 and 1 where 1 means write each cost and run details into the MySqlLite database -# and 0 means run details are not stored in the database -write_to_db = 1 -#by default the databases directory will be created inside the data director. To change this, you need to change ["io"]["data_dir"] -db_path = "databases" -# This must be a .db file for code to function. It will create a sqllite database -db_name = "db_cost.db" - +num_restarts = 20 +# value of raw_samples determines how many initial random samples are taken from the search space before starting the optimization process +raw_samples = 30 +initial_samples = 20 # This is the q value used in BoTorch Acquisition functions. # if it is set to a value above 1 sequential processing will stop in acquisition function and batch processing will start # note that for batch processing tensor shape will change and will require modification of code. # TODO: Add code to handle batch batch_size = 1 +max_levels = 16 # Acquisition function options -# [ExpectedImprovement, UpperConfidenceBound, qExpectedImprovement] +# [ExpectedImprovement, UpperConfidenceBound, qExpectedImprovement] acquisition_function = "ExpectedImprovement" beta_value = 0.3 +# model_type can take values - "Classic", "QHybrid", "YZHybrid", "KHybrid" +model_type = "YZHybrid" + +[job.BayesianOptimization.database] +# This will take value 0 and 1 where 1 means write each cost and run details into the MySqlLite database +# and 0 means run details are not stored in the database +write_to_db = 1 +# by default the databases directory will be created inside the data director. To change this, you need to change ["io"]["data_dir"] +db_path = "databases" +# This must be a .db file for code to function. It will create a sqllite database +db_name = "db_cost.db" [job.BayesianOptimization.system] E = 1024 diff --git a/endure/lsm/solver/qlsm_solver.py b/endure/lsm/solver/qlsm_solver.py index 7237245..3641179 100644 --- a/endure/lsm/solver/qlsm_solver.py +++ b/endure/lsm/solver/qlsm_solver.py @@ -43,8 +43,8 @@ def nominal_objective( q: float, w: float, ) -> float: - h, t, q = x - design = LSMDesign(h=h, T=t, Q=q, policy=Policy.QFixed) + h, t, q_val = x + design = LSMDesign(h=h, T=t, Q=q_val, policy=Policy.QFixed) cost = self.cf.calc_cost(design, system, z0, z1, q, w) return cost @@ -80,6 +80,7 @@ def get_nominal_design( "method": "SLSQP", "bounds": get_bounds( config=self.config, + policy=Policy.QFixed, system=system, robust=False, ), diff --git a/endure/util/db_log.py b/endure/util/db_log.py index 7317a4e..16eb652 100644 --- a/endure/util/db_log.py +++ b/endure/util/db_log.py @@ -12,13 +12,15 @@ def initialize_database(db_path='cost_log.db'): non_empty_reads REAL, range_queries REAL, writes REAL, - max_bits_per_element REAL, physical_entries_per_page INT, range_selectivity REAL, entries_per_page INT, total_elements INT, - read_write_asymmetry REAL + read_write_asymmetry REAL, + iterations INT, + sample_size INT, + acquisition_function TEXT );''') cursor.execute(''' CREATE TABLE IF NOT EXISTS design_costs ( @@ -27,6 +29,9 @@ def initialize_database(db_path='cost_log.db'): bits_per_element REAL, size_ratio INTEGER, policy INTEGER, + Q INTEGER, + Y INTEGER, + Z INTEGER, cost REAL, FOREIGN KEY (run_id) REFERENCES runs(run_id) );''') @@ -34,12 +39,15 @@ def initialize_database(db_path='cost_log.db'): return connector -def log_new_run(connector, system, workload): +def log_new_run(connector, system, workload, iterations, sample, acqf): cursor = connector.cursor() cursor.execute('INSERT INTO runs (empty_reads, non_empty_reads, range_queries, writes, ' 'max_bits_per_element, physical_entries_per_page, range_selectivity, ' - 'entries_per_page, total_elements, read_write_asymmetry) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', - (workload.z0, workload.z1, workload.q, workload.w, system.H, system.E, system.s, system.B, system.N, system.phi)) + 'entries_per_page, total_elements, read_write_asymmetry, iterations, sample_size, ' + 'acquisition_function) ' + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + (workload.z0, workload.z1, workload.q, workload.w, system.H, system.E, system.s, system.B, + system.N, system.phi, iterations, sample, acqf)) connector.commit() return cursor.lastrowid @@ -48,8 +56,9 @@ def log_design_cost(connector, run_id, design, cost): cursor = connector.cursor() policy_value = design.policy.value if hasattr(design.policy, 'value') else design.policy - cursor.execute('INSERT INTO design_costs (run_id, bits_per_element, size_ratio, policy, cost) ' - 'VALUES (?, ?, ?, ?, ?)',(run_id, design.h, design.T, policy_value, cost)) + cursor.execute('INSERT INTO design_costs (run_id, bits_per_element, size_ratio, policy, Q, Y, Z, cost) ' + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)', (run_id, design.h, design.T, policy_value, design.Q, design.Y, + design.Z, cost)) connector.commit() diff --git a/jobs/bayesian_pipeline.py b/jobs/bayesian_pipeline.py index 2e79bf6..9fb17a9 100644 --- a/jobs/bayesian_pipeline.py +++ b/jobs/bayesian_pipeline.py @@ -4,6 +4,7 @@ import logging import csv import os +import time from botorch.models import MixedSingleTaskGP from botorch.fit import fit_gpytorch_model @@ -16,16 +17,21 @@ from endure.lsm.cost import EndureCost from endure.data.io import Reader from endure.lsm.types import LSMDesign, System, Policy, Workload -from endure.lcm.data.generator import LCMDataGenerator +from endure.lcm.data.generator import ClassicGenerator, QCostGenerator, YZCostGenerator, KHybridGenerator from endure.lsm.solver.classic_solver import ClassicSolver +from endure.lsm.solver.qlsm_solver import QLSMSolver +from endure.lsm.solver.yzlsm_solver import YZLSMSolver +from endure.lsm.solver.klsm_solver import KLSMSolver from endure.util.db_log import initialize_database, log_new_run, log_design_cost class BayesianPipeline: def __init__(self, conf: dict) -> None: + self.end_time = None self.config: dict = conf self.bayesian_setting: dict = self.config["job"]["BayesianOptimization"] - self.cf: EndureCost = EndureCost(self.bayesian_setting["max_levels"]) + max_levels = self.config['job']['BayesianOptimization']['max_levels'] + self.cf: EndureCost = EndureCost(max_levels) self.log: logging.Logger = logging.getLogger(self.config["log"]["name"]) self.system: System = System(**self.bayesian_setting["system"]) @@ -45,19 +51,22 @@ def __init__(self, conf: dict) -> None: self.beta_value: float = self.bayesian_setting["beta_value"] self.conn = None self.run_id: int = None - self.write_to_db = self.bayesian_setting["write_to_db"] + self.write_to_db = self.bayesian_setting["database"]["write_to_db"] self.output_dir = os.path.join( - self.config["io"]["data_dir"], self.bayesian_setting["db_path"] + self.config["io"]["data_dir"], self.bayesian_setting["database"]["db_path"] ) - self.db_path = os.path.join(self.output_dir, self.bayesian_setting["db_name"]) + self.db_path = os.path.join(self.output_dir, self.bayesian_setting["database"]["db_name"]) + self.model_type = self.bayesian_setting['model_type'] def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: Optional[float] = None, q: Optional[float] = None, w: Optional[float] = None, num_iterations: Optional[int] = None, sample_size: Optional[int] = None, acqf: Optional[str] = None) -> Tuple[Optional[LSMDesign], Optional[float]]: + start_time = time.time() os.makedirs(self.output_dir, exist_ok=True) self.conn = initialize_database(self.db_path) system = system if system is not None else self.system + # print(system.E, "E - system") sample_size = sample_size if sample_size is not None else self.initial_samples z0 = z0 if z0 is not None else self.workload.z0 z1 = z1 if z1 is not None else self.workload.z1 @@ -65,21 +74,33 @@ def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: O w = w if w is not None else self.workload.w w = w if w is not None else self.workload.w acqf = acqf if acqf is not None else self.acquisition_function - workload = Workload(z0, z1, q, w) - print("path", self.db_path) - self.run_id = log_new_run(self.conn, system, workload) iterations = num_iterations if num_iterations is not None else self.num_iterations + workload = Workload(z0, z1, q, w) + self.run_id = log_new_run(self.conn, system, workload, iterations, sample_size, acqf) train_x, train_y, best_y = self._generate_initial_data(z0, z1, q, w, system, sample_size) bounds = self.generate_initial_bounds(system) best_designs = [] - for i in range(iterations): new_candidates = self.get_next_points(train_x, train_y, best_y, bounds, acqf, 1) for cand in new_candidates: - h, size_ratio, policy_val = cand[0].item(), cand[1].item(), cand[2].item() - policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering - new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)] + h = cand[0].item() + if h == system.H: + h = h - 0.01 + if self.model_type == "QHybrid": + size_ratio, q_val = cand[1].item(), cand[2].item() + policy = Policy.QFixed + new_designs = [LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, Q=int(q_val))] + elif self.model_type == "YZHybrid": + size_ratio, y_val, z_val = cand[1].item(), cand[2].item(), cand[3].item() + policy = Policy.YZHybrid + new_designs = [LSMDesign(h=h, T=np.ceil(size_ratio), policy=policy, Y=int(y_val), Z=int(z_val))] + # TODO: Add KHybrid here + else: + size_ratio, policy_val = cand[1].item(), cand[2].item() + policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering + new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)] + # This will solve the division by 0 error for design in new_designs: try: self.cf.calc_cost(design, system, z0, z1, q, w) @@ -102,12 +123,15 @@ def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: O self.log.debug(f"Iteration {i + 1}/{iterations} complete") self.log.debug("Bayesian Optimization completed") self._print_best_designs(best_designs) - self._find_analytical_results(system, z0, z1, q, w) + self._find_analytical_results(system, z0, z1, q, w) # Uncomment this if running this file through endure.py sorted_designs = sorted(best_designs, key=lambda x: x[1]) self.conn.close() + end_time = time.time() + elapsed_time = end_time - start_time + print("elapsed time", elapsed_time) if sorted_designs: best_design, best_cost = sorted_designs[0] - return best_design, best_cost + return best_design, best_cost, elapsed_time else: return None, None @@ -116,13 +140,28 @@ def generate_initial_bounds(self, system: System) -> torch.Tensor: t_bounds = torch.tensor([int(self.bayesian_setting["bounds"]["T_min"]), int(self.bayesian_setting["bounds"]["T_max"])]) policy_bounds = torch.tensor([0, 1]) - bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1) + if self.model_type == "QHybrid": + q_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"]-1]) + bounds = torch.stack([h_bounds, t_bounds, q_bounds], dim=-1) + elif self.model_type == "YZHybrid": + y_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"]-1]) + z_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"]-1]) + bounds = torch.stack([h_bounds, t_bounds, y_bounds, z_bounds], dim=-1) + # elif self.model_type == "KHybrid": # TODO add support for KHybrid model + else: + bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1) return bounds def get_next_points(self, x: torch.Tensor, y: torch.Tensor, best_y: float, bounds: torch.Tensor, acquisition_function: str = "ExpectedImprovement", n_points: int = 1) -> torch.Tensor: - single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2], input_transform=Normalize(d=x.shape[1], bounds=bounds), - outcome_transform=Standardize(m=1)) + if self.model_type == "QHybrid" or self.model_type == "Classic": + single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2], input_transform=Normalize(d=x.shape[1], + bounds=bounds), + outcome_transform=Standardize(m=1)) + elif self.model_type == "YZHybrid": + single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2, 3], input_transform=Normalize(d=x.shape[1], + bounds=bounds), + outcome_transform=Standardize(m=1)) mll = ExactMarginalLogLikelihood(single_model.likelihood, single_model) fit_gpytorch_model(mll) if acquisition_function == "ExpectedImprovement": @@ -134,11 +173,23 @@ def get_next_points(self, x: torch.Tensor, y: torch.Tensor, best_y: float, bound acqf = qExpectedImprovement(model=single_model, best_f=best_y) else: raise ValueError(f"Unknown acquisition function: {acquisition_function}") + t_bounds = bounds[:, 1] + lower_t_bound = int(np.floor(t_bounds[0].item())) + upper_t_bound = int(np.ceil(t_bounds[1].item())) fixed_features_list = [] - for size_ratio in range(2, 33): - for pol in range(2): - fixed_features_list.append({1: size_ratio, 2: pol}) - + if self.model_type == "Classic": + for size_ratio in range(lower_t_bound, upper_t_bound): + for pol in range(2): + fixed_features_list.append({1: size_ratio, 2: pol}) + elif self.model_type == "QHybrid": + for size_ratio in range(lower_t_bound, upper_t_bound): + for q in range(1, size_ratio-1): + fixed_features_list.append({1: size_ratio, 2: q}) + elif self.model_type == "YZHybrid": + for size_ratio in range(lower_t_bound, upper_t_bound, 2): + for y in range(1, size_ratio-1): + for z in range(1, size_ratio-1): + fixed_features_list.append({1: size_ratio, 2: y, 3: z}) candidates, _ = optimize_acqf_mixed( acq_function=acqf, bounds=bounds, @@ -153,18 +204,33 @@ def _generate_initial_data(self, z0, z1, q, w, system: System, n: int = 30, run_ Tuple[torch.Tensor, torch.Tensor]: train_x = [] train_y = [] - policy = 0 run_id = run_id if run_id is not None else self.run_id - lcm_data_generator = LCMDataGenerator() + if self.model_type == "QHybrid": + generator = QCostGenerator() + elif self.model_type == "YZHybrid": + generator = YZCostGenerator() + elif self.model_type == "KHybrid": + generator = KHybridGenerator() + else: + generator = ClassicGenerator() for _ in range(n): - design = lcm_data_generator._sample_design(system) - if design.policy == Policy.Leveling: - policy = 0 - elif design.policy == Policy.Tiering: - policy = 1 - x_values = np.array([design.h, int(design.T), int(policy)]) + design = generator._sample_design(system) + if self.model_type == "Classic": + if design.policy == Policy.Leveling: + policy = 0 + elif design.policy == Policy.Tiering: + policy = 1 + x_values = np.array([design.h, design.T, policy]) + # log_design = LSMDesign(design.h, policy="", design.T, policy) + elif self.model_type == "QHybrid": + x_values = np.array([design.h, design.T, design.Q]) + # log_design = LSMDesign(design.h, design.T, policy=Policy.QFixed, Q=design.Q) + elif self.model_type == "YZHybrid": + x_values = np.array([design.h, design.T, design.Y, design.Z]) + # log_design = LSMDesign(design.h, design.T, design.Y, design.Z) + # TODO: add logic for KHybrid cost = self.cf.calc_cost(design, system, z0, z1, q, w) - log_design_cost(self.conn, run_id, LSMDesign(design.h, design.T, policy), cost) + log_design_cost(self.conn, run_id, design, cost) train_x.append(x_values) train_y.append(cost) train_x = np.array(train_x) @@ -173,52 +239,44 @@ def _generate_initial_data(self, z0, z1, q, w, system: System, n: int = 30, run_ best_y = train_y.min().item() return train_x, train_y, best_y - def _scale_and_standardize(self, train_x: torch.Tensor, train_y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - scaled_train_x = self._min_max_scale(train_x) - standardized_train_y = self._standardize_mean_std(train_y) - return scaled_train_x, standardized_train_y - - def _min_max_scale(self, x: torch.Tensor, bounds) -> torch.Tensor: - continuous_data = x[:, :2] - categorical_data = x[:, 2:] - scaled_continuous_data = (continuous_data - bounds[:, :2][0]) / (bounds[:, :2][1] - bounds[:, :2][0]) - scaled_data = torch.cat([scaled_continuous_data, categorical_data], dim=-1) - return scaled_data - - def _standardize_mean_std(self, x: torch.Tensor) -> torch.Tensor: - stddim = -1 if x.dim() < 2 else -2 - x_std = x.std(dim=stddim, keepdim=True) - x_std = x_std.where(x_std >= 1e-9, torch.full_like(x_std, 1.0)) - return (x - x.mean(dim=stddim, keepdim=True)) / x_std - - def _initialize_model(self, train_x: torch.Tensor, train_y: torch.Tensor, state_dict: dict = None) \ - -> Tuple[MixedSingleTaskGP, ExactMarginalLogLikelihood]: - print("Initial train_x", train_x) - gp_model = MixedSingleTaskGP(train_x, train_y, cat_dims=[-1]) - if state_dict is not None: - gp_model.load_state_dict(state_dict) - - mll = ExactMarginalLogLikelihood(gp_model.likelihood, gp_model) - return mll, gp_model - - def _update_best_designs(self, best_designs: List[Tuple[LSMDesign, float]], new_x: torch.Tensor, new_y: torch.Tensor) -> List[Tuple[LSMDesign, float]]: + def _update_best_designs(self, best_designs: List[Tuple[LSMDesign, float]], new_x: torch.Tensor, + new_y: torch.Tensor) -> List[Tuple[LSMDesign, float]]: for x, y in zip(new_x, new_y): - h, size_ratio, policy_continuous = x[0], x[1], x[2] - policy = Policy.Leveling if policy_continuous < 0.5 else Policy.Tiering - best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), policy), y.item())) + if self.model_type == "QHybrid": + h, size_ratio, qvalue = x[0], x[1], x[2] + best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), qvalue.item()), y.item())) + elif self.model_type == "YZHybrid": + h, size_ratio, yvalue, zvalue = x[0], x[1], x[2], x[3] + best_designs.append( + (LSMDesign(h.item(), np.ceil(size_ratio.item()), yvalue.item(), zvalue.item()), y.item())) + # TODO: code for KHybrid to be added + else: + h, size_ratio, policy = x[0], x[1], x[2] + # policy = Policy.Leveling if policy_continuous < 0.5 else Policy.Tiering + best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), policy.item()), y.item())) return best_designs def _print_best_designs(self, best_designs: List[Tuple[LSMDesign, float]]) -> None: sorted_designs = sorted(best_designs, key=lambda x: x[1]) print("Best Design Found:") - for design, cost in sorted_designs[:1]: - print(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}") - with open('best_designs.txt', 'w') as file: - file.write("All Best Designs Found:\n") - for design, cost in best_designs: - file.write(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}\n") + if self.model_type == "Classic": + for design, cost in sorted_designs[:1]: + print(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}") + with open('best_designs.txt', 'w') as file: + file.write("All Best Designs Found:\n") + for design, cost in best_designs: + file.write(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}\n") + if self.model_type == "QHybrid": + for design, cost in sorted_designs[:1]: + print(f"Design: h={design.h}, T={design.T}, Q={design.Q}, Cost={cost}") + with open('best_designs.txt', 'w') as file: + file.write("All Best Designs Found:\n") + for design, cost in best_designs: + file.write(f"Design: h={design.h}, T={design.T}, Q={design.Q}, Cost={cost}\n") - def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Optional[System] = None, filename: str = 'best_designs.csv') -> None: + + def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Optional[System] = None, + filename: str = 'best_designs.csv') -> None: sorted_designs = sorted(best_designs, key=lambda x: x[1])[:1] with open(filename, mode='w', newline='') as file: writer = csv.writer(file) @@ -235,16 +293,28 @@ def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Opt def _find_analytical_results(self, system: System, z0: float, z1: float, q: float, w: float, conf: Optional[dict] = None) -> Tuple[LSMDesign, float]: conf = conf if conf is not None else self.config - solver = ClassicSolver(conf) + if self.model_type == "Classic": + solver = ClassicSolver(conf) + elif self.model_type == "QHybrid": + solver = QLSMSolver(conf) + elif self.model_type == "YZHybrid": + solver = YZLSMSolver(conf) nominal_design, nominal_solution = solver.get_nominal_design(system, z0, z1, q, w) - x = np.array([[nominal_design.h, nominal_design.T]]) - train_x = torch.tensor(x) - policy = nominal_design.policy - cost = solver.nominal_objective(x[0], policy, system, z0, z1, q, w) - train_y = torch.tensor(cost, dtype=torch.float64).unsqueeze(-1) + + # train_x = torch.tensor(x) + if self.model_type == "Classic": + x = np.array([[nominal_design.h, nominal_design.T]]) + policy = nominal_design.policy + cost = solver.nominal_objective(x[0], policy, system, z0, z1, q, w) + elif self.model_type == "QHybrid": + x = np.array([[nominal_design.h, nominal_design.T, nominal_design.Q]]) + cost = solver.nominal_objective(x[0], system, z0, z1, q, w) + elif self.model_type == "YZHybrid": + x = np.array([[nominal_design.h, nominal_design.T, nominal_design.Y, nominal_design.Z]]) + cost = solver.nominal_objective(x[0], system, z0, z1, q, w) + # train_y = torch.tensor(cost, dtype=torch.float64).unsqueeze(-1) print("Cost for the nominal design using analytical solver: ", cost) print("Nominal Design suggested by analytical solver: ", nominal_design) - return nominal_design, cost @@ -255,4 +325,4 @@ def _find_analytical_results(self, system: System, z0: float, z1: float, q: floa log.info("Initializing Bayesian Optimization Job") bayesian_optimizer = BayesianPipeline(config) - bayesian_optimizer.run() \ No newline at end of file + bayesian_optimizer.run() diff --git a/jobs/bo_job_runs.py b/jobs/bo_job_runs.py new file mode 100644 index 0000000..dbbe456 --- /dev/null +++ b/jobs/bo_job_runs.py @@ -0,0 +1,80 @@ +import sys +import os +import csv +import toml +import numpy as np +import torch + +sys.path.append(os.path.join(sys.path[0], '../')) + +from endure.lcm.data.generator import LCMDataGenerator +from endure.data.io import Reader +from jobs.bayesian_pipeline import BayesianPipeline +from endure.lsm.solver.classic_solver import ClassicSolver +from endure.lsm.cost import EndureCost + + +def to_cuda(obj, seen=None): + """Recursively move tensors to CUDA if available, avoiding infinite recursion.""" + if torch.cuda.is_available(): + device = torch.device("cuda") + if seen is None: + seen = set() + + obj_id = id(obj) + if obj_id in seen: + return + seen.add(obj_id) + + for attr_name in dir(obj): + if attr_name.startswith('__'): + continue + + try: + attr_value = getattr(obj, attr_name) + if isinstance(attr_value, torch.Tensor): + setattr(obj, attr_name, attr_value.to(device)) + elif hasattr(attr_value, '__dict__') or isinstance(attr_value, (list, dict)): + to_cuda(attr_value, seen) + except Exception as e: + pass + else: + print("CUDA not available") + + +def compare_designs(n_runs=3, csv_filename='design_comparison.csv'): + """Compare Bayesian and analytical designs.""" + with open(csv_filename, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerow(['Entries per page(E)', 'Physical Entries per page(B)', 'Selectivity(s)', + 'Max bits per element(H)', 'Total elements (N)', 'Empty Reads', 'Non-Empty Reads', + 'Range Queries', 'Writes', 'BO Design', 'Analytical Design', 'BO Cost', + 'Analytical Cost', 'Diff(Analytical-Bayesian)', "Elapsed Time"]) + + for i in range(n_runs): + print(f"Iteration {i + 1}/{n_runs} running") + system = generator._sample_system() + z0, z1, q, w = generator._sample_workload(4) + print(np.floor(system.H)) + bo_design, bo_cost, time = bayesian_optimizer.run(system, z0, z1, q, w) + analytical_design, analytical_cost = bayesian_optimizer._find_analytical_results(system, z0, z1, q, w) + writer.writerow([system.E, system.B, system.s, system.H, system.N, z0, z1, q, w, + bo_design, analytical_design, bo_cost, analytical_cost, analytical_cost - bo_cost, time]) + + +if __name__ == "__main__": + file_dir = os.path.dirname(__file__) + config_path = os.path.join(file_dir, "../endure.toml") + with open(config_path) as fid: + config = toml.load(fid) + bayesian_optimizer = BayesianPipeline(config) + generator = LCMDataGenerator() + solver = ClassicSolver(config) + cf = EndureCost(config) + + to_cuda(bayesian_optimizer) + to_cuda(generator) + to_cuda(solver) + to_cuda(cf) + + compare_designs() \ No newline at end of file