From bd586a7bd013b5fcb676470ab941115801fbd2f6 Mon Sep 17 00:00:00 2001 From: anweshasaha Date: Mon, 12 Feb 2024 00:52:31 -0500 Subject: [PATCH 1/2] added mixed acqf support --- endure.toml | 28 +-- endure/util/db_log.py | 59 ++++++ jobs/bayesian_pipeline.py | 372 ++++++++++++++++++++------------------ requirements_botorch.txt | 145 +++++++++++++++ 4 files changed, 417 insertions(+), 187 deletions(-) create mode 100644 endure/util/db_log.py create mode 100644 requirements_botorch.txt diff --git a/endure.toml b/endure.toml index c16a9b4..7741f31 100644 --- a/endure.toml +++ b/endure.toml @@ -153,9 +153,10 @@ drop_last = true [job.BayesianOptimization] # ----------------------------------------------------------------------------- num_iterations = 15 -num_restarts = 20 -raw_samples = 100 -initial_samples = 50 +num_restarts = 200 +raw_samples = 512 +initial_samples = 30 +max_levels = 16 # This is the q value used in BoTorch Acquisition functions. # if it is set to a value above 1 sequential processing will stop in acquisition function and batch processing will start @@ -166,23 +167,24 @@ batch_size = 1 # Acquisition function options # [ExpectedImprovement, UpperConfidenceBound, qExpectedImprovement] acquisition_function = "ExpectedImprovement" +beta_value = 0.3 [job.BayesianOptimization.system] -E = 8192 -s = 4e-7 -B = 4 -N = 1000000000 -H = 10.0 +E = 1024 +s = 1.905581e-8 +B = 64.0 +N = 522365629 +H = 5.705814 phi = 1.0 [job.BayesianOptimization.workload] -z0 = 0.25 -z1 = 0.25 -q = 0.25 -w = 0.25 +z0 = 0.063 +z1 = 0.190 +q = 0.545 +w = 0.202 [job.BayesianOptimization.bounds] -h_min = 0.0 +h_min = 1.0 h_max = 10.0 T_min = 2.0 T_max = 31.0 diff --git a/endure/util/db_log.py b/endure/util/db_log.py new file mode 100644 index 0000000..7317a4e --- /dev/null +++ b/endure/util/db_log.py @@ -0,0 +1,59 @@ +import sqlite3 +from endure.data.io import Reader + + +def initialize_database(db_path='cost_log.db'): + connector = sqlite3.connect(db_path) + cursor = connector.cursor() + 
cursor.execute(''' + CREATE TABLE IF NOT EXISTS runs ( + run_id INTEGER PRIMARY KEY AUTOINCREMENT, + empty_reads REAL, + non_empty_reads REAL, + range_queries REAL, + writes REAL, + + max_bits_per_element REAL, + physical_entries_per_page INT, + range_selectivity REAL, + entries_per_page INT, + total_elements INT, + read_write_asymmetry REAL + );''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS design_costs ( + idx INTEGER PRIMARY KEY AUTOINCREMENT, + run_id INTEGER, + bits_per_element REAL, + size_ratio INTEGER, + policy INTEGER, + cost REAL, + FOREIGN KEY (run_id) REFERENCES runs(run_id) + );''') + connector.commit() + return connector + + +def log_new_run(connector, system, workload): + cursor = connector.cursor() + cursor.execute('INSERT INTO runs (empty_reads, non_empty_reads, range_queries, writes, ' + 'max_bits_per_element, physical_entries_per_page, range_selectivity, ' + 'entries_per_page, total_elements, read_write_asymmetry) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + (workload.z0, workload.z1, workload.q, workload.w, system.H, system.E, system.s, system.B, system.N, system.phi)) + connector.commit() + return cursor.lastrowid + + +def log_design_cost(connector, run_id, design, cost): + cursor = connector.cursor() + + policy_value = design.policy.value if hasattr(design.policy, 'value') else design.policy + cursor.execute('INSERT INTO design_costs (run_id, bits_per_element, size_ratio, policy, cost) ' + 'VALUES (?, ?, ?, ?, ?)',(run_id, design.h, design.T, policy_value, cost)) + connector.commit() + + +if __name__ == "__main__": + config = Reader.read_config("endure.toml") + conn = initialize_database() + conn.close() diff --git a/jobs/bayesian_pipeline.py b/jobs/bayesian_pipeline.py index 9a04929..1d556d9 100644 --- a/jobs/bayesian_pipeline.py +++ b/jobs/bayesian_pipeline.py @@ -1,218 +1,242 @@ import torch import numpy as np -from typing import Any, List, Union, Optional, Tuple +from typing import List, Optional, Tuple import logging +import csv 
-from botorch.models import SingleTaskGP +from botorch.models import MixedSingleTaskGP from botorch.fit import fit_gpytorch_model from gpytorch.mlls import ExactMarginalLogLikelihood -from botorch.acquisition import ExpectedImprovement -from botorch.acquisition import UpperConfidenceBound +from botorch.acquisition import ExpectedImprovement, UpperConfidenceBound from botorch.acquisition.monte_carlo import qExpectedImprovement -from botorch.optim import optimize_acqf +from botorch.optim import optimize_acqf_mixed +from botorch.models.transforms import Normalize, Standardize from endure.lsm.cost import EndureCost from endure.data.io import Reader -from endure.lcm.data.generator import LCMDataGenerator from endure.lsm.types import LSMDesign, System, Policy, Workload +from endure.lcm.data.generator import LCMDataGenerator from endure.lsm.solver.classic_solver import ClassicSolver +from endure.util.db_log import initialize_database, log_new_run, log_design_cost class BayesianPipeline: - def __init__(self, config): - self.config = config - self.bayesian_setting = config["job"]["BayesianOptimization"] - self.cf = EndureCost(self.config["lsm"]["max_levels"]) - self.log = logging.getLogger(config["log"]["name"]) - self.lcm_data_generator = LCMDataGenerator(config) - - self.system = System(**self.bayesian_setting["system"]) - self.workload = Workload(**self.bayesian_setting["workload"]) - self.h_bounds = torch.tensor( - [ - self.bayesian_setting["bounds"]["h_min"], - self.bayesian_setting["bounds"]["h_max"], - ] - ) - self.T_bounds = torch.tensor( - [ - self.bayesian_setting["bounds"]["T_min"], - self.bayesian_setting["bounds"]["T_max"], - ] - ) - self.bounds = torch.stack([self.h_bounds, self.T_bounds]) - self.initial_samples = self.bayesian_setting["initial_samples"] - self.acquisition_function = self.bayesian_setting["acquisition_function"] - self.q = self.bayesian_setting["batch_size"] - self.num_restarts = self.bayesian_setting["num_restarts"] - self.raw_samples = 
self.bayesian_setting["raw_samples"] - self.num_iterations = self.bayesian_setting["num_iterations"] - self.best_designs = [] - - # model initial generation where: - # train_x is the parameters we are optimizing - # train_y is the target that we are optimizing that is minimized or maximized - self.train_x, self.train_y = self._generate_initial_data( - self.system, self.initial_samples - ) - self.scaled_train_x, self.standardized_train_y = self._scale_and_standardize( - self.train_x, self.train_y - ) - self.mll, self.gp_model = self._initialize_model( - self.scaled_train_x, self.standardized_train_y - ) - - def run(self) -> None: - self.log.debug("Starting Bayesian Optimization") - for i in range(self.num_iterations): - new_x, new_y = self._optimize_acquisition_function( - self.acquisition_function, self.gp_model, self.train_y - ) - self.train_x = torch.cat([self.train_x, new_x]) - self.train_y = torch.cat([self.train_y.squeeze(), new_y]) - self.gp_model.set_train_data( - inputs=self.train_x, targets=self.train_y, strict=False - ) - fit_gpytorch_model(self.mll) - self._update_best_designs(new_x, new_y) - self.log.debug(f"Iteration {i+1}/{self.num_iterations} complete") - self._print_best_designs() - self._find_analytical_results() + def __init__(self, conf: dict) -> None: + self.config: dict = conf + self.bayesian_setting: dict = self.config["job"]["BayesianOptimization"] + self.cf: EndureCost = EndureCost(self.bayesian_setting["max_levels"]) + self.log: logging.Logger = logging.getLogger(self.config["log"]["name"]) + + self.system: System = System(**self.bayesian_setting["system"]) + self.workload: Workload = Workload(**self.bayesian_setting["workload"]) + self.h_bounds: torch.Tensor = torch.tensor([self.bayesian_setting["bounds"]["h_min"], + self.bayesian_setting["system"]["H"]]) + self.T_bounds: torch.Tensor = torch.tensor([self.bayesian_setting["bounds"]["T_min"], + self.bayesian_setting["bounds"]["T_max"]]) + self.policy_bounds: torch.Tensor = 
torch.tensor([0.0, 1.0]) + self.bounds: torch.Tensor = torch.stack([self.h_bounds, self.T_bounds, self.policy_bounds], dim=-1) + self.initial_samples: int = self.bayesian_setting["initial_samples"] + self.acquisition_function: str = self.bayesian_setting["acquisition_function"] + self.q: int = self.bayesian_setting["batch_size"] + self.num_restarts: int = self.bayesian_setting["num_restarts"] + self.raw_samples: int = self.bayesian_setting["raw_samples"] + self.num_iterations: int = self.bayesian_setting["num_iterations"] + self.beta_value: float = self.bayesian_setting["beta_value"] + self.conn = initialize_database() + self.run_id: int = None + + def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: Optional[float] = None, + q: Optional[float] = None, w: Optional[float] = None, num_iterations: Optional[int] = None, + sample_size: Optional[int] = None, acqf: Optional[str] = None) -> Tuple[Optional[LSMDesign], Optional[float]]: + + system = system if system is not None else self.system + sample_size = sample_size if sample_size is not None else self.initial_samples + z0 = z0 if z0 is not None else self.workload.z0 + z1 = z1 if z1 is not None else self.workload.z1 + q = q if q is not None else self.workload.q + w = w if w is not None else self.workload.w + acqf = acqf if acqf is not None else self.acquisition_function + workload = Workload(z0, z1, q, w) + self.run_id = log_new_run(self.conn, system, workload) + iterations = num_iterations if num_iterations is not None else self.num_iterations + train_x, train_y, best_y = self._generate_initial_data(z0, z1, q, w, system, sample_size) + bounds = self.generate_initial_bounds(system) + best_designs = [] + + for i in range(iterations): + new_candidates = self.get_next_points(train_x, train_y, best_y, bounds, acqf, 1) + for cand in new_candidates: + h, size_ratio, policy_val = cand[0].item(), cand[1].item(), cand[2].item() + policy = Policy.Leveling if policy_val < 0.5 else Policy.Tiering + 
new_designs = [LSMDesign(h, np.ceil(size_ratio), policy)] + + for design in new_designs: + try: + self.cf.calc_cost(design, system, z0, z1, q, w) + except ZeroDivisionError: + print(design, " - Design") + print(system, " - System") + print("Ratios: z0, z1, q, w: ", z0, z1, q, w) + raise + except Exception as e: + logging.exception(e) + costs = [self.cf.calc_cost(design, system, z0, z1, q, w) for design in new_designs] + + for design, cost in zip(new_designs, costs): + log_design_cost(self.conn, self.run_id, design, cost) + new_target = torch.tensor(costs).unsqueeze(-1) + train_x = torch.cat([train_x, new_candidates]) + train_y = torch.cat([train_y, new_target]) + best_y = train_y.min().item() + best_designs = self._update_best_designs(best_designs, new_candidates, new_target) + self.log.debug(f"Iteration {i + 1}/{iterations} complete") self.log.debug("Bayesian Optimization completed") + self._print_best_designs(best_designs) + self._find_analytical_results(system, z0, z1, q, w) + sorted_designs = sorted(best_designs, key=lambda x: x[1]) + self.conn.close() + if sorted_designs: + best_design, best_cost = sorted_designs[0] + return best_design, best_cost + else: + return None, None + + def generate_initial_bounds(self, system: System) -> torch.Tensor: + h_bounds = torch.tensor([self.bayesian_setting["bounds"]["h_min"], np.floor(system.H)]) + t_bounds = torch.tensor([int(self.bayesian_setting["bounds"]["T_min"]), + int(self.bayesian_setting["bounds"]["T_max"])]) + policy_bounds = torch.tensor([0, 1]) + bounds = torch.stack([h_bounds, t_bounds, policy_bounds], dim=-1) + return bounds + + def get_next_points(self, x: torch.Tensor, y: torch.Tensor, best_y: float, bounds: torch.Tensor, + acquisition_function: str = "ExpectedImprovement", n_points: int = 1) -> torch.Tensor: + single_model = MixedSingleTaskGP(x, y, cat_dims=[1, 2], input_transform=Normalize(d=x.shape[1], bounds=bounds), + outcome_transform=Standardize(m=1)) + mll = 
ExactMarginalLogLikelihood(single_model.likelihood, single_model) + fit_gpytorch_model(mll) + if acquisition_function == "ExpectedImprovement": + acqf = ExpectedImprovement(model=single_model, best_f=best_y, maximize=False) + elif acquisition_function == "UpperConfidenceBound": + beta = self.beta_value + acqf = UpperConfidenceBound(model=single_model, beta=beta, maximize=False) + elif acquisition_function == "qExpectedImprovement": + acqf = qExpectedImprovement(model=single_model, best_f=best_y) + else: + raise ValueError(f"Unknown acquisition function: {acquisition_function}") + fixed_features_list = [] + for size_ratio in range(2, 33): + for pol in range(2): + fixed_features_list.append({1: size_ratio, 2: pol}) + + candidates, _ = optimize_acqf_mixed( + acq_function=acqf, + bounds=bounds, + q=n_points, + num_restarts=self.num_restarts, + raw_samples=self.raw_samples, + fixed_features_list=fixed_features_list + ) + return candidates - def _generate_initial_data( - self, system: System, n: int = 30 - ) -> Tuple[torch.Tensor, torch.Tensor]: + def _generate_initial_data(self, z0, z1, q, w, system: System, n: int = 30, run_id=None) -> \ + Tuple[torch.Tensor, torch.Tensor]: train_x = [] train_y = [] + policy = 0 + run_id = run_id if run_id is not None else self.run_id + lcm_data_generator = LCMDataGenerator() for _ in range(n): - design = self.lcm_data_generator._sample_design(system) - x_values = np.array([design.h, design.T]) - # cost is negated here - cost = -self.cf.calc_cost( - design, - system, - self.workload.z0, - self.workload.z1, - self.workload.q, - self.workload.w, - ) + design = lcm_data_generator._sample_design(system) + if design.policy == Policy.Leveling: + policy = 0 + elif design.policy == Policy.Tiering: + policy = 1 + x_values = np.array([design.h, int(design.T), int(policy)]) + cost = self.cf.calc_cost(design, system, z0, z1, q, w) + log_design_cost(self.conn, run_id, LSMDesign(design.h, design.T, policy), cost) train_x.append(x_values) 
train_y.append(cost) train_x = np.array(train_x) - train_x = torch.tensor(train_x, dtype=torch.float64) + train_x = torch.tensor(train_x) train_y = torch.tensor(train_y, dtype=torch.float64).unsqueeze(-1) - return train_x, train_y + best_y = train_y.min().item() + return train_x, train_y, best_y - def _scale_and_standardize( - self, train_x: torch.Tensor, train_y: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: + def _scale_and_standardize(self, train_x: torch.Tensor, train_y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: scaled_train_x = self._min_max_scale(train_x) standardized_train_y = self._standardize_mean_std(train_y) - return scaled_train_x, standardized_train_y - def _min_max_scale(self, x: torch.Tensor) -> torch.Tensor: - x_min = x.min(0, keepdim=True)[0] - x_max = x.max(0, keepdim=True)[0] - scaled_x = (x - x_min) / (x_max - x_min) - return scaled_x + def _min_max_scale(self, x: torch.Tensor, bounds) -> torch.Tensor: + continuous_data = x[:, :2] + categorical_data = x[:, 2:] + scaled_continuous_data = (continuous_data - bounds[:, :2][0]) / (bounds[:, :2][1] - bounds[:, :2][0]) + scaled_data = torch.cat([scaled_continuous_data, categorical_data], dim=-1) + return scaled_data def _standardize_mean_std(self, x: torch.Tensor) -> torch.Tensor: - x_mean = x.mean() - x_std = x.std() - standardized_x = (x - x_mean) / x_std - return standardized_x - - def _initialize_model( - self, train_x: torch.Tensor, train_y: torch.Tensor, state_dict: dict = None - ) -> Tuple[SingleTaskGP, ExactMarginalLogLikelihood]: - gp_model = SingleTaskGP(train_x, train_y) + stddim = -1 if x.dim() < 2 else -2 + x_std = x.std(dim=stddim, keepdim=True) + x_std = x_std.where(x_std >= 1e-9, torch.full_like(x_std, 1.0)) + return (x - x.mean(dim=stddim, keepdim=True)) / x_std + + def _initialize_model(self, train_x: torch.Tensor, train_y: torch.Tensor, state_dict: dict = None) \ + -> Tuple[MixedSingleTaskGP, ExactMarginalLogLikelihood]: + print("Initial train_x", train_x) + 
gp_model = MixedSingleTaskGP(train_x, train_y, cat_dims=[-1]) if state_dict is not None: gp_model.load_state_dict(state_dict) mll = ExactMarginalLogLikelihood(gp_model.likelihood, gp_model) return mll, gp_model - def _optimize_acquisition_function( - self, acquisition_function: str, model: SingleTaskGP, target: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - if acquisition_function == "ExpectedImprovement": - acqf = ExpectedImprovement(model=model, best_f=target.min()) - elif acquisition_function == "UpperConfidenceBound": - beta = 10.0 - acqf = UpperConfidenceBound(model=model, beta=beta) - elif acquisition_function == "qExpectedImprovement": - acqf = qExpectedImprovement(model=model, best_f=target.min()) - else: - raise ValueError(f"Unknown acquisition function: {acquisition_function}") - new_x, _ = optimize_acqf( - acq_function=acqf, - bounds=self.bounds, - q=self.q, - num_restarts=self.num_restarts, - raw_samples=self.raw_samples, - ) - new_designs = [ - LSMDesign(x[0].item(), np.ceil(x[1].item()), Policy.Leveling) for x in new_x - ] - # the new cost is also negated here - new_y = torch.tensor( - [ - -self.cf.calc_cost( - design, - self.system, - self.workload.z0, - self.workload.z1, - self.workload.q, - self.workload.w, - ) - for design in new_designs - ] - ) - return new_x, new_y - - def _update_best_designs(self, new_x: torch.Tensor, new_y: torch.Tensor) -> None: + def _update_best_designs(self, best_designs: List[Tuple[LSMDesign, float]], new_x: torch.Tensor, new_y: torch.Tensor) -> List[Tuple[LSMDesign, float]]: for x, y in zip(new_x, new_y): - self.best_designs.append( - ( - LSMDesign(x[0].item(), np.ceil(x[1].item()), Policy.Leveling), - y.item(), - ) - ) - - def _print_best_designs(self) -> None: - # self.best_designs.sort(key=lambda x: x[1]) - print("Best Designs Found:") - # for design, cost in self.best_designs[:5]: - for design, cost in self.best_designs: - print( - f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}" 
- ) - - def _find_analytical_results(self): - solver = ClassicSolver(self.config) - nominal_design, nominal_solution = solver.get_nominal_design( - system=self.system, - z0=self.workload.z0, - z1=self.workload.z1, - q=self.workload.q, - w=self.workload.w, - ) - x = np.array([nominal_design.h, nominal_design.T]) + h, size_ratio, policy_continuous = x[0], x[1], x[2] + policy = Policy.Leveling if policy_continuous < 0.5 else Policy.Tiering + best_designs.append((LSMDesign(h.item(), np.ceil(size_ratio.item()), policy), y.item())) + return best_designs + + def _print_best_designs(self, best_designs: List[Tuple[LSMDesign, float]]) -> None: + sorted_designs = sorted(best_designs, key=lambda x: x[1]) + print("Best Design Found:") + for design, cost in sorted_designs[:1]: + print(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}") + with open('best_designs.txt', 'w') as file: + file.write("All Best Designs Found:\n") + for design, cost in best_designs: + file.write(f"Design: h={design.h}, T={design.T}, Policy={design.policy}, Cost={cost}\n") + + def _write_to_csv(self, best_designs: List[Tuple[LSMDesign, float]], system: Optional[System] = None, filename: str = 'best_designs.csv') -> None: + sorted_designs = sorted(best_designs, key=lambda x: x[1])[:1] + with open(filename, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerow(['Entries per page(E)', 'Range query selectivity(s)', 'Entries per page(B)', + 'Total elements(N)', 'max bits per element(H) ', 'bits per element(h)', + 'size ratio(T)', 'Policy', 'Cost']) + + for design, cost in sorted_designs: + system = system if system is not None else self.system + writer.writerow( + [system.E, round(system.s, 2), system.B, system.N, system.H, round(design.h, 2), design.T, + design.policy.name, round(cost, 2)]) + + def _find_analytical_results(self, system: System, z0: float, z1: float, q: float, w: float, + conf: Optional[dict] = None) -> Tuple[LSMDesign, float]: + conf = conf if 
conf is not None else self.config + solver = ClassicSolver(conf) + nominal_design, nominal_solution = solver.get_nominal_design(system, z0, z1, q, w) + x = np.array([[nominal_design.h, nominal_design.T]]) + train_x = torch.tensor(x) policy = nominal_design.policy - cost = solver.nominal_objective( - x, - policy, - self.system, - self.workload.z0, - self.workload.z1, - self.workload.q, - self.workload.w, - ) + cost = solver.nominal_objective(x[0], policy, system, z0, z1, q, w) + train_y = torch.tensor(cost, dtype=torch.float64).unsqueeze(-1) print("Cost for the nominal design using analytical solver: ", cost) print("Nominal Design suggested by analytical solver: ", nominal_design) + return nominal_design, cost + if __name__ == "__main__": config = Reader.read_config("endure.toml") @@ -221,4 +245,4 @@ def _find_analytical_results(self): log.info("Initializing Bayesian Optimization Job") bayesian_optimizer = BayesianPipeline(config) - bayesian_optimizer.run() + bayesian_optimizer.run() \ No newline at end of file diff --git a/requirements_botorch.txt b/requirements_botorch.txt new file mode 100644 index 0000000..cd5a4a0 --- /dev/null +++ b/requirements_botorch.txt @@ -0,0 +1,145 @@ +anyio==4.0.0 +appnope==0.1.3 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 +asttokens==2.4.1 +async-lru==2.0.4 +attrs==23.1.0 +Babel==2.13.1 +beautifulsoup4==4.12.2 +bleach==6.1.0 +botorch==0.9.4 +certifi==2023.7.22 +cffi==1.16.0 +charset-normalizer==3.3.2 +comm==0.1.4 +contourpy==1.1.1 +cycler==0.12.1 +debugpy==1.8.0 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.7 +exceptiongroup==1.1.3 +executing==2.0.1 +fastjsonschema==2.18.1 +filelock==3.13.1 +fonttools==4.44.0 +fqdn==1.5.1 +fsspec==2023.10.0 +gpytorch==1.11 +idna==3.4 +importlib-metadata==6.8.0 +importlib-resources==6.1.0 +iniconfig==2.0.0 +ipykernel==6.26.0 +ipython==8.17.2 +ipython-genutils==0.2.0 +ipywidgets==8.1.1 +isoduration==20.11.0 +jaxtyping==0.2.24 +jedi==0.19.1 +Jinja2==3.1.2 +joblib==1.3.2 
+json5==0.9.14 +jsonpointer==2.4 +jsonschema==4.19.2 +jsonschema-specifications==2023.7.1 +jupyter==1.0.0 +jupyter-console==6.6.3 +jupyter-events==0.8.0 +jupyter-lsp==2.2.0 +jupyter_client==8.5.0 +jupyter_core==5.5.0 +jupyter_server==2.9.1 +jupyter_server_terminals==0.4.4 +jupyterlab==4.0.8 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.9 +jupyterlab_server==2.25.0 +kiwisolver==1.4.5 +linear-operator==0.5.1 +llvmlite==0.41.1 +MarkupSafe==2.1.3 +matplotlib==3.8.1 +matplotlib-inline==0.1.6 +mistune==3.0.2 +mpmath==1.3.0 +multipledispatch==1.0.0 +nbclient==0.8.0 +nbconvert==7.10.0 +nbformat==5.9.2 +nest-asyncio==1.5.8 +networkx==3.2.1 +notebook==7.0.6 +notebook_shim==0.2.3 +numba==0.58.1 +numpy==1.26.1 +opt-einsum==3.3.0 +overrides==7.4.0 +packaging==23.2 +pandas==2.1.2 +pandas-stubs==2.1.1.230928 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +Pillow==10.1.0 +platformdirs==3.11.0 +pluggy==1.3.0 +prometheus-client==0.18.0 +prompt-toolkit==3.0.39 +psutil==5.9.6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.0 +pycparser==2.21 +Pygments==2.16.1 +pyparsing==3.1.1 +pyro-api==0.1.2 +pyro-ppl==1.8.6 +pytest==7.4.3 +python-dateutil==2.8.2 +python-json-logger==2.0.7 +pytz==2023.3.post1 +PyYAML==6.0.1 +pyzmq==25.1.1 +qtconsole==5.4.4 +QtPy==2.4.1 +referencing==0.30.2 +reinmax==0.1.0 +requests==2.31.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rpds-py==0.10.6 +scikit-learn==1.3.2 +scipy==1.11.3 +seaborn==0.13.2 +Send2Trash==1.8.2 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.5 +stack-data==0.6.3 +sympy==1.12 +terminado==0.17.1 +threadpoolctl==3.2.0 +tinycss2==1.2.1 +toml==0.10.2 +tomli==2.0.1 +torch==2.1.0 +torchdata==0.7.0 +tornado==6.3.3 +tqdm==4.66.1 +traitlets==5.13.0 +typeguard==2.13.3 +types-python-dateutil==2.8.19.14 +types-pytz==2023.3.1.1 +typing_extensions==4.8.0 +tzdata==2023.3 +uri-template==1.3.0 +urllib3==2.0.7 +wcwidth==0.2.9 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.6.4 +widgetsnbextension==4.0.9 +zipp==3.17.0 From 
5ae6541556640b9362454b941db159630836de6a Mon Sep 17 00:00:00 2001 From: anweshasaha Date: Mon, 12 Feb 2024 18:42:08 -0500 Subject: [PATCH 2/2] added option for db name --- endure.toml | 10 +++++++++- jobs/bayesian_pipeline.py | 16 +++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/endure.toml b/endure.toml index 7741f31..498d92e 100644 --- a/endure.toml +++ b/endure.toml @@ -25,7 +25,7 @@ disable_tqdm = false # Generic IO settings for experiments, saving data, etc # ============================================================================= [io] -data_dir = "/data" +data_dir = "data" # ============================================================================= # HEADER JOB @@ -157,6 +157,14 @@ num_restarts = 200 raw_samples = 512 initial_samples = 30 max_levels = 16 +# Takes the value 0 or 1: 1 means each cost and the run details are written to the SQLite database, +# and 0 means run details are not stored in the database +write_to_db = 1 +# By default the databases directory is created inside the data directory. To change this, you need to change ["io"]["data_dir"] +db_path = "databases" +# This must be a .db file for the code to function. A SQLite database will be created with this name +db_name = "db_cost.db" + # This is the q value used in BoTorch Acquisition functions. 
# if it is set to a value above 1 sequential processing will stop in acquisition function and batch processing will start diff --git a/jobs/bayesian_pipeline.py b/jobs/bayesian_pipeline.py index 1d556d9..2e79bf6 100644 --- a/jobs/bayesian_pipeline.py +++ b/jobs/bayesian_pipeline.py @@ -3,6 +3,7 @@ from typing import List, Optional, Tuple import logging import csv +import os from botorch.models import MixedSingleTaskGP from botorch.fit import fit_gpytorch_model @@ -42,21 +43,30 @@ def __init__(self, conf: dict) -> None: self.raw_samples: int = self.bayesian_setting["raw_samples"] self.num_iterations: int = self.bayesian_setting["num_iterations"] self.beta_value: float = self.bayesian_setting["beta_value"] - self.conn = initialize_database() + self.conn = None self.run_id: int = None + self.write_to_db = self.bayesian_setting["write_to_db"] + self.output_dir = os.path.join( + self.config["io"]["data_dir"], self.bayesian_setting["db_path"] + ) + self.db_path = os.path.join(self.output_dir, self.bayesian_setting["db_name"]) def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: Optional[float] = None, q: Optional[float] = None, w: Optional[float] = None, num_iterations: Optional[int] = None, - sample_size: Optional[int] = None, acqf: Optional[str] = None) -> Tuple[Optional[LSMDesign], Optional[float]]: - + sample_size: Optional[int] = None, acqf: Optional[str] = None) -> Tuple[Optional[LSMDesign], + Optional[float]]: + os.makedirs(self.output_dir, exist_ok=True) + self.conn = initialize_database(self.db_path) system = system if system is not None else self.system sample_size = sample_size if sample_size is not None else self.initial_samples z0 = z0 if z0 is not None else self.workload.z0 z1 = z1 if z1 is not None else self.workload.z1 q = q if q is not None else self.workload.q w = w if w is not None else self.workload.w + w = w if w is not None else self.workload.w acqf = acqf if acqf is not None else self.acquisition_function workload = 
Workload(z0, z1, q, w) + print("path", self.db_path) self.run_id = log_new_run(self.conn, system, workload) iterations = num_iterations if num_iterations is not None else self.num_iterations train_x, train_y, best_y = self._generate_initial_data(z0, z1, q, w, system, sample_size)