Skip to content

Commit

Permalink
Refactor cfg (#18)
Browse files Browse the repository at this point in the history
* refactor to separate cfg

* added job cfg

* fixed issues for multijob run
  • Loading branch information
aquaorifice authored Mar 5, 2024
1 parent 709752b commit 3d0ec44
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 51 deletions.
21 changes: 17 additions & 4 deletions endure.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,26 @@ def run(self):

jobs_list = self.config["job"]["to_run"]

# for job_name in jobs_list:
# job = jobs.get(job_name, None)
# if job is None:
# self.log.warn(f"No job associated with {job_name}")
# continue
# job = job(config)
# job.run()

for job_name in jobs_list:
job = jobs.get(job_name, None)
job = jobs.get(job_name)
if job is None:
self.log.warn(f"No job associated with {job_name}")
driver.log.warn(f"No job associated with {job_name}")
continue
job = job(config)
job.run()

conf_path = os.path.join("jobs", "infra", f"{job_name}.toml")
with open(conf_path) as jobfid:
job_config = toml.load(jobfid)

job_instance = job(job_config)
job_instance.run()

self.log.info("All jobs finished, exiting")

Expand Down
57 changes: 23 additions & 34 deletions jobs/bayesian_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import numpy as np
from typing import List, Optional, Tuple
import logging
import csv
import os
import time
from itertools import product
Expand All @@ -17,13 +16,13 @@

from endure.lsm.cost import EndureCost
from endure.data.io import Reader
from endure.lsm.types import LSMDesign, System, Policy, Workload, STR_POLICY_DICT
from endure.lsm.types import LSMDesign, System, Policy, Workload, LSMBounds, STR_POLICY_DICT
from endure.lcm.data.generator import ClassicGenerator, QCostGenerator, YZCostGenerator, KHybridGenerator
from endure.lsm.solver.classic_solver import ClassicSolver
from endure.lsm.solver.qlsm_solver import QLSMSolver
from endure.lsm.solver.yzlsm_solver import YZLSMSolver
from endure.lsm.solver.klsm_solver import KLSMSolver
from endure.util.db_log import initialize_database, log_new_run, log_design_cost, log_run_details
from jobs.infra.db_log import initialize_database, log_new_run, log_design_cost, log_run_details


def print_best_designs(best_designs: List[Tuple[LSMDesign, float]]) -> None:
Expand All @@ -49,18 +48,13 @@ def __init__(self, conf: dict) -> None:
self.start_time = None
self.config: dict = conf
self.bayesian_setting: dict = self.config["job"]["BayesianOptimization"]
self.max_levels = self.config['lsm']['max_levels']
self.bounds = LSMBounds()
self.max_levels = self.bounds.max_considered_levels
self.cf: EndureCost = EndureCost(self.max_levels)
self.log: logging.Logger = logging.getLogger(self.config["log"]["name"])
# self.log: logging.Logger = logging.getLogger(self.config["log"]["name"])

self.system: System = System(**self.bayesian_setting["system"])
self.workload: Workload = Workload(**self.bayesian_setting["workload"])
self.h_bounds: torch.Tensor = torch.tensor([self.bayesian_setting["bounds"]["h_min"],
self.bayesian_setting["system"]["H"]])
self.T_bounds: torch.Tensor = torch.tensor([self.bayesian_setting["bounds"]["T_min"],
self.bayesian_setting["bounds"]["T_max"]])
self.policy_bounds: torch.Tensor = torch.tensor([0.0, 1.0])
self.bounds: torch.Tensor = torch.stack([self.h_bounds, self.T_bounds, self.policy_bounds], dim=-1)
self.initial_samples: int = self.bayesian_setting["initial_samples"]
self.acquisition_function: str = self.bayesian_setting["acquisition_function"]
self.q: int = self.bayesian_setting["batch_size"]
Expand All @@ -72,57 +66,52 @@ def __init__(self, conf: dict) -> None:
self.run_id: int = 0
self.write_to_db = self.bayesian_setting["database"]["write_to_db"]
self.output_dir = os.path.join(
self.config["io"]["data_dir"], self.bayesian_setting["database"]["db_path"]
self.bayesian_setting["database"]["data_dir"], self.bayesian_setting["database"]["db_path"]
)
self.db_path = os.path.join(self.output_dir, self.bayesian_setting["database"]["db_name"])
model_type_str = self.bayesian_setting.get('model_type', 'Classic')
self.model_type = STR_POLICY_DICT.get(model_type_str, Policy.Classic)
self.num_k_values = self.bayesian_setting["num_k_values"]

def run(self, system: Optional[System] = None, z0: Optional[float] = None, z1: Optional[float] = None,
q: Optional[float] = None, w: Optional[float] = None, num_iterations: Optional[int] = None,
def run(self, system: Optional[System] = None, workload: Optional[Workload] = None, num_iterations: Optional[int] = None,
sample_size: Optional[int] = None, acqf: Optional[str] = None) -> Tuple[Optional[LSMDesign], Optional[float]]:
self.start_time = time.time()
self.initialize_environment(system, z0, z1, q, w, num_iterations, sample_size, acqf)
self.initialize_environment(system, workload, num_iterations, sample_size, acqf)
train_x, train_y, best_y = self._generate_initial_data(self.initial_samples)
best_designs = self.optimization_loop(train_x, train_y, best_y)
best_design, best_cost, elapsed_time = self.finalize_optimization(best_designs)
return best_design, best_cost

def initialize_environment(self, system: Optional[System], z0: Optional[float], z1: Optional[float],
q: Optional[float], w: Optional[float], num_iterations: Optional[int],
def initialize_environment(self, system: Optional[System], workload: Optional[Workload], num_iterations: Optional[int],
sample_size: Optional[int], acqf: Optional[str]):
os.makedirs(self.output_dir, exist_ok=True)
self.conn = initialize_database(self.db_path)
self.system = system if system is not None else self.system
self.initial_samples = sample_size if sample_size is not None else self.initial_samples
z0 = z0 if z0 is not None else self.workload.z0
z1 = z1 if z1 is not None else self.workload.z1
q = q if q is not None else self.workload.q
w = w if w is not None else self.workload.w
self.workload = Workload(z0, z1, q, w)
self.workload = workload if workload is not None else self.workload
self.acquisition_function = acqf if acqf is not None else self.acquisition_function
self.num_iterations = num_iterations if num_iterations is not None else self.num_iterations
self.run_id = log_new_run(self.conn, self.system, self.workload, self.num_iterations,
self.initial_samples, self.acquisition_function)

def generate_initial_bounds(self, system: System) -> torch.Tensor:
h_bounds = torch.tensor([self.bayesian_setting["bounds"]["h_min"], np.floor(system.H)])
t_bounds = torch.tensor([int(self.bayesian_setting["bounds"]["T_min"]),
int(self.bayesian_setting["bounds"]["T_max"])])
h_bounds = torch.tensor([self.bounds.bits_per_elem_range[0], min(np.floor(system.H)
, self.bounds.bits_per_elem_range[1])])
t_bounds = torch.tensor([self.bounds.size_ratio_range[0], self.bounds.size_ratio_range[1]])
policy_bounds = torch.tensor([0, 1])
if self.model_type == Policy.QFixed:
q_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"] - 1])
q_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1])
bounds = torch.stack([h_bounds, t_bounds, q_bounds], dim=-1)
elif self.model_type == Policy.YZHybrid:
y_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"] - 1])
z_bounds = torch.tensor([1, self.bayesian_setting["bounds"]["T_max"] - 1])
y_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1])
z_bounds = torch.tensor([1, self.bounds.size_ratio_range[1] - 1])
bounds = torch.stack([h_bounds, t_bounds, y_bounds, z_bounds], dim=-1)
elif self.model_type == Policy.KHybrid:
lower_limits = [self.bayesian_setting["bounds"]["h_min"], self.bayesian_setting["bounds"]["T_min"]] +\
lower_limits = [self.bounds.bits_per_elem_range[0], self.bounds.size_ratio_range[0]] +\
[1] * self.num_k_values
upper_limits = [np.floor(system.H), self.bayesian_setting["bounds"]["T_max"]] + \
[self.bayesian_setting["bounds"]["T_max"] - 1] * self.num_k_values
upper_limits = [min(np.floor(system.H), self.bounds.bits_per_elem_range[1]),
self.bounds.size_ratio_range[1]] + \
[self.bounds.size_ratio_range[1] - 1] * self.num_k_values
new_bounds_list = [lower_limits, upper_limits]
bounds = torch.tensor(new_bounds_list, dtype=torch.float64)
else:
Expand All @@ -139,8 +128,8 @@ def optimization_loop(self, train_x, train_y, best_y):
new_designs, costs = self.evaluate_new_candidates(new_candidates)
train_x, train_y, best_y, best_designs = self.update_training_data(train_x, train_y, new_candidates, costs,
best_designs)
self.log.debug(f"Iteration {i + 1}/{self.num_iterations} complete")
self.log.debug("Bayesian Optimization completed")
# self.log.debug(f"Iteration {i + 1}/{self.num_iterations} complete")
# self.log.debug("Bayesian Optimization completed")
return best_designs

def _initialize_feature_list(self, bounds):
Expand Down Expand Up @@ -290,7 +279,7 @@ def _generate_initial_data(self, n: int = 30) -> Tuple[torch.Tensor, torch.Tenso
elif self.model_type == Policy.YZHybrid:
generator = YZCostGenerator()
elif self.model_type == Policy.KHybrid:
generator = KHybridGenerator()
generator = KHybridGenerator(self.bounds)
else:
generator = ClassicGenerator()
for _ in range(n):
Expand Down
54 changes: 54 additions & 0 deletions jobs/infra/BayesianBaseline.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# -----------------------------------------------------------------------------
[job.BayesianOptimization]
# -----------------------------------------------------------------------------
num_iterations = 15
num_restarts = 2 # TODO set it to 2
# value of raw_samples determines how many initial random samples are taken from the search space before starting the optimization process
raw_samples = 3 # TODO set it to 3
initial_samples = 20
# For a true KLSM calculation, set num_k_values to the same value as max_levels. This is only consequential for the KLSM model.
# This works in the following way:
# suppose num_k_values = 4 and max_levels = 20.
# Then each of the first 4 levels gets its own custom k value, while levels (num_k_values + 1) through max_levels
# all use a fixed k value of 1.
num_k_values = 4

# This is the q value used in BoTorch Acquisition functions.
# if it is set to a value above 1 sequential processing will stop in acquisition function and batch processing will start
# note that for batch processing tensor shape will change and will require modification of code.
# TODO: Add code to handle batch
batch_size = 1
# Acquisition function options
# [ExpectedImprovement, UpperConfidenceBound, qExpectedImprovement]
acquisition_function = "ExpectedImprovement"
beta_value = 0.3
# model_type can take values - "Classic", "QFixed", "YZHybrid", "KHybrid"
model_type = "KHybrid"
# Determines how many workloads to test using the Bayesian pipeline
multi_jobs_number = 100
multi_job_file = "design_comparison.csv"

[job.BayesianOptimization.database]
data_dir = "databases"
# Takes the value 0 or 1: 1 means each cost and the run details are written to the SQLite database,
# and 0 means run details are not stored in the database
write_to_db = 1
# By default the db_path directory is created inside the data directory. To change this, change data_dir in this table.
db_path = "yz_databases"
# This must be a .db file for the code to function. It will create a SQLite database
db_name = "yz_db_cost.db"

[job.BayesianOptimization.system]
E = 1024
s = 1.905581e-8
B = 64.0
N = 522365629
H = 5.705814
phi = 1.0

[job.BayesianOptimization.workload]
z0 = 0.063
z1 = 0.190
q = 0.545
w = 0.202

23 changes: 10 additions & 13 deletions jobs/bo_job_runs.py → jobs/infra/bo_job_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
import os
import csv
import toml
import numpy as np
import torch

sys.path.append(os.path.join(sys.path[0], '../'))
sys.path.append(os.path.join(sys.path[0], '../../'))

from endure.lsm.types import LSMBounds, Workload
from endure.lcm.data.generator import LCMDataGenerator
from endure.data.io import Reader
from jobs.bayesian_pipeline import BayesianPipeline
from endure.lsm.solver.classic_solver import ClassicSolver
from endure.lsm.cost import EndureCost


Expand Down Expand Up @@ -55,25 +53,24 @@ def compare_designs(n_runs=100, csv_filename='yz_design_comparison.csv'):
print(f"Iteration {i + 1}/{n_runs} running")
system = generator._sample_system()
z0, z1, q, w = generator._sample_workload(4)
bo_design, bo_cost = bayesian_optimizer.run(system, z0, z1, q, w)
analytical_design, analytical_cost = bayesian_optimizer._find_analytical_results(system, z0, z1, q, w)
writer.writerow([system.E, system.B, system.s, system.H, system.N, z0, z1, q, w,
bo_design, bo_cost = bayesian_optimizer.run(system, Workload(z0, z1, q, w))
analytical_design, analytical_cost = bayesian_optimizer._find_analytical_results(system, workload.z0,
workload.z1, workload.q, workload.w)
writer.writerow([system.E, system.B, system.s, system.H, system.N, workload.z0, workload.z1, workload.q, workload.w,
bo_design, analytical_design, bo_cost, analytical_cost, analytical_cost - bo_cost])


if __name__ == "__main__":
file_dir = os.path.dirname(__file__)
config_path = os.path.join(file_dir, "../endure.toml")
config_path = os.path.join(file_dir, "BayesianBaseline.toml")
with open(config_path) as fid:
config = toml.load(fid)
bayesian_optimizer = BayesianPipeline(config)
generator = LCMDataGenerator()
solver = ClassicSolver(config)
bounds = LSMBounds()
generator = LCMDataGenerator(bounds)
cf = EndureCost(config)

to_cuda(bayesian_optimizer)
to_cuda(generator)
to_cuda(solver)
to_cuda(cf)

compare_designs()
compare_designs(config["job"]["BayesianOptimization"]["multi_jobs_number"], config["job"]["BayesianOptimization"]["multi_job_file"])
File renamed without changes.

0 comments on commit 3d0ec44

Please sign in to comment.