Skip to content

Commit

Permalink
[Refactor] Use LSMBounds for generator (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
ephoris authored Mar 4, 2024
1 parent 7f99db0 commit 709752b
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 36 deletions.
49 changes: 22 additions & 27 deletions endure/lcm/data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np

from endure.lsm.types import LSMDesign, System, Policy
from endure.lsm.types import LSMDesign, System, Policy, LSMBounds
from endure.lsm.cost import EndureCost
from endure.lcm.data.input_features import (
kWORKLOAD_HEADER,
Expand All @@ -17,30 +17,24 @@
class LCMDataGenerator:
def __init__(
self,
bits_per_elem_range: Tuple[int, int] = (1, 10),
size_ratio_range: Tuple[int, int] = (2, 31),
page_sizes: List[int] = [4, 8, 16],
entry_sizes: List[int] = [1024, 2048, 4096, 8192],
memory_budget_range: Tuple[float, float] = (5.0, 20.0),
selectivity_range: Tuple[float, float] = (1e-7, 1e-9),
elements_range: Tuple[int, int] = (100000000, 1000000000),
max_levels: int = 16,
bounds: LSMBounds,
precision: int = 3,
) -> None:
self._header = None
self.precision = precision

self.bits_per_elem_min = bits_per_elem_range[0]
self.bits_per_elem_max = bits_per_elem_range[1]
self.size_ratio_min = size_ratio_range[0]
self.size_ratio_max = size_ratio_range[1]
self.entry_sizes = entry_sizes
self.memory_budget_range = memory_budget_range
self.page_sizes = page_sizes
self.selectivity_range = selectivity_range
self.elements_range = elements_range
self.max_levels = max_levels
self.cf = EndureCost(max_levels=max_levels)
self.bounds = bounds
self.bits_per_elem_min = bounds.bits_per_elem_range[0]
self.bits_per_elem_max = bounds.bits_per_elem_range[1]
self.size_ratio_min = bounds.size_ratio_range[0]
self.size_ratio_max = bounds.size_ratio_range[1]
self.entry_sizes = bounds.entry_sizes
self.memory_budget_range = bounds.memory_budget_range
self.page_sizes = bounds.page_sizes
self.selectivity_range = bounds.selectivity_range
self.elements_range = bounds.elements_range
self.max_levels = bounds.max_considered_levels
self.cf = EndureCost(max_levels=bounds.max_considered_levels)

self.header = []

Expand Down Expand Up @@ -142,10 +136,11 @@ def generate_row(self) -> dict:
class ClassicGenerator(LCMDataGenerator):
def __init__(
self,
bounds: LSMBounds,
policies: List[Policy] = [Policy.Tiering, Policy.Leveling],
**kwargs,
):
super().__init__(**kwargs)
super().__init__(bounds, **kwargs)
self.policies = policies
cost_header = self._gen_cost_header()
workload_header = self._gen_workload_header()
Expand Down Expand Up @@ -195,8 +190,8 @@ def _gen_row_data(self) -> list:


class KHybridGenerator(LCMDataGenerator):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def __init__(self, bounds: LSMBounds, **kwargs):
super().__init__(bounds, **kwargs)
cost_header = self._gen_cost_header()
workload_header = self._gen_workload_header()
system_header = self._gen_system_header()
Expand Down Expand Up @@ -251,8 +246,8 @@ def _gen_row_data(self) -> list:


class QCostGenerator(LCMDataGenerator):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def __init__(self, bounds: LSMBounds, **kwargs):
super().__init__(bounds, **kwargs)
cost_header = self._gen_cost_header()
workload_header = self._gen_workload_header()
system_header = self._gen_system_header()
Expand Down Expand Up @@ -301,8 +296,8 @@ def _gen_row_data(self) -> list:


class YZCostGenerator(LCMDataGenerator):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def __init__(self, bounds: LSMBounds, **kwargs):
super().__init__(bounds, **kwargs)
cost_header = self._gen_cost_header()
workload_header = self._gen_workload_header()
system_header = self._gen_system_header()
Expand Down
11 changes: 8 additions & 3 deletions endure/lsm/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,14 @@ class LSMDesign:

# LSMBounds: dataclass whose fields are read by LCMDataGenerator.__init__ to
# configure the data generator (ranges are indexed [0]/[1] for min/max there).
# This span is a rendered diff hunk ("8 additions & 3 deletions"), so the
# removed and added fields appear together below.
@dataclass
class LSMBounds:
# --- Fields removed by this commit (pre-change definition) ---
max_levels: int = 20
bpe: Tuple[float, float] = (1.0, 10.0)
size_ratio: Tuple[float, float] = (2, 31)
# --- Fields added by this commit (post-change definition) ---
# Passed to EndureCost(max_levels=...) by the generator.
# NOTE(review): the generator's previous keyword default for max_levels was
# 16, while this default is 20 -- confirm the change in default is intended.
max_considered_levels: int = 20
# (min, max) pair; the generator stores [0] as the minimum and [1] as the maximum.
bits_per_elem_range: Tuple[int, int] = (1, 10)
# (min, max) pair; the generator stores [0] as the minimum and [1] as the maximum.
size_ratio_range: Tuple[int, int] = (2, 31)
# Candidate values; tuples here replace the list defaults the generator used before.
page_sizes: Tuple = (4, 8, 16)
entry_sizes: Tuple = (1024, 2048, 4096, 8192)
memory_budget_range: Tuple[float, float] = (5.0, 20.0)
# NOTE(review): the first value (1e-7) is larger than the second (1e-9), unlike
# the other ranges -- confirm consumers expect this ordering.
selectivity_range: Tuple[float, float] = (1e-7, 1e-9)
elements_range: Tuple[int, int] = (100000000, 1000000000)


@dataclass
Expand Down
13 changes: 7 additions & 6 deletions jobs/lcm_data_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pyarrow.parquet as pq

from endure.data.io import Reader
from endure.lsm.types import Policy
from endure.lsm.types import Policy, LSMBounds
import endure.lcm.data.generator as Generators


Expand All @@ -26,16 +26,17 @@ def __init__(self, config):
def _choose_generator(self) -> Generators.LCMDataGenerator:
choice = self.setting["generator"]
max_levels = self.config["lsm"]["max_levels"]
bounds = LSMBounds()
generators = {
"TierCost": Generators.ClassicGenerator(
policies=[Policy.Tiering], max_levels=max_levels
bounds, policies=[Policy.Tiering], max_levels=max_levels
),
"LevelCost": Generators.ClassicGenerator(
policies=[Policy.Leveling], max_levels=max_levels
bounds, policies=[Policy.Leveling], max_levels=max_levels
),
"QCost": Generators.QCostGenerator(max_levels=max_levels),
"KHybridCost": Generators.KHybridGenerator(max_levels=max_levels),
"ClassicCost": Generators.ClassicGenerator(max_levels=max_levels),
"QCost": Generators.QCostGenerator(bounds, max_levels=max_levels),
"KHybridCost": Generators.KHybridGenerator(bounds, max_levels=max_levels),
"ClassicCost": Generators.ClassicGenerator(bounds, max_levels=max_levels),
}
generator = generators.get(choice, None)
if generator is None:
Expand Down

0 comments on commit 709752b

Please sign in to comment.