Commit

add seeds
jduerholt committed Dec 18, 2023
1 parent d4c5c49 commit 379a7c7
Showing 11 changed files with 71 additions and 34 deletions.
40 changes: 25 additions & 15 deletions bofire/data_models/domain/features.py
@@ -104,7 +104,7 @@ def get_by_keys(self, keys: Sequence[str]) -> Features:
def get(
self,
includes: Union[Type, List[Type]] = AnyFeature,
excludes: Union[Type, List[Type]] = None,
excludes: Union[Type, List[Type]] = None, # type: ignore
exact: bool = False,
) -> Features:
"""get features of the domain
@@ -132,7 +132,7 @@ def get(
def get_keys(
self,
includes: Union[Type, List[Type]] = AnyFeature,
excludes: Union[Type, List[Type]] = None,
excludes: Union[Type, List[Type]] = None, # type: ignore
exact: bool = False,
) -> List[str]:
"""Method to get feature keys of the domain
@@ -186,6 +186,7 @@ def sample(
self,
n: int = 1,
method: SamplingMethodEnum = SamplingMethodEnum.UNIFORM,
seed: Optional[int] = None,
) -> pd.DataFrame:
"""Draw sobol samples
@@ -199,15 +200,18 @@
"""
if method == SamplingMethodEnum.UNIFORM:
return self.validate_candidates(
pd.concat([feat.sample(n) for feat in self.get(Input)], axis=1) # type: ignore
pd.concat(
[feat.sample(n, seed=seed) for feat in self.get(Input)], # type: ignore
axis=1,
)
)
free_features = self.get_free()
if method == SamplingMethodEnum.SOBOL:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
X = Sobol(len(free_features)).random(n)
X = Sobol(len(free_features), seed=seed).random(n)
else:
X = LatinHypercube(len(free_features)).random(n)
X = LatinHypercube(len(free_features), seed=seed).random(n)
res = []
for i, feat in enumerate(free_features):
if isinstance(feat, ContinuousInput):
@@ -247,7 +251,9 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
for feature in self:
if feature.key not in candidates:
raise ValueError(f"no col for input feature `{feature.key}`")
candidates[feature.key] = feature.validate_candidental(candidates[feature.key]) # type: ignore
candidates[feature.key] = feature.validate_candidental( # type: ignore
candidates[feature.key]
)
if candidates[self.get_keys()].isnull().to_numpy().any():
raise ValueError("there are null values")
if candidates[self.get_keys()].isna().to_numpy().any():
@@ -260,7 +266,10 @@ def validate_experiments(
for feature in self:
if feature.key not in experiments:
raise ValueError(f"no col for input feature `{feature.key}`")
experiments[feature.key] = feature.validate_experimental(experiments[feature.key], strict=strict) # type: ignore
experiments[feature.key] = feature.validate_experimental(
experiments[feature.key],
strict=strict, # type: ignore
)
if experiments[self.get_keys()].isnull().to_numpy().any():
raise ValueError("there are null values")
if experiments[self.get_keys()].isna().to_numpy().any():
@@ -270,7 +279,7 @@
def get_categorical_combinations(
self,
include: Union[Type, List[Type]] = Input,
exclude: Union[Type, List[Type]] = None,
exclude: Union[Type, List[Type]] = None, # type: ignore
):
"""get a list of tuples pairing the feature keys with a list of valid categories
@@ -361,9 +370,7 @@ def _get_transform_info(
counter += len(feat.descriptors)
elif isinstance(specs[feat.key], MolFeatures):
assert isinstance(feat, MolecularInput)
descriptor_names = specs[
feat.key
].get_descriptor_names() # type: ignore
descriptor_names = specs[feat.key].get_descriptor_names() # type: ignore
features2idx[feat.key] = tuple(
(np.array(range(len(descriptor_names))) + counter).tolist()
)
@@ -450,7 +457,9 @@ def inverse_transform(
transformed.append(feat.from_descriptor_encoding(experiments))
elif isinstance(specs[feat.key], MolFeatures):
assert isinstance(feat, CategoricalMolecularInput)
transformed.append(feat.from_descriptor_encoding(specs[feat.key], experiments)) # type: ignore
transformed.append(
feat.from_descriptor_encoding(specs[feat.key], experiments) # type: ignore
)

return pd.concat(transformed, axis=1)

@@ -574,9 +583,9 @@ def get_by_objective(
features=sorted(
filter_by_attribute(
self.get(ContinuousOutput).features,
lambda of: of.objective,
lambda of: of.objective, # type: ignore
includes,
excludes,
excludes, # type: ignore
exact,
)
)
@@ -682,7 +691,8 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
+ [
[f"{key}_pred", f"{key}_sd"]
for key in self.get_keys_by_objective(
excludes=Objective, includes=None # type: ignore
excludes=Objective,
includes=None, # type: ignore
)
]
)
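For context, a minimal sketch (not part of the commit) of the seeding pattern that the `Inputs.sample` hunk above relies on: scipy's QMC engines accept a `seed` argument, so Sobol and Latin hypercube designs become reproducible. The dimensions and feature bounds below are illustrative only.

```python
import numpy as np
from scipy.stats.qmc import LatinHypercube, Sobol, scale

d, n, seed = 2, 8, 42  # two illustrative features, eight samples

# same seed -> identical Sobol design (drawn on the unit cube)
X_sobol = Sobol(d, seed=seed).random(n)
assert np.allclose(Sobol(d, seed=seed).random(n), X_sobol)

# Latin hypercube works the same way; scale maps unit-cube samples onto
# feature bounds, e.g. x1 in [0, 1] and x2 in [0, 10]
X_lhs = LatinHypercube(d, seed=seed).random(n)
X_scaled = scale(X_lhs, l_bounds=[0.0, 0.0], u_bounds=[1.0, 10.0])
```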
7 changes: 5 additions & 2 deletions bofire/data_models/features/categorical.py
@@ -298,7 +298,7 @@ def from_ordinal_encoding(self, values: pd.Series) -> pd.Series:
enc = np.array(self.categories)
return pd.Series(enc[values], index=values.index, name=self.key)

def sample(self, n: int) -> pd.Series:
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
@@ -308,7 +308,10 @@ def sample(self, n: int) -> pd.Series:
pd.Series: drawn samples.
"""
return pd.Series(
name=self.key, data=np.random.choice(self.get_allowed_categories(), n)
name=self.key,
data=np.random.default_rng(seed=seed).choice(
self.get_allowed_categories(), n
),
)

def get_bounds(
6 changes: 4 additions & 2 deletions bofire/data_models/features/continuous.py
@@ -118,7 +118,7 @@ def validate_candidental(self, values: pd.Series) -> pd.Series:
)
return values

def sample(self, n: int) -> pd.Series:
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
@@ -129,7 +129,9 @@ def sample(self, n: int) -> pd.Series:
"""
return pd.Series(
name=self.key,
data=np.random.uniform(self.lower_bound, self.upper_bound, n),
data=np.random.default_rng(seed=seed).uniform(
self.lower_bound, self.upper_bound, n
),
)

def __str__(self) -> str:
8 changes: 5 additions & 3 deletions bofire/data_models/features/discrete.py
@@ -1,4 +1,4 @@
from typing import ClassVar, Literal
from typing import ClassVar, Literal, Optional

import numpy as np
import pandas as pd
@@ -75,7 +75,7 @@ def validate_candidental(self, values: pd.Series) -> pd.Series:
)
return values

def sample(self, n: int) -> pd.Series:
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Draw random samples from the feature.
Args:
@@ -84,7 +84,9 @@
Returns:
pd.Series: drawn samples.
"""
return pd.Series(name=self.key, data=np.random.choice(self.values, n))
return pd.Series(
name=self.key, data=np.random.default_rng(seed=seed).choice(self.values, n)
)

def from_continuous(self, values: pd.DataFrame) -> pd.Series:
"""Rounds continuous values to the closest discrete ones.
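The categorical, continuous, and discrete `sample` methods above all switch to the same NumPy idiom: build a local generator with `np.random.default_rng(seed)` instead of drawing from the global random state. A small illustrative sketch (values are made up, not from the commit):

```python
import numpy as np
import pandas as pd

seed = 7

# categorical/discrete style: seeded choice from the allowed values
cats = pd.Series(np.random.default_rng(seed).choice(["a", "b", "c"], 5), name="cat")

# continuous style: seeded uniform draw between bounds
cont = pd.Series(np.random.default_rng(seed).uniform(0.0, 10.0, 5), name="x")

# the same seed reproduces the draw without touching np.random's global state
assert cats.equals(pd.Series(np.random.default_rng(seed).choice(["a", "b", "c"], 5), name="cat"))
assert np.allclose(cont, np.random.default_rng(seed).uniform(0.0, 10.0, 5))
```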
2 changes: 1 addition & 1 deletion bofire/data_models/features/feature.py
@@ -95,7 +95,7 @@ def validate_candidental(self, values: pd.Series) -> pd.Series:
pass

@abstractmethod
def sample(self, n: int) -> pd.Series:
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
"""Sample a series of allowed values.
Args:
2 changes: 1 addition & 1 deletion bofire/data_models/features/molecular.py
@@ -47,7 +47,7 @@ def is_fixed(self) -> bool:
def fixed_value(self, transform_type: Optional[AnyMolFeatures] = None) -> None:
return None

def sample(self, n: int) -> pd.Series:
def sample(self, n: int, seed: Optional[int] = None) -> pd.Series:
raise ValueError("Sampling not supported for `MolecularInput`")

def get_bounds(
8 changes: 5 additions & 3 deletions bofire/strategies/samplers/polytope.py
@@ -47,7 +47,9 @@ def __init__(

def _ask(self, n: int) -> pd.DataFrame:
if len(self.domain.constraints) == 0:
return self.domain.inputs.sample(n, self.fallback_sampling_method)
return self.domain.inputs.sample(
n, self.fallback_sampling_method, seed=self._get_seed()
)

# check if we have pseudo fixed features in the linear equality constraints
# a pseudo fixed is a linear euquality constraint with only one feature included
@@ -142,7 +144,7 @@ def _ask(self, n: int) -> pd.DataFrame:
equality_constraints=combined_eqs if len(combined_eqs) > 0 else None,
n_burnin=self.n_burnin,
thinning=self.n_thinning,
seed=self.rng.integers(1, 1000),
seed=self._get_seed(),
).squeeze(dim=0)

# check that the random generated candidates are not always the same
Expand All @@ -163,7 +165,7 @@ def _ask(self, n: int) -> pd.DataFrame:

# setup the categoricals and discrete ones as uniform sampled vals
for feat in self.domain.get_features([CategoricalInput, DiscreteInput]):
samples[feat.key] = feat.sample(n) # type: ignore
samples[feat.key] = feat.sample(n, seed=self._get_seed()) # type: ignore

# setup the fixed continuous ones
for key, value in fixed_features.items():
8 changes: 6 additions & 2 deletions bofire/strategies/samplers/rejection.py
@@ -28,15 +28,19 @@

def _ask(self, n: int) -> pd.DataFrame:
if len(self.domain.constraints) == 0:
return self.domain.inputs.sample(n, self.sampling_method)
return self.domain.inputs.sample(
n, self.sampling_method, seed=self._get_seed()
)
n_iters = 0
n_found = 0
valid_samples = []
while n_found < n:
if n_iters > self.max_iters:
raise ValueError("Maximum iterations exceeded in rejection sampling.")
samples = self.domain.inputs.sample(
self.num_base_samples, method=self.sampling_method
self.num_base_samples,
method=self.sampling_method,
seed=self._get_seed(),
)
valid = self.domain.constraints.is_fulfilled(samples)
n_found += np.sum(valid)
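A self-contained sketch of the seeded rejection loop above, written against plain NumPy/pandas rather than the BoFire classes (the helper, constraint, and column names are illustrative): each batch of base samples gets a fresh child seed drawn from a parent generator, mirroring `_get_seed`, so the whole loop is reproducible.

```python
import numpy as np
import pandas as pd

def rejection_sample(n, is_fulfilled, num_base_samples=128, max_iters=1000, seed=None):
    parent = np.random.default_rng(seed)
    batches, n_found, n_iters = [], 0, 0
    while n_found < n:
        if n_iters > max_iters:
            raise ValueError("Maximum iterations exceeded in rejection sampling.")
        child_seed = int(parent.integers(1, 100000))  # one fresh seed per batch
        rng = np.random.default_rng(child_seed)
        batch = pd.DataFrame(
            {"x1": rng.uniform(0, 1, num_base_samples),
             "x2": rng.uniform(0, 1, num_base_samples)}
        )
        valid = is_fulfilled(batch)          # boolean mask of feasible rows
        n_found += int(valid.sum())
        batches.append(batch[valid])
        n_iters += 1
    return pd.concat(batches, ignore_index=True).iloc[:n]

# keep only points fulfilling x1 + x2 <= 1; same seed -> same result
s1 = rejection_sample(10, lambda df: df["x1"] + df["x2"] <= 1.0, seed=42)
s2 = rejection_sample(10, lambda df: df["x1"] + df["x2"] <= 1.0, seed=42)
assert s1.equals(s2)
```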
7 changes: 6 additions & 1 deletion bofire/strategies/samplers/sampler.py
@@ -92,7 +92,12 @@ def ask(
raise_validation_error=raise_validation_error,
)
return self.domain.validate_candidates(
samples.sample(n=candidate_count, replace=False, ignore_index=True),
samples.sample(
n=candidate_count,
replace=False,
ignore_index=True,
random_state=self._get_seed(),
),
only_inputs=True,
raise_validation_error=raise_validation_error,
)
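The subsampling above leans on pandas: `DataFrame.sample` accepts a `random_state`, so picking `candidate_count` rows without replacement is deterministic for a fixed seed. A tiny sketch with made-up data:

```python
import pandas as pd

samples = pd.DataFrame({"x1": [0.1, 0.4, 0.7, 0.9], "x2": [1.0, 2.0, 3.0, 4.0]})

picked = samples.sample(n=2, replace=False, ignore_index=True, random_state=7)
picked_again = samples.sample(n=2, replace=False, ignore_index=True, random_state=7)
assert picked.equals(picked_again)  # same seed -> same subsample
```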
9 changes: 9 additions & 0 deletions bofire/strategies/strategy.py
@@ -26,6 +26,15 @@ def __init__(
self._experiments = None
self._candidates = None

def _get_seed(self) -> int:
"""Returns an integer sampled from the strategies random number generator,
that can be used to seed dependent generators.
Returns:
int: random seed.
"""
return int(self.rng.integers(1, 100000))

@classmethod
def from_spec(cls, data_model: DataModel) -> "Strategy":
"""Used by the mapper to map from data model to functional strategy."""
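A minimal sketch of the pattern `_get_seed` enables (illustrative names, not the BoFire `Strategy` class itself): one parent generator, seeded once, hands out integer seeds, and every dependent component builds its own generator from one of them, so nested randomness stays reproducible end to end.

```python
import numpy as np

class SeededSampler:
    """Stand-in for any component that needs its own random stream."""

    def __init__(self, seed: int):
        self.rng = np.random.default_rng(seed)

    def draw(self, n: int) -> np.ndarray:
        return self.rng.uniform(size=n)

parent_rng = np.random.default_rng(2023)  # seeded once for the whole strategy

def get_seed() -> int:
    # mirrors Strategy._get_seed: derive a child seed from the parent generator
    return int(parent_rng.integers(1, 100000))

first = SeededSampler(get_seed()).draw(3)
second = SeededSampler(get_seed()).draw(3)  # different child seed -> independent stream
```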
8 changes: 4 additions & 4 deletions tests/bofire/data_models/test_features.py
@@ -825,7 +825,6 @@ def test_categorical_descriptor_from_descriptor_encoding(key, categories, descri
data=[[1.05, 2.5, 6], [4, 4.5, 9]],
)
samples = c1.from_descriptor_encoding(descriptor_values)
print(samples)
assert np.all(samples == pd.Series([categories[0], categories[1]]))

c2 = CategoricalDescriptorInput(
@@ -837,7 +836,6 @@ def test_categorical_descriptor_from_descriptor_encoding(key, categories, descri
)

samples = c2.from_descriptor_encoding(descriptor_values)
print(samples)
assert np.all(samples == pd.Series([categories[1], categories[1]]))


@@ -1426,14 +1424,16 @@ def test_inputs_get_free(features, expected):
inputs,
Inputs(features=[if1, if2, if3, if4, if5, if7]),
]
for num_samples in [1, 2, 1024]
for num_samples in [1, 2, 64]
for method in ["UNIFORM", "SOBOL", "LHS"]
],
)
def test_inputs_sample(features: Inputs, num_samples, method):
samples = features.sample(num_samples, method=method)
samples = features.sample(num_samples, method=method, seed=42)
assert samples.shape == (num_samples, len(features))
assert list(samples.columns) == features.get_keys()
samples2 = features.sample(num_samples, method=method, seed=42)
assert_frame_equal(samples2, samples)


@pytest.mark.parametrize(
