From aa4ff23cfaa0669f0c99b4fb072870126521754a Mon Sep 17 00:00:00 2001
From: Shahar Bar
Date: Sun, 11 Aug 2024 08:04:51 +0300
Subject: [PATCH] Refactor MAB and Strategy Classes with Cold Start Methods and Enhanced Validation

Change log:
1. Moved Strategy, Model, and MAB to strategy.py, model.py, and the new mab.py. base.py now only holds type definitions and the abstract PyBanditsBaseModel. The abstract MAB now allows every child class to either accept a strategy instance as a parameter, or to receive the strategy parameters and instantiate the strategy accordingly.
2. The from_state functionality is now inherited directly from BaseMab by all MABs.
3. Replaced the cold-start utility functions in cmab.py and smab.py with a cold_start class method inherited from BaseMab (see the usage sketches below). Updated the test cases to use the new cold_start class methods accordingly.
4. Introduced numerize_field and get_expected_value_from_state methods in the Strategy class to handle default values and state extraction. Added a field_validator for exploit_p in BestActionIdentification and for subsidy_factor in CostControlBandit to ensure proper default handling and validation.
5. Merged common functionality into a new CostControlStrategy abstract class, which is now inherited by CostControlBandit and MultiObjectiveCostControlBandit. Simplified the select_action methods by using helper methods such as _evaluate_and_select and _reduce.
6. Moved get_pareto_front into a new MultiObjectiveStrategy abstract class, which is now inherited by MultiObjectiveBandit and MultiObjectiveCostControlBandit.
7. In model.py, removed the redundant BaseBetaMO and BaseBayesianLogisticRegression. Added a cold_start class method to the BetaMO and BayesianLogisticRegression models.
8. Added extract_argument_names_from_function in utils.py to allow extracting a function's parameter names from its handle.
9. Renamed test_base.py to test_mab.py.
10. Updated deprecated linter settings in pyproject.toml.
11. Added a test_smab_mo_cc_update test in test_smab.py.
12. Bumped the version to 1.0.0 in pyproject.toml.
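Usage sketch for the new cold_start/from_state API. This is a minimal example based on the updated tutorials in this patch; the action IDs and costs below are illustrative, not library defaults:

    from pybandits.smab import SmabBernoulli, SmabBernoulliMOCC

    # Cold start: each action gets a default prior, i.e. Beta(n_successes=1, n_failures=1).
    mab = SmabBernoulli.cold_start(action_ids=["a1", "a2", "a3"])

    # Keyword arguments prefixed with "action_ids_" (ACTION_IDS_PREFIX) carry per-action
    # model parameters, e.g. the per-action cost used by the cost-control strategies.
    mab_mo_cc = SmabBernoulliMOCC.cold_start(action_ids_cost={"a1": 30, "a2": 10, "a3": 20})

    # get_state()/from_state() round-trips are now provided by BaseMab for every bandit.
    class_name, state = mab.get_state()
    restored = SmabBernoulli.from_state(state)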
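Strategy parameters can be passed either directly as keyword arguments or as a pre-built strategy instance. A minimal contextual sketch under the class and argument names introduced in this patch (the action IDs, n_features, and exploit_p values are illustrative):

    from pybandits.cmab import CmabBernoulliBAI
    from pybandits.strategy import BestActionIdentification

    # Strategy kwargs are forwarded to the strategy class declared on the bandit...
    cmab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=5, exploit_p=0.8)

    # ...or an explicit strategy instance can be supplied instead.
    cmab = CmabBernoulliBAI(actions=cmab.actions, strategy=BestActionIdentification(exploit_p=0.8))

Until its first update, a cold-started contextual bandit predicts actions at random (cold_start sets predict_actions_randomly to True when the attribute exists).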
--- docs/tutorials/mab.ipynb | 8 +- docs/tutorials/smab_mo_cc.ipynb | 9 +- pybandits/base.py | 229 +---------------- pybandits/cmab.py | 222 +--------------- pybandits/consts.py | 1 + pybandits/mab.py | 382 ++++++++++++++++++++++++++++ pybandits/model.py | 154 ++++++----- pybandits/smab.py | 308 +--------------------- pybandits/strategy.py | 299 +++++++++++++++------- pybandits/utils.py | 25 +- pyproject.toml | 2 +- tests/test_cmab.py | 274 +++++++++----------- tests/{test_base.py => test_mab.py} | 127 ++++++++- tests/test_model.py | 37 ++- tests/test_smab.py | 139 +++++----- tests/test_strategy.py | 58 +++-- 16 files changed, 1082 insertions(+), 1192 deletions(-) create mode 100644 pybandits/consts.py create mode 100644 pybandits/mab.py rename tests/{test_base.py => test_mab.py} (54%) diff --git a/docs/tutorials/mab.ipynb b/docs/tutorials/mab.ipynb index 2f0fb76..22c5666 100644 --- a/docs/tutorials/mab.ipynb +++ b/docs/tutorials/mab.ipynb @@ -20,7 +20,7 @@ "from rich import print\n", "\n", "from pybandits.model import Beta\n", - "from pybandits.smab import SmabBernoulli, create_smab_bernoulli_cold_start" + "from pybandits.smab import SmabBernoulli" ] }, { @@ -73,8 +73,6 @@ "metadata": {}, "outputs": [], "source": [ - "n_objectives = 2\n", - "\n", "mab = SmabBernoulli(\n", " actions={\n", " \"a1\": Beta(n_successes=1, n_failures=1),\n", @@ -137,7 +135,7 @@ "id": "564914fd-73cc-4854-8ec7-548970f794a6", "metadata": {}, "source": [ - "You can initialize the bandit via the utility function `create_smab_bernoulli_mo_cc_cold_start()`. This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." + "You can initialize the bandit via the class method `SmabBernoulli.cold_start()`. This is particularly useful in a cold start setting when there is no prior knowledge on the Beta distributions. In this case, for all Betas `n_successes` and `n_failures` are set to `1`." ] }, { @@ -148,7 +146,7 @@ "outputs": [], "source": [ "# generate a smab bernoulli in cold start settings\n", - "mab = create_smab_bernoulli_cold_start(action_ids=[\"a1\", \"a2\", \"a3\"])" + "mab = SmabBernoulli.cold_start(action_ids=[\"a1\", \"a2\", \"a3\"])" ] }, { diff --git a/docs/tutorials/smab_mo_cc.ipynb b/docs/tutorials/smab_mo_cc.ipynb index ae4436c..880654c 100644 --- a/docs/tutorials/smab_mo_cc.ipynb +++ b/docs/tutorials/smab_mo_cc.ipynb @@ -20,7 +20,7 @@ "from rich import print\n", "\n", "from pybandits.model import Beta, BetaMOCC\n", - "from pybandits.smab import SmabBernoulliMOCC, create_smab_bernoulli_mo_cc_cold_start" + "from pybandits.smab import SmabBernoulliMOCC" ] }, { @@ -72,8 +72,6 @@ "metadata": {}, "outputs": [], "source": [ - "n_objectives = 2\n", - "\n", "mab = SmabBernoulliMOCC(\n", " actions={\n", " \"a1\": BetaMOCC(counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)], cost=30),\n", @@ -153,7 +151,7 @@ "id": "564914fd-73cc-4854-8ec7-548970f794a6", "metadata": {}, "source": [ - "You can initialize the bandit via the utility function `create_smab_bernoulli_mo_cc_cold_start()`. This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." + "You can initialize the bandit via the class method `SmabBernoulliMOCC.cold_start()`. 
This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." ] }, { @@ -165,10 +163,9 @@ "source": [ "# list of action IDs with their cost\n", "action_ids_cost = {\"a1\": 30, \"a2\": 10, \"a3\": 20}\n", - "n_objectives = 2\n", "\n", "# generate a smab bernoulli in cold start settings\n", - "mab = create_smab_bernoulli_mo_cc_cold_start(action_ids_cost=action_ids_cost, n_objectives=n_objectives)" + "mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost)" ] }, { diff --git a/pybandits/base.py b/pybandits/base.py index 97e42d8..cc56a9e 100644 --- a/pybandits/base.py +++ b/pybandits/base.py @@ -21,233 +21,26 @@ # SOFTWARE. -from abc import ABC, abstractmethod -from typing import Any, Dict, List, NewType, Optional, Set, Tuple, Union +from typing import Dict, List, NewType, Tuple, Union -import numpy as np -from pydantic import ( - BaseModel, - NonNegativeInt, - confloat, - conint, - constr, - field_validator, - model_validator, - validate_call, -) +from pydantic import BaseModel, confloat, conint, constr ActionId = NewType("ActionId", constr(min_length=1)) Float01 = NewType("Float_0_1", confloat(ge=0, le=1)) Probability = NewType("Probability", Float01) -Predictions = NewType("Predictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +CmabPredictions = NewType( + "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]] +) +Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions]) BinaryReward = NewType("BinaryReward", conint(ge=0, le=1)) +ActionRewardLikelihood = NewType( + "ActionRewardLikelihood", + Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], +) class PyBanditsBaseModel(BaseModel, extra="forbid"): """ BaseModel of the PyBandits library. """ - - -class Model(PyBanditsBaseModel, ABC): - """ - Class to model the prior distributions. - """ - - @abstractmethod - def sample_proba(self) -> Probability: - """ - Sample the probability of getting a positive reward. - """ - - @abstractmethod - def update(self, rewards: List[Any]): - """ - Update the model parameters. - """ - - -class Strategy(PyBanditsBaseModel, ABC): - """ - Strategy to select actions in multi-armed bandits. - """ - - @abstractmethod - def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId: - """ - Select the action. - """ - - -class BaseMab(PyBanditsBaseModel, ABC): - """ - Multi-armed bandit superclass. - - Parameters - ---------- - actions: Dict[ActionId, Model] - The list of possible actions, and their associated Model. - strategy: Strategy - The strategy used to select actions. - epsilon: Optional[Float01] - The probability of selecting a random action. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. 
- """ - - actions: Dict[ActionId, Model] - strategy: Strategy - epsilon: Optional[Float01] - default_action: Optional[ActionId] - - @field_validator("actions", mode="before") - @classmethod - def at_least_2_actions_are_defined(cls, v): - if len(v) < 2: - raise AttributeError("At least 2 actions should be defined.") - return v - - @model_validator(mode="after") - def check_default_action(self): - if not self.epsilon and self.default_action: - raise AttributeError("A default action should only be defined when epsilon is defined.") - if self.default_action and self.default_action not in self.actions: - raise AttributeError("The default action should be defined in the actions.") - return self - - def _get_valid_actions(self, forbidden_actions: Optional[Set[ActionId]]) -> Set[ActionId]: - """ - Given a set of forbidden action IDs, return a set of valid action IDs. - - Parameters - ---------- - forbidden_actions: Optional[Set[ActionId]] - The set of forbidden action IDs. - - Returns - ------- - valid_actions: Set[ActionId] - The list of valid (i.e. not forbidden) action IDs. - """ - if forbidden_actions is None: - forbidden_actions = set() - - if not all(a in self.actions.keys() for a in forbidden_actions): - raise ValueError("forbidden_actions contains invalid action IDs.") - valid_actions = set(self.actions.keys()) - forbidden_actions - if len(valid_actions) == 0: - raise ValueError("All actions are forbidden. You must allow at least 1 action.") - if self.default_action and self.default_action not in valid_actions: - raise ValueError("The default action is forbidden.") - - return valid_actions - - def _check_update_params(self, actions: List[ActionId], rewards: List[Union[NonNegativeInt, List[NonNegativeInt]]]): - """ - Verify that the given list of action IDs is a subset of the currently defined actions. - - Parameters - ---------- - actions : List[ActionId] - The selected action for each sample. - rewards: List[Union[BinaryReward, List[BinaryReward]]] - The reward for each sample. - """ - invalid = set(actions) - set(self.actions.keys()) - if invalid: - raise AttributeError(f"The following invalid action(s) were specified: {invalid}.") - if len(actions) != len(rewards): - raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") - - @abstractmethod - @validate_call - def update(self, actions: List[ActionId], rewards: List[Union[BinaryReward, List[BinaryReward]]], *args, **kwargs): - """ - Update the stochastic multi-armed bandit model. - - actions: List[ActionId] - The selected action for each sample. - rewards: List[Union[BinaryReward, List[BinaryReward]]] - The reward for each sample. - """ - - @abstractmethod - @validate_call - def predict(self, forbidden_actions: Optional[Set[ActionId]] = None): - """ - Predict actions. - - Parameters - ---------- - forbidden_actions : Optional[Set[ActionId]], default=None - Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only - consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions. - Note that: actions = allowed_actions U forbidden_actions. - - Returns - ------- - actions: List[ActionId] of shape (n_samples,) - The actions selected by the multi-armed bandit model. - probs: List[Dict[ActionId, float]] of shape (n_samples,) - The probabilities of getting a positive reward for each action. 
- """ - - def get_state(self) -> (str, dict): - """ - Access the complete model internal state, enough to create an exact copy of the same model from it. - Returns - ------- - model_class_name: str - The name of the class of the model. - model_state: dict - The internal state of the model (actions, scores, etc.). - """ - model_name = self.__class__.__name__ - state: dict = self.dict() - return model_name, state - - @validate_call - def _select_epsilon_greedy_action( - self, - p: Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], - actions: Optional[Dict[ActionId, Model]] = None, - ) -> ActionId: - """ - Wraps self.strategy.select_action function with epsilon-greedy strategy, - such that with probability epsilon a default_action is selected, - and with probability 1-epsilon the select_action function is triggered to choose action. - If no default_action is provided, a random action is selected. - - Reference: Reinforcement Learning: An Introduction, Ch. 2 (Sutton and Burto, 2018) - https://web.stanford.edu/class/psych209/Readings/SuttonBartoIPRLBook2ndEd.pdf&ved=2ahUKEwjMy8WV9N2HAxVe0gIHHVjjG5sQFnoECEMQAQ&usg=AOvVaw3bKK-Y_1kf6XQVwR-UYrBY - - Parameters - ---------- - p: Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] - The dictionary or actions and their sampled probability of getting a positive reward. - For MO strategy, the sampled probability is a list with elements corresponding to the objectives. - actions: Optional[Dict[ActionId, Model]] - The dictionary of actions and their associated Model. - - Returns - ------- - selected_action: ActionId - The selected action. - - Raises - ------ - KeyError - If self.default_action is not present as a key in the probabilities dictionary. - """ - - if self.epsilon: - if self.default_action and self.default_action not in p.keys(): - raise KeyError(f"Default action {self.default_action} not in actions.") - if np.random.binomial(1, self.epsilon): - selected_action = self.default_action if self.default_action else np.random.choice(list(p.keys())) - else: - selected_action = self.strategy.select_action(p=p, actions=actions) - else: - selected_action = self.strategy.select_action(p=p, actions=actions) - return selected_action diff --git a/pybandits/cmab.py b/pybandits/cmab.py index d26a9b9..f34cabc 100644 --- a/pybandits/cmab.py +++ b/pybandits/cmab.py @@ -20,21 +20,16 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Union from numpy import array from numpy.random import choice from numpy.typing import ArrayLike -from pydantic import NonNegativeFloat, PositiveInt, field_validator, validate_call - -from pybandits.base import ActionId, BaseMab, BinaryReward, Float01, Probability -from pybandits.model import ( - BaseBayesianLogisticRegression, - BayesianLogisticRegression, - BayesianLogisticRegressionCC, - create_bayesian_logistic_regression_cc_cold_start, - create_bayesian_logistic_regression_cold_start, -) +from pydantic import field_validator, validate_call + +from pybandits.base import ActionId, BinaryReward, CmabPredictions, Probability +from pybandits.mab import BaseMab +from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC from pybandits.strategy import ( BestActionIdentification, ClassicBandit, @@ -48,7 +43,7 @@ class BaseCmabBernoulli(BaseMab): Parameters ---------- - actions: Dict[ActionId, BaseBayesianLogisticRegression] + actions: Dict[ActionId, BayesianLogisticRegression] The list of possible actions, and their associated Model. strategy: Strategy The strategy used to select actions. @@ -59,7 +54,7 @@ class BaseCmabBernoulli(BaseMab): bandit strategy. """ - actions: Dict[ActionId, BaseBayesianLogisticRegression] + actions: Dict[ActionId, BayesianLogisticRegression] predict_with_proba: bool predict_actions_randomly: bool @@ -77,7 +72,7 @@ def predict( self, context: ArrayLike, forbidden_actions: Optional[Set[ActionId]] = None, - ) -> Tuple[List[ActionId], List[Dict[ActionId, Probability]]]: + ) -> CmabPredictions: """ Predict actions. @@ -96,6 +91,8 @@ def predict( The actions selected by the multi-armed bandit model. probs: List[Dict[ActionId, Probability]] of shape (n_samples,) The probabilities of getting a positive reward for each action. + ws : List[Dict[ActionId, float]] + The weighted sum of logistic regression logits. """ valid_actions = self._get_valid_actions(forbidden_actions) @@ -165,7 +162,7 @@ def update( If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] 
""" - self._check_update_params(actions=actions, rewards=rewards) + self._validate_update_params(actions=actions, rewards=rewards) if len(context) != len(rewards): raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") @@ -209,22 +206,6 @@ class CmabBernoulli(BaseCmabBernoulli): predict_with_proba: bool = False predict_actions_randomly: bool = False - def __init__( - self, - actions: Dict[ActionId, BaseBayesianLogisticRegression], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "CmabBernoulli": - return cls(actions=state["actions"]) - - @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(context=context, actions=actions, rewards=rewards) - class CmabBernoulliBAI(BaseCmabBernoulli): """ @@ -251,26 +232,7 @@ class CmabBernoulliBAI(BaseCmabBernoulli): predict_with_proba: bool = False predict_actions_randomly: bool = False - def __init__( - self, - actions: Dict[ActionId, BayesianLogisticRegression], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - exploit_p: Optional[Float01] = None, - ): - strategy = BestActionIdentification() if exploit_p is None else BestActionIdentification(exploit_p=exploit_p) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "CmabBernoulliBAI": - return cls(actions=state["actions"], exploit_p=state["strategy"].get("exploit_p", None)) - @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(context=context, actions=actions, rewards=rewards) - - -# TODO: add tests class CmabBernoulliCC(BaseCmabBernoulli): """ Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control strategy. @@ -303,163 +265,3 @@ class CmabBernoulliCC(BaseCmabBernoulli): strategy: CostControlBandit predict_with_proba: bool = True predict_actions_randomly: bool = False - - def __init__( - self, - actions: Dict[ActionId, BayesianLogisticRegressionCC], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - subsidy_factor: Optional[Float01] = None, - ): - strategy = CostControlBandit() if subsidy_factor is None else CostControlBandit(subsidy_factor=subsidy_factor) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "CmabBernoulliCC": - return cls(actions=state["actions"], subsidy_factor=state["strategy"].get("subsidy_factor", None)) - - @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(context=context, actions=actions, rewards=rewards) - - -@validate_call -def create_cmab_bernoulli_cold_start( - action_ids: Set[ActionId], - n_features: PositiveInt, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> CmabBernoulli: - """ - Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default - parameters. 
Until the very first update the model will predict actions randomly, where each action has equal - probability to be selected. - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - n_features: PositiveInt - The number of features expected after in the context matrix. This is also the number of betas of the - Bayesian Logistic Regression model. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - cmab: CmabBernoulli - Contextual Multi-Armed Bandit with strategy = ClassicBandit - """ - actions = {} - for a in set(action_ids): - actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features) - mab = CmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action) - mab.predict_actions_randomly = True - return mab - - -@validate_call -def create_cmab_bernoulli_bai_cold_start( - action_ids: Set[ActionId], - n_features: PositiveInt, - exploit_p: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> CmabBernoulliBAI: - """ - Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action - Identification strategy, with default parameters. Until the very first update the model will predict actions - randomly, where each action has equal probability to be selected. - - Reference: Analysis of Thompson Sampling for the Multi-armed Bandit Problem (Agrawal and Goyal, 2012) - http://proceedings.mlr.press/v23/agrawal12/agrawal12.pdf - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - n_features: PositiveInt - The number of features expected after in the context matrix. This is also the number of betas of the - Bayesian Logistic Regression model. - exploit_p: Float_0_1 (default=0.5) - Number in [0, 1] which specifies the amount of exploitation. - If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive reward, - (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive - reward. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. 
- - Returns - ------- - cmab: CmabBernoulliBAI - Contextual Multi-Armed Bandit with strategy = BestActionIdentification - """ - actions = {} - for a in set(action_ids): - actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features) - mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon, default_action=default_action) - mab.predict_actions_randomly = True - return mab - - -@validate_call -def create_cmab_bernoulli_cc_cold_start( - action_ids_cost: Dict[ActionId, NonNegativeFloat], - n_features: PositiveInt, - subsidy_factor: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> CmabBernoulliCC: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control - strategy, with default parameters. - - The sMAB is extended to include a control of the action cost. Each action is associated with a predefined "cost". - At prediction time, the model considers the actions whose expected rewards is above a pre-defined lower bound. Among - these actions, the one with the lowest associated cost is recommended. The expected reward interval for feasible - actions is defined as [(1-subsidy_factor) * max_p, max_p], where max_p is the highest expected reward sampled value. - - Reference: Thompson Sampling for Contextual Bandit Problems with Auxiliary Safety Constraints (Daulton et al., 2019) - https://arxiv.org/abs/1911.00638 - - Multi-Armed Bandits with Cost Subsidy (Sinha et al., 2021) - https://arxiv.org/abs/2011.01488 - - Parameters - ---------- - action_ids_cost: Dict[ActionId, NonNegativeFloat] - The list of possible actions, and their cost. - n_features: PositiveInt - The number of features expected after in the context matrix. This is also the number of betas of the - Bayesian Logistic Regression model. - subsidy_factor: Optional[Float_0_1], default=0.5 - Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. - If subsidy_factor is 1, the bandits always selects the action with the minimum cost. - If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive - reward (it behaves as a classic Bernoulli bandit). - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - cmab: CmabBernoulliCC - Contextual Multi-Armed Bandit with strategy = CostControl - """ - actions = {} - for a, cost in action_ids_cost.items(): - actions[a] = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost) - mab = CmabBernoulliCC( - actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon, default_action=default_action - ) - mab.predict_actions_randomly = True - return mab diff --git a/pybandits/consts.py b/pybandits/consts.py new file mode 100644 index 0000000..eba43a4 --- /dev/null +++ b/pybandits/consts.py @@ -0,0 +1 @@ +ACTION_IDS_PREFIX = "action_ids_" diff --git a/pybandits/mab.py b/pybandits/mab.py new file mode 100644 index 0000000..ca7c0fc --- /dev/null +++ b/pybandits/mab.py @@ -0,0 +1,382 @@ +# MIT License +# +# Copyright (c) 2023 Playtika Ltd. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +import warnings +from abc import ABC, abstractmethod +from collections import defaultdict +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, get_args + +import numpy as np +from pydantic import field_validator, model_validator, validate_call + +from pybandits.base import ( + ActionId, + ActionRewardLikelihood, + BinaryReward, + Float01, + Predictions, + PyBanditsBaseModel, +) +from pybandits.consts import ACTION_IDS_PREFIX +from pybandits.model import Model +from pybandits.strategy import Strategy +from pybandits.utils import extract_argument_names_from_function + + +class BaseMab(PyBanditsBaseModel, ABC): + """ + Multi-armed bandit superclass. + + Parameters + ---------- + actions : Dict[ActionId, Model] + The list of possible actions, and their associated Model. + strategy : Strategy + The strategy used to select actions. + epsilon : Optional[Float01], 0 if not specified. + The probability of selecting a random action. + default_action : Optional[ActionId], None if not specified. + The default action to select with a probability of epsilon when using the epsilon-greedy approach. + If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. + """ + + actions: Dict[ActionId, Model] + strategy: Strategy + epsilon: Optional[Float01] = None + default_action: Optional[ActionId] = None + + def __init__( + self, + actions: Dict[ActionId, Model], + epsilon: Optional[Float01] = None, + default_action: Optional[ActionId] = None, + **strategy_kwargs, + ): + if "strategy" in strategy_kwargs: + strategy = strategy_kwargs["strategy"] + if len(strategy_kwargs) > 1: + raise ValueError("strategy should be the only keyword argument.") + else: + strategy_class = self.model_fields["strategy"].annotation + strategy = strategy_class(**strategy_kwargs) + + super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) + + ############################################ Instance Input Validators ############################################# + + @field_validator("actions", mode="before") + @classmethod + def at_least_2_actions_are_defined(cls, v): + if len(v) == 0: + raise AttributeError("At least one action should be defined.") + elif len(v) == 1: + warnings.warn("Only a single action was supplied. 
This MAB will be deterministic.") + return v + + @model_validator(mode="after") + def validate_default_action(self): + if not self.epsilon and self.default_action: + raise AttributeError("A default action should only be defined when epsilon is defined.") + if self.default_action and self.default_action not in self.actions: + raise AttributeError("The default action should be defined in the actions.") + return self + + ############################################# Method Input Validators ############################################## + + def _get_valid_actions(self, forbidden_actions: Optional[Set[ActionId]]) -> Set[ActionId]: + """ + Given a set of forbidden action IDs, return a set of valid action IDs. + + Parameters + ---------- + forbidden_actions: Optional[Set[ActionId]] + The set of forbidden action IDs. + + Returns + ------- + valid_actions: Set[ActionId] + The list of valid (i.e. not forbidden) action IDs. + """ + if forbidden_actions is None: + forbidden_actions = set() + + if not all(a in self.actions.keys() for a in forbidden_actions): + raise ValueError("forbidden_actions contains invalid action IDs.") + valid_actions = set(self.actions.keys()) - forbidden_actions + if len(valid_actions) == 0: + raise ValueError("All actions are forbidden. You must allow at least 1 action.") + if self.default_action and self.default_action not in valid_actions: + raise ValueError("The default action is forbidden.") + + return valid_actions + + def _validate_update_params( + self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + ): + """ + Verify that the given list of action IDs is a subset of the currently defined actions and that + the rewards type matches the strategy type. + + Parameters + ---------- + actions : List[ActionId] + The selected action for each sample. + rewards: List[Union[BinaryReward, List[BinaryReward]]] + The reward for each sample. + """ + invalid = set(actions) - set(self.actions.keys()) + if invalid: + raise AttributeError(f"The following invalid action(s) were specified: {invalid}.") + if len(actions) != len(rewards): + raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") + + #################################################################################################################### + + @abstractmethod + @validate_call + def update( + self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]], *args, **kwargs + ): + """ + Update the multi-armed bandit model. + + actions: List[ActionId] + The selected action for each sample. + rewards: List[Union[BinaryReward, List[BinaryReward]]] + The reward for each sample. + """ + + @abstractmethod + @validate_call + def predict(self, forbidden_actions: Optional[Set[ActionId]] = None) -> Predictions: + """ + Predict actions. + + Parameters + ---------- + forbidden_actions : Optional[Set[ActionId]], default=None + Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only + consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions. + Note that: actions = allowed_actions U forbidden_actions. + + Returns + ------- + actions: List[ActionId] of shape (n_samples,) + The actions selected by the multi-armed bandit model. 
+ probs: List[Dict[ActionId, Probability]] of shape (n_samples,) + The probabilities of getting a positive reward for each action + ws : List[Dict[ActionId, float]], only relevant for some of the MABs + The weighted sum of logistic regression logits.. + """ + + def get_state(self) -> (str, dict): + """ + Access the complete model internal state, enough to create an exact copy of the same model from it. + Returns + ------- + model_class_name: str + The name of the class of the model. + model_state: dict + The internal state of the model (actions, scores, etc.). + """ + model_name = self.__class__.__name__ + state: dict = self.model_dump() + return model_name, state + + @validate_call + def _select_epsilon_greedy_action( + self, + p: ActionRewardLikelihood, + actions: Optional[Dict[ActionId, Model]] = None, + ) -> ActionId: + """ + Wraps self.strategy.select_action function with epsilon-greedy strategy, + such that with probability epsilon a default_action is selected, + and with probability 1-epsilon the select_action function is triggered to choose action. + If no default_action is provided, a random action is selected. + + Reference: Reinforcement Learning: An Introduction, Ch. 2 (Sutton and Burto, 2018) + https://web.stanford.edu/class/psych209/Readings/SuttonBartoIPRLBook2ndEd.pdf&ved=2ahUKEwjMy8WV9N2HAxVe0gIHHVjjG5sQFnoECEMQAQ&usg=AOvVaw3bKK-Y_1kf6XQVwR-UYrBY + + Parameters + ---------- + p: Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] + The dictionary or actions and their sampled probability of getting a positive reward. + For MO strategy, the sampled probability is a list with elements corresponding to the objectives. + actions: Optional[Dict[ActionId, Model]] + The dictionary of actions and their associated Model. + + Returns + ------- + selected_action: ActionId + The selected action. + + Raises + ------ + KeyError + If self.default_action is not present as a key in the probabilities dictionary. + """ + + if self.epsilon: + if self.default_action and self.default_action not in p.keys(): + raise KeyError(f"Default action {self.default_action} not in actions.") + if np.random.binomial(1, self.epsilon): + selected_action = self.default_action or np.random.choice(list(p.keys())) + else: + selected_action = self.strategy.select_action(p=p, actions=actions) + else: + selected_action = self.strategy.select_action(p=p, actions=actions) + return selected_action + + @classmethod + def from_state(cls, state: dict) -> "BaseMab": + """ + Create a new instance of the class from a given model state. + The state can be obtained by applying get_state() to a model. + + Parameters + ---------- + state: dict + The internal state of a model (actions, strategy, etc.) of the same type. + + Returns + ------- + model: BaseMab + The new model instance. + + """ + model_attributes = extract_argument_names_from_function(cls.__init__, True) + strategy_attributes = list(state["strategy"].keys()) + attributes_mapping = {k: state[k] for k in model_attributes if k not in strategy_attributes and k in state} + attributes_mapping.update({k: state["strategy"][k] for k in strategy_attributes}) + return cls(**attributes_mapping) + + @classmethod + def cold_start( + cls, + action_ids: Optional[Set[ActionId]] = None, + epsilon: Optional[Float01] = None, + default_action: Optional[ActionId] = None, + **kwargs, + ) -> "BaseMab": + """ + Factory method to create a Multi-Armed Bandit with Thompson Sampling, with default + parameters. 
+ + Parameters + ---------- + action_ids: Optional[Set[ActionId]] + The list of possible actions. + epsilon: Optional[Float01] + epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. + default_action: Optional[ActionId] + The default action to select with a probability of epsilon when using the epsilon-greedy approach. + If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. + kwargs: Dict[str, Any] + Additional parameters for the mab and for the action model. + + Returns + ------- + mab: BaseMab + Multi-Armed Bandit + """ + action_specific_kwargs, kwargs = cls._extract_action_specific_kwargs(**kwargs) + + # Extract inner_action_ids + inner_action_ids = action_ids or set(action_specific_kwargs.keys()) + if not inner_action_ids: + raise ValueError( + "inner_action_ids should be provided either directly or via keyword argument in the form of " + "action_id_{model argument name} = {action_id: value}." + ) + + # Assign model for each action + action_model_cold_start, action_general_kwargs = cls._extract_action_model_class_and_attributes(**kwargs) + actions = {} + for a in inner_action_ids: + actions[a] = action_model_cold_start(**action_general_kwargs, **action_specific_kwargs.get(a, {})) + + # Instantiate the MAB + strategy_kwargs = {k: kwargs[k] for k in kwargs.keys() if k not in action_general_kwargs.keys()} + strategy_class = cls.model_fields["strategy"].annotation + strategy = strategy_class(**strategy_kwargs) + mab = cls(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) + # For contextual multi-armed bandit, until the very first update the model will predict actions randomly, + # where each action has equal probability to be selected. + if hasattr(mab, "predict_actions_randomly"): + mab.predict_actions_randomly = True + return mab + + @staticmethod + def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str, Any]]: + """ + Utility function to extract kwargs that are specific for each action when constructing the action model. + + Parameters + ---------- + kwargs : Dict[str, Any] + Additional parameters for the mab and for the action model. + + Returns + ------- + action_specific_kwargs : Dict[str, Dict] + Dictionary of actions and the parameters of their associated model. + kwargs : Dict[str, Any] + Dictionary of parameters and their values, without the action_specific_kwargs. + """ + action_specific_kwargs = defaultdict(dict) + for keyword in list(kwargs): + argument = kwargs[keyword] + if keyword.startswith(ACTION_IDS_PREFIX) and type(argument) is dict: + kwargs.pop(keyword) + inner_keyword = keyword.split(ACTION_IDS_PREFIX)[1] + for action_id, value in argument.items(): + action_specific_kwargs[action_id][inner_keyword] = value + return dict(action_specific_kwargs), kwargs + + @classmethod + def _extract_action_model_class_and_attributes(cls, **kwargs) -> Tuple[Callable, Dict[str, Dict]]: + """ + Utility function to extract kwargs that are specific for each action when constructing the action model. + + Parameters + ---------- + kwargs : Dict[str, Any] + Additional parameters for the mab and for the action model. + + Returns + ------- + action_model_cold_start : Callable + Function handle for factoring the required action model. + action_general_kwargs : Dict[str, any] + Dictionary of parameters and their values for the action model. 
+ """ + action_model_class = get_args(cls.model_fields["actions"].annotation)[1] + if hasattr(action_model_class, "cold_start"): + action_model_cold_start_init = action_model_cold_start = action_model_class.cold_start + else: + action_model_cold_start_init = action_model_class.__init__ + action_model_cold_start = action_model_class + + action_model_attributes = extract_argument_names_from_function(action_model_cold_start_init, True) + + action_general_kwargs = {k: kwargs[k] for k in action_model_attributes if k in kwargs.keys()} + return action_model_cold_start, action_general_kwargs diff --git a/pybandits/model.py b/pybandits/model.py index c94ba1f..29becce 100644 --- a/pybandits/model.py +++ b/pybandits/model.py @@ -21,8 +21,9 @@ # SOFTWARE. +from abc import ABC, abstractmethod from random import betavariate -from typing import List, Tuple +from typing import Any, List, Tuple from numpy import array, c_, exp, insert, mean, multiply, ones, sqrt, std from numpy.typing import ArrayLike @@ -41,7 +42,25 @@ from pytensor.tensor import dot from scipy.stats import t -from pybandits.base import BinaryReward, Model, Probability, PyBanditsBaseModel +from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel + + +class Model(PyBanditsBaseModel, ABC): + """ + Class to model the prior distributions. + """ + + @abstractmethod + def sample_proba(self) -> Probability: + """ + Sample the probability of getting a positive reward. + """ + + @abstractmethod + def update(self, rewards: List[Any]): + """ + Update the model parameters. + """ class BaseBeta(Model): @@ -131,7 +150,7 @@ class BetaCC(BaseBeta): cost: NonNegativeFloat -class BaseBetaMO(Model): +class BetaMO(Model): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives. @@ -173,19 +192,36 @@ def update(self, rewards: List[List[BinaryReward]]): for i, counter in enumerate(self.counters): counter.update([r[i] for r in rewards]) + @classmethod + def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": + """ + Utility function to create a Bayesian Logistic Regression model or child model with cost control, + with default parameters. -class BetaMO(BaseBetaMO): - """ - Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives. + It is modeled as: - Parameters - ---------- - counters: List[Beta] of shape (n_objectives,) - List of Beta distributions. - """ + y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) + + where the alpha and betas coefficients are Student's t-distributions. + + Parameters + ---------- + n_betas : PositiveInt + The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected + after in the context matrix. + kwargs: Dict[str, Any] + Additional arguments for the Bayesian Logistic Regression child model. + + Returns + ------- + blr: BayesianLogisticRegrssion + The Bayesian Logistic Regression model. + """ + counters = n_objectives * [Beta()] + return cls(counters=counters, **kwargs) -class BetaMOCC(BaseBetaMO): +class BetaMOCC(BetaMO): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives and cost control. @@ -219,7 +255,7 @@ class StudentT(PyBanditsBaseModel): nu: confloat(allow_inf_nan=False) = 5.0 -class BaseBayesianLogisticRegression(Model): +class BayesianLogisticRegression(Model): """ Base Bayesian Logistic Regression model. 
@@ -240,7 +276,7 @@ class BaseBayesianLogisticRegression(Model): """ alpha: StudentT - betas: List[StudentT] = Field(..., min_items=1) + betas: List[StudentT] = Field(..., min_length=1) @validate_call(config=dict(arbitrary_types_allowed=True)) def check_context_matrix(self, context: ArrayLike): @@ -379,29 +415,35 @@ def update( self.betas[i].mu = mean(trace["beta" + str(i)]) self.betas[i].sigma = std(trace["beta" + str(i)], ddof=1) + @classmethod + def cold_start(cls, n_features: PositiveInt, **kwargs) -> "BayesianLogisticRegression": + """ + Utility function to create a Bayesian Logistic Regression model or child model with cost control, + with default parameters. -class BayesianLogisticRegression(BaseBayesianLogisticRegression): - """ - Bayesian Logistic Regression model. + It is modeled as: - It is modeled as: + y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) - y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) + where the alpha and betas coefficients are Student's t-distributions. - where the alpha and betas coefficients are Student's t-distributions. + Parameters + ---------- + n_features : PositiveInt + The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected + after in the context matrix. + kwargs: Dict[str, Any] + Additional arguments for the Bayesian Logistic Regression child model. - Parameters - ---------- - alpha: StudentT - Student's t-distribution of the alpha coefficient. - betas: StudentT - Student's t-distributions of the betas coefficients. - params_sample: Dict - Parameters for the function pymc.sample() - """ + Returns + ------- + blr: BayesianLogisticRegrssion + The Bayesian Logistic Regression model. + """ + return cls(alpha=StudentT(), betas=[StudentT() for _ in range(n_features)], **kwargs) -class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression): +class BayesianLogisticRegressionCC(BayesianLogisticRegression): """ Bayesian Logistic Regression model with cost control. @@ -424,55 +466,3 @@ class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression): """ cost: NonNegativeFloat - - -def create_bayesian_logistic_regression_cold_start(n_betas: PositiveInt) -> BayesianLogisticRegression: - """ - Utility function to create a Bayesian Logistic Regression model, with default parameters. - - It is modeled as: - - y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) - - where the alpha and betas coefficients are Student's t-distributions. - - Parameters - ---------- - n_betas : PositiveInt - The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected - after in the context matrix. - - Returns - ------- - blr: BayesianLogisticRegression - The Bayesian Logistic Regression model. - """ - return BayesianLogisticRegression(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)]) - - -def create_bayesian_logistic_regression_cc_cold_start( - n_betas: PositiveInt, cost: NonNegativeFloat -) -> BayesianLogisticRegressionCC: - """ - Utility function to create a Bayesian Logistic Regression model with cost control, with default parameters. - - It is modeled as: - - y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) - - where the alpha and betas coefficients are Student's t-distributions. - - Parameters - ---------- - n_betas : PositiveInt - The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected - after in the context matrix. 
- cost: NonNegativeFloat - Cost associated to the Bayesian Logistic Regression model. - - Returns - ------- - blr: BayesianLogisticRegressionCC - The Bayesian Logistic Regression model. - """ - return BayesianLogisticRegressionCC(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)], cost=cost) diff --git a/pybandits/smab.py b/pybandits/smab.py index 65e4bb1..00ded40 100644 --- a/pybandits/smab.py +++ b/pybandits/smab.py @@ -20,26 +20,27 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + from collections import defaultdict -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Union -from pydantic import NonNegativeFloat, PositiveInt, field_validator, validate_call +from pydantic import PositiveInt, field_validator, validate_call from pybandits.base import ( ActionId, - BaseMab, BinaryReward, - Float01, Probability, - Strategy, + SmabPredictions, ) -from pybandits.model import BaseBeta, BaseBetaMO, Beta, BetaCC, BetaMO, BetaMOCC +from pybandits.mab import BaseMab +from pybandits.model import BaseBeta, Beta, BetaCC, BetaMO, BetaMOCC from pybandits.strategy import ( BestActionIdentification, ClassicBandit, CostControlBandit, MultiObjectiveBandit, MultiObjectiveCostControlBandit, + Strategy, ) @@ -62,7 +63,7 @@ def predict( self, n_samples: PositiveInt = 1, forbidden_actions: Optional[Set[ActionId]] = None, - ) -> Tuple[List[ActionId], List[Dict[ActionId, Probability]]]: + ) -> SmabPredictions: """ Predict actions. @@ -95,7 +96,7 @@ def predict( return selected_actions, probs @validate_call - def update(self, actions: List[ActionId], rewards: List[Union[BinaryReward, List[BinaryReward]]]): + def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): """ Update the stochastic Bernoulli bandit given the list of selected actions and their corresponding binary rewards. @@ -111,7 +112,8 @@ def update(self, actions: List[ActionId], rewards: List[Union[BinaryReward, List If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] 
""" - self._check_update_params(actions=actions, rewards=rewards) + + self._validate_update_params(actions=actions, rewards=rewards) rewards_dict = defaultdict(list) @@ -140,22 +142,6 @@ class SmabBernoulli(BaseSmabBernoulli): actions: Dict[ActionId, Beta] strategy: ClassicBandit - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulli": - return cls(actions=state["actions"]) - - @validate_call - def update(self, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(actions=actions, rewards=rewards) - class SmabBernoulliBAI(BaseSmabBernoulli): """ @@ -175,24 +161,6 @@ class SmabBernoulliBAI(BaseSmabBernoulli): actions: Dict[ActionId, Beta] strategy: BestActionIdentification - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - exploit_p: Optional[Float01] = None, - ): - strategy = BestActionIdentification() if exploit_p is None else BestActionIdentification(exploit_p=exploit_p) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliBAI": - return cls(actions=state["actions"], exploit_p=state["strategy"].get("exploit_p", None)) - - @validate_call - def update(self, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(actions=actions, rewards=rewards) - class SmabBernoulliCC(BaseSmabBernoulli): """ @@ -220,24 +188,6 @@ class SmabBernoulliCC(BaseSmabBernoulli): actions: Dict[ActionId, BetaCC] strategy: CostControlBandit - def __init__( - self, - actions: Dict[ActionId, BetaCC], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - subsidy_factor: Optional[Float01] = None, - ): - strategy = CostControlBandit() if subsidy_factor is None else CostControlBandit(subsidy_factor=subsidy_factor) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliCC": - return cls(actions=state["actions"], subsidy_factor=state["strategy"].get("subsidy_factor", None)) - - @validate_call - def update(self, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(actions=actions, rewards=rewards) - class BaseSmabBernoulliMO(BaseSmabBernoulli): """ @@ -252,21 +202,17 @@ class BaseSmabBernoulliMO(BaseSmabBernoulli): The strategy used to select actions. 
""" - actions: Dict[ActionId, BaseBetaMO] + actions: Dict[ActionId, BetaMO] strategy: Strategy @field_validator("actions", mode="after") @classmethod - def all_actions_have_same_number_of_objectives(cls, actions: Dict[ActionId, BaseBetaMO]): + def all_actions_have_same_number_of_objectives(cls, actions: Dict[ActionId, BetaMO]): n_objs_per_action = [len(beta.counters) for beta in actions.values()] if len(set(n_objs_per_action)) != 1: raise ValueError("All actions should have the same number of objectives") return actions - @validate_call - def update(self, actions: List[ActionId], rewards: List[List[BinaryReward]]): - super().update(actions=actions, rewards=rewards) - class SmabBernoulliMO(BaseSmabBernoulliMO): """ @@ -291,20 +237,6 @@ class SmabBernoulliMO(BaseSmabBernoulliMO): actions: Dict[ActionId, BetaMO] strategy: MultiObjectiveBandit - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__( - actions=actions, strategy=MultiObjectiveBandit(), epsilon=epsilon, default_action=default_action - ) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliMO": - return cls(actions=state["actions"]) - class SmabBernoulliMOCC(BaseSmabBernoulliMO): """ @@ -324,217 +256,3 @@ class SmabBernoulliMOCC(BaseSmabBernoulliMO): actions: Dict[ActionId, BetaMOCC] strategy: MultiObjectiveCostControlBandit - - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__( - actions=actions, strategy=MultiObjectiveCostControlBandit(), epsilon=epsilon, default_action=default_action - ) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliMOCC": - return cls(actions=state["actions"]) - - -@validate_call -def create_smab_bernoulli_cold_start( - action_ids: Set[ActionId], epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None -) -> SmabBernoulli: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, with default - parameters. - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulli - Stochastic Multi-Armed Bandit with strategy = ClassicBandit - """ - actions = {} - for a in set(action_ids): - actions[a] = Beta() - return SmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action) - - -@validate_call -def create_smab_bernoulli_bai_cold_start( - action_ids: Set[ActionId], - exploit_p: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliBAI: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action - Identification strategy, with default parameters. - - Reference: Analysis of Thompson Sampling for the Multi-armed Bandit Problem (Agrawal and Goyal, 2012) - http://proceedings.mlr.press/v23/agrawal12/agrawal12.pdf - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. 
- exploit_p: Float_0_1 (default=0.5) - Number in [0, 1] which specifies the amount of exploitation. - If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive reward, - (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive - reward. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulliBAI - Stochastic Multi-Armed Bandit with strategy = BestActionIdentification - """ - actions = {} - for a in set(action_ids): - actions[a] = Beta() - return SmabBernoulliBAI(actions=actions, epsilon=epsilon, default_action=default_action, exploit_p=exploit_p) - - -@validate_call -def create_smab_bernoulli_cc_cold_start( - action_ids_cost: Dict[ActionId, NonNegativeFloat], - subsidy_factor: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliCC: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control - strategy, with default parameters. - - The sMAB is extended to include a control of the action cost. Each action is associated with a predefined "cost". - At prediction time, the model considers the actions whose expected rewards is above a pre-defined lower bound. Among - these actions, the one with the lowest associated cost is recommended. The expected reward interval for feasible - actions is defined as [(1-subsidy_factor) * max_p, max_p], where max_p is the highest expected reward sampled value. - - Reference: Thompson Sampling for Contextual Bandit Problems with Auxiliary Safety Constraints (Daulton et al., 2019) - https://arxiv.org/abs/1911.00638 - - Multi-Armed Bandits with Cost Subsidy (Sinha et al., 2021) - https://arxiv.org/abs/2011.01488 - - Parameters - ---------- - action_ids_cost: Dict[ActionId, NonNegativeFloat] - The list of possible actions, and their cost. - subsidy_factor: Optional[Float_0_1], default=0.5 - Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. - If subsidy_factor is 1, the bandits always selects the action with the minimum cost. - If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive - reward (it behaves as a classic Bernoulli bandit). - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. 
- - Returns - ------- - smab: SmabBernoulliCC - Stochastic Multi-Armed Bandit with strategy = CostControlBandit - """ - actions = {} - for a, cost in action_ids_cost.items(): - actions[a] = BetaCC(cost=cost) - return SmabBernoulliCC( - actions=actions, epsilon=epsilon, default_action=default_action, subsidy_factor=subsidy_factor - ) - - -@validate_call -def create_smab_bernoulli_mo_cold_start( - action_ids: Set[ActionId], - n_objectives: PositiveInt, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliMO: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Multi-Objectives - strategy, with default parameters. - - The reward pertaining to an action is a multidimensional vector instead of a scalar value. In this setting, - different actions are compared according to Pareto order between their expected reward vectors, and those actions - whose expected rewards are not inferior to that of any other actions are called Pareto optimal actions, all of which - constitute the Pareto front. - - Reference: Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015) - https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - n_objectives: PositiveInt - The number of objectives to optimize. The bandit assumes the same number of objectives for all actions. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulliMO - Stochastic Multi-Armed Bandit with strategy = MultiObjectiveBandit - """ - actions = {} - for a in set(action_ids): - actions[a] = BetaMO(counters=n_objectives * [Beta()]) - return SmabBernoulliMO(actions=actions, epsilon=epsilon, default_action=default_action) - - -@validate_call -def create_smab_bernoulli_mo_cc_cold_start( - action_ids_cost: Dict[ActionId, NonNegativeFloat], - n_objectives: PositiveInt, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliMOCC: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling implementation for - Multi-Objective (MO) with Cost Control (CC) strategy, with default parameters. - - This Bandit allows the reward to be a multidimensional vector and include a control of the action cost. It merges - the Multi-Objective and Cost Control strategies. - - Parameters - ---------- - action_ids_cost: Dict[ActionId, NonNegativeFloat] - The list of possible actions, and their cost. - n_objectives: PositiveInt - The number of objectives to optimize. The bandit assumes the same number of objectives for all actions. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. 
- - - Returns - ------- - smab: SmabBernoulliMO - Stochastic Multi-Armed Bandit with strategy = MultiObjectiveBandit - """ - actions = {} - for a, cost in action_ids_cost.items(): - actions[a] = BetaMOCC(counters=n_objectives * [Beta()], cost=cost) - return SmabBernoulliMOCC(actions=actions, epsilon=epsilon, default_action=default_action) diff --git a/pybandits/strategy.py b/pybandits/strategy.py index bc225f2..232c3ad 100644 --- a/pybandits/strategy.py +++ b/pybandits/strategy.py @@ -20,15 +20,59 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from abc import ABC, abstractmethod from random import random -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional, Self, Union import numpy as np -from pydantic import validate_call +from pydantic import field_validator, validate_call from scipy.stats import ttest_ind_from_stats -from pybandits.base import ActionId, Float01, Model, Probability, Strategy -from pybandits.model import Beta, BetaMOCC +from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel +from pybandits.model import Beta, BetaMOCC, Model + + +class Strategy(PyBanditsBaseModel, ABC): + """ + Strategy to select actions in multi-armed bandits. + """ + + def _with_argument(self, argument_name: str, argument_value: Any) -> Self: + """ + Instantiate a mutated strategy with an altered argument_value for argument_name. + + Parameters + ---------- + argument_name: str + The name of the argument. + argument_value: Any + The value of the argument. + + Returns + ------- + mutated_strategy: Strategy + The mutated strategy. + """ + state: dict = self.model_dump() + state[argument_name] = argument_value + mutated_strategy = self.__class__(**state) + return mutated_strategy + + @abstractmethod + def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId: + """ + Select the action. + """ + + @classmethod + @validate_call + def numerize_field(cls, v, field_name: str): + return v if v is not None else cls.model_fields[field_name].default + + @classmethod + @validate_call + def get_expected_value_from_state(cls, state: Dict[str, Any], field_name: str) -> float: + return cls.numerize_field(state["strategy"].get(field_name), field_name) class ClassicBandit(Strategy): @@ -53,8 +97,10 @@ def select_action( Parameters ---------- - p: Dict[ActionId, Probability] - The dictionary or actions and their sampled probability of getting a positive reward. + p : Dict[ActionId, Probability] + The dictionary of actions and their sampled probability of getting a positive reward. + actions : Optional[Dict[ActionId, Model]] + The dictionary of actions and their associated model. Returns ------- @@ -73,32 +119,44 @@ class BestActionIdentification(Strategy): Parameters ---------- - exploit_p: Float_0_1 (default=0.5) + exploit_p: Optional[Float01], 0.5 if not specified Tuning parameter taking value in [0, 1] which specifies the probability of selecting the best or an alternative action. - If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive reward, - (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive + If exploit_p is 1, the bandit always selects the action with the highest probability of + getting a positive reward. That is, it behaves as a Greedy strategy. 
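The numerize_field / get_expected_value_from_state helpers above centralize default handling for optional strategy parameters; a minimal sketch of the intended behaviour, assuming the 0.5 defaults declared for exploit_p (below) and subsidy_factor:

from pybandits.strategy import BestActionIdentification, CostControlBandit

# None passed explicitly is coerced to the declared default by the mode="before" validators
assert BestActionIdentification(exploit_p=None).exploit_p == 0.5
assert CostControlBandit(subsidy_factor=None).subsidy_factor == 0.5

# the same defaulting applies when reading a stored state whose field is missing or None
state = {"strategy": {"exploit_p": None}}
assert BestActionIdentification.get_expected_value_from_state(state, "exploit_p") == 0.5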
+        If exploit_p is 0, the bandit always selects the action with the 2nd highest probability of getting a positive
         reward.
     """
 
-    exploit_p: Float01 = 0.5
+    exploit_p: Optional[Float01] = 0.5
+
+    @field_validator("exploit_p", mode="before")
+    @classmethod
+    def numerize_exploit_p(cls, v):
+        return cls.numerize_field(v, "exploit_p")
 
     @validate_call
-    def set_exploit_p(self, exploit_p: Float01):
+    def with_exploit_p(self, exploit_p: Optional[Float01]) -> Self:
         """
-        Set exploit_p.
+        Instantiate a mutated best action identification strategy with an altered exploit_p.
 
         Parameters
         ----------
-        exploit_p: Float_0_1 (default=0.5)
-            Number in [0, 1] which specifies the amount of exploitation.
-            If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive
-            reward (it behaves as a Greedy strategy).
-            If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive
+        exploit_p: Optional[Float01], 0.5 if not specified
+            Tuning parameter taking value in [0, 1] which specifies the probability of selecting the best or an alternative
+            action.
+            If exploit_p is 1, the bandit always selects the action with the highest probability of
+            getting a positive reward. That is, it behaves as a Greedy strategy.
+            If exploit_p is 0, the bandit always selects the action with the 2nd highest probability of getting a positive
             reward.
+
+        Returns
+        -------
+        mutated_best_action_identification : BestActionIdentification
+            The mutated best action identification strategy.
         """
-        self.exploit_p = exploit_p
+        mutated_best_action_identification = self._with_argument("exploit_p", exploit_p)
+        return mutated_best_action_identification
 
     @validate_call
     def select_action(
@@ -113,8 +171,10 @@ def select_action(
 
         Parameters
         ----------
-        p: Dict[ActionId, Probability]
-            The dictionary or actions and their sampled probability of getting a positive reward.
+        p : Dict[ActionId, Probability]
+            The dictionary of actions and their sampled probability of getting a positive reward.
+        actions : Optional[Dict[ActionId, Model]]
+            The dictionary of actions and their associated model.
 
         Returns
         -------
@@ -167,7 +227,56 @@ def compare_best_actions(self, actions: Dict[ActionId, Beta]) -> float:
         return pvalue
 
 
-class CostControlBandit(Strategy):
+class CostControlStrategy(Strategy, ABC):
+    """
+    Cost Control (CC) strategy for multi-armed bandits.
+
+    Bandits are extended to include a control of the action cost. Each action is associated with a predefined "cost".
+    """
+
+    @classmethod
+    @validate_call
+    def _average(cls, p_of_action: Union[Probability, List[Probability]]):
+        return np.mean(p_of_action)
+
+    @classmethod
+    @validate_call
+    def _evaluate_and_select(
+        cls,
+        p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
+        actions: Dict[ActionId, Model],
+        feasible_actions: List[ActionId],
+    ) -> ActionId:
+        """
+        Evaluate the feasible actions and select the one with the minimum cost.
+
+        Parameters
+        ----------
+        p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]]
+            The dictionary of actions and their sampled probability of getting a positive reward.
+        actions: Dict[ActionId, Model]
+            The dictionary of actions and their associated model.
+        feasible_actions: List[ActionId]
+            The list of feasible actions.
+
+        Returns
+        -------
+        selected_action: ActionId
+            The selected action.
+ """ + # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id) + # the negative probability ensures that if we order the actions based on their minimum values the one with + # higher probability will be selected + sortable_actions = [(actions[a].cost, -cls._average(p[a]), a) for a in feasible_actions] + + # select the action with the min cost (and the highest mean of probabilities in case of cost equality) + _, _, selected_action = sorted(sortable_actions)[0] + + # return cheapest action from the set of feasible actions + return selected_action + + +class CostControlBandit(CostControlStrategy): """ Cost Control (CC) strategy for multi-armed bandits. @@ -185,18 +294,40 @@ class CostControlBandit(Strategy): Parameters ---------- - subsidy_factor: Optional[Float_0_1], default=0.5 + subsidy_factor: Optional[Float01], 0.5 if not specified Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. If subsidy_factor is 1, the bandits always selects the action with the minimum cost. If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive reward (it behaves as a classic Bernoulli bandit). """ - subsidy_factor: Float01 = 0.5 + subsidy_factor: Optional[Float01] = 0.5 + + @field_validator("subsidy_factor", mode="before") + @classmethod + def numerize_subsidy_factor(cls, v): + return cls.numerize_field(v, "subsidy_factor") @validate_call - def set_subsidy_factor(self, subsidy_factor: Float01): - self.subsidy_factor = subsidy_factor + def with_subsidy_factor(self, subsidy_factor: Optional[Float01]) -> Self: + """ + Instantiate a mutated cost control bandit strategy with an altered subsidy factor. + + Parameters + ---------- + subsidy_factor : Optional[Float01], 0.5 if not specified + Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. + If subsidy_factor is 1, the bandits always selects the action with the minimum cost. + If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive + reward (it behaves as a classic Bernoulli bandit). + + Returns + ------- + mutated_cost_control_bandit : CostControlBandit + The mutated cost control bandit strategy. 
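As a quick illustration of the selection rule implemented by _evaluate_and_select and used by CostControlBandit.select_action below, with hypothetical sampled probabilities, costs, and subsidy factor:

import numpy as np

p = {"a1": 0.80, "a2": 0.75, "a3": 0.40}    # hypothetical sampled probabilities
cost = {"a1": 30.0, "a2": 10.0, "a3": 5.0}  # hypothetical per-action costs
subsidy_factor = 0.2

# feasible actions: expected reward within [(1 - subsidy_factor) * max_p, max_p]
max_p = max(p.values())
feasible_actions = [a for a in p if p[a] >= (1 - subsidy_factor) * max_p]  # ["a1", "a2"]

# cheapest feasible action wins; ties on cost are broken by the higher mean probability
_, _, selected_action = sorted((cost[a], -np.mean(p[a]), a) for a in feasible_actions)[0]
assert selected_action == "a2"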
+        """
+        mutated_cost_control_bandit = self._with_argument("subsidy_factor", subsidy_factor)
+        return mutated_cost_control_bandit
 
     @validate_call
     def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, Model]) -> ActionId:
@@ -223,65 +354,63 @@ def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId,
 
         # define the set of feasible actions
         feasible_actions = [a for a in p.keys() if p[a] >= (1 - self.subsidy_factor) * max_p]
 
-        # feasible actions enriched with their characteristics (cost, -probability, action_id)
-        # the negative probability ensures that if we order the actions based on their minimum values the one with
-        # higher proba will be selected
-        sortable_actions = [(actions[a].cost, -p[a], a) for a in feasible_actions]
-
-        # select the action with the cheapest cost (and the highest probability in case of cost equality)
-        _, _, selected_action = sorted(sortable_actions)[0]
-
-        # return cheapest action from the set of feasible actions
+        selected_action = self._evaluate_and_select(p, actions, feasible_actions)
         return selected_action
 
 
-@validate_call
-def get_pareto_front(p: Dict[ActionId, List[Probability]]) -> List[ActionId]:
+class MultiObjectiveStrategy(Strategy, ABC):
     """
-    Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by any action
-    out of the set A*.
-
-    Parameters:
-    -----------
-    p: Dict[ActionId, Probability]
-        The dictionary or actions and their sampled probability of getting a positive reward for each objective.
-
-    Return
-    ------
-    pareto_front: set
-        The list of Pareto optimal actions
+    Multi Objective Strategy to select actions in multi-armed bandits.
     """
-    # store non dominated actions
-    pareto_front = []
-    for this_action in p.keys():
-        is_pareto = True  # we assume that action is Pareto Optimal until proven otherwise
-        other_actions = [a for a in p.keys() if a != this_action]
 
+    @classmethod
+    @validate_call
+    def get_pareto_front(cls, p: Dict[ActionId, List[Probability]]) -> List[ActionId]:
+        """
+        Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by
+        any action out of the set A*.
 
-    for other_action in other_actions:
-        # check if this_action is not dominated by other_action based on
-        # multiple objectives reward prob vectors
-        is_dominated = not (
-            # an action cannot be dominated by an identical one
-            (p[this_action] == p[other_action])
-            # otherwise, apply the classical definition
-            or any(p[this_action][i] > p[other_action][i] for i in range(len(p[this_action])))
-        )
+        Parameters
+        ----------
+        p: Dict[ActionId, List[Probability]]
+            The dictionary of actions and their sampled probability of getting a positive reward for each objective.
-        if is_dominated:
-            # this_action dominated by at least one other_action,
-            # this_action is not pareto optimal
-            is_pareto = False
-            break
+        Returns
+        -------
+        pareto_front: List[ActionId]
+            The list of Pareto optimal actions
+        """
+        # store non dominated actions
+        pareto_front = []
+
+        for this_action in p.keys():
+            is_pareto = True  # we assume that action is Pareto Optimal until proven otherwise
+            other_actions = [a for a in p.keys() if a != this_action]
+
+            for other_action in other_actions:
+                # check if this_action is not dominated by other_action based on
+                # multiple objectives reward prob vectors
+                is_dominated = not (
+                    # an action cannot be dominated by an identical one
+                    (p[this_action] == p[other_action])
+                    # otherwise, apply the classical definition
+                    or any(p[this_action][i] > p[other_action][i] for i in range(len(p[this_action])))
+                )
 
-    if is_pareto:
-        # this_action is pareto optimal
-        pareto_front.append(this_action)
+                if is_dominated:
+                    # this_action dominated by at least one other_action,
+                    # this_action is not pareto optimal
+                    is_pareto = False
+                    break
 
-    return pareto_front
+            if is_pareto:
+                # this_action is pareto optimal
+                pareto_front.append(this_action)
+        return pareto_front
 
 
-class MultiObjectiveBandit(Strategy):
+
+class MultiObjectiveBandit(MultiObjectiveStrategy):
     """
     Multi-Objective (MO) strategy for multi-armed bandits.
@@ -292,11 +421,6 @@ class MultiObjectiveBandit(Strategy):
 
     Reference: Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015)
     https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem
-
-    Parameters
-    ----------
-    n_objectives: int
-        Number of objectives to be solved by the bandit (n_objectives must be >= 1).
     """
 
     @validate_call
@@ -316,20 +440,15 @@ def select_action(self, p: Dict[ActionId, List[Probability]], **kwargs) -> Actio
         selected_action: ActionId
             The selected action.
         """
-        return np.random.choice(get_pareto_front(p=p))
+        return np.random.choice(self.get_pareto_front(p=p))
 
 
-class MultiObjectiveCostControlBandit(Strategy):
+class MultiObjectiveCostControlBandit(MultiObjectiveStrategy, CostControlStrategy):
     """
     Multi-Objective (MO) with Cost Control (CC) strategy for multi-armed bandits.
 
     This strategy allows the reward to be a multidimensional vector and include a control of the action cost. It merges
     the Multi-Objective and Cost Control strategies.
-
-    Parameters
-    ----------
-    n_objectives: int
-        Number of objectives to be solved by the bandit (n_objectives must be >= 1)
     """
 
     @validate_call
@@ -349,13 +468,7 @@ def select_action(self, p: Dict[ActionId, List[Probability]], actions: Dict[Acti
         selected_action: ActionId
            The selected action.
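The dominance rule used by get_pareto_front above can be read off a small hypothetical example; an action stays on the Pareto front unless some other, non-identical action is at least as good on every objective:

p = {"a1": [0.6, 0.2], "a2": [0.5, 0.5], "a3": [0.4, 0.1]}  # hypothetical per-objective probabilities

def dominated_by(x, y):
    # y dominates x if x is not identical to y and x is not strictly better on any objective
    return x != y and not any(xi > yi for xi, yi in zip(x, y))

pareto_front = [a for a in p if not any(dominated_by(p[a], p[b]) for b in p if b != a)]
assert pareto_front == ["a1", "a2"]  # "a3" is dominated by "a2"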
""" - pareto_set = get_pareto_front(p=p) + pareto_set = self.get_pareto_front(p=p) - # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id) - sortable_actions = [(actions[a].cost, -np.mean(p[a]), a) for a in pareto_set] - - # select the action with the min cost (and the highest mean of probabilities in case of cost equality) - _, _, selected_action = sorted(sortable_actions)[0] - - # return cheapest action from the set of feasible actions + selected_action = self._evaluate_and_select(p, actions, pareto_set) return selected_action diff --git a/pybandits/utils.py b/pybandits/utils.py index 45a69a6..62e6af7 100644 --- a/pybandits/utils.py +++ b/pybandits/utils.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, List, Union +from typing import Any, Callable, Dict, List, Union from pydantic import validate_call @@ -19,3 +19,26 @@ def to_serializable_dict(d: Dict[str, Any]) -> Dict[str, JSONSerializable]: """ return json.loads(json.dumps(d, default=dict)) + + +@validate_call +def extract_argument_names_from_function(function_handle: Callable, is_class_method: bool = False) -> List[str]: + """ + Extract the argument names from a function handle. + + Parameters + ---------- + function_handle : Callable + Handle of a function to extract the argument names from + + is_class_method : bool, defaults to False + Whether the function is a class method + + Returns + ------- + argument_names : List[str] + List of argument names + """ + start_index = int(is_class_method) + argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount] + return argument_names diff --git a/pyproject.toml b/pyproject.toml index 565164e..38022e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pybandits" -version = "0.5.1" +version = "1.0.0" description = "Python Multi-Armed Bandit Library" authors = [ "Dario d'Andrea ", diff --git a/tests/test_cmab.py b/tests/test_cmab.py index 5fe15e4..96099c9 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -28,26 +28,9 @@ from pydantic import NonNegativeFloat, ValidationError from pybandits.base import Float01 -from pybandits.cmab import ( - CmabBernoulli, - CmabBernoulliBAI, - CmabBernoulliCC, - create_cmab_bernoulli_bai_cold_start, - create_cmab_bernoulli_cc_cold_start, - create_cmab_bernoulli_cold_start, -) -from pybandits.model import ( - BayesianLogisticRegression, - BayesianLogisticRegressionCC, - StudentT, - create_bayesian_logistic_regression_cc_cold_start, - create_bayesian_logistic_regression_cold_start, -) -from pybandits.strategy import ( - BestActionIdentification, - ClassicBandit, - CostControlBandit, -) +from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC +from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC, StudentT +from pybandits.strategy import BestActionIdentification, ClassicBandit, CostControlBandit from pybandits.utils import to_serializable_dict from tests.test_utils import is_serializable @@ -63,13 +46,13 @@ def test_create_cmab_bernoulli_cold_start(a_int): # n_features must be > 0 if a_int <= 0: with pytest.raises(ValidationError): - create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) else: - mab1 = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + mab1 = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) mab2 = CmabBernoulli( 
actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a_int), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a_int), + "a1": BayesianLogisticRegression.cold_start(n_features=a_int), + "a2": BayesianLogisticRegression.cold_start(n_features=a_int), } ) mab2.predict_actions_randomly = True @@ -83,21 +66,13 @@ def test_cmab_can_instantiate(n_features): CmabBernoulli() with pytest.raises(AttributeError): CmabBernoulli(actions={}) - with pytest.raises(AttributeError): - CmabBernoulli(actions={"a1": create_bayesian_logistic_regression_cold_start(n_betas=2)}) - with pytest.raises(TypeError): # strategy is not an argument of init + with pytest.warns(UserWarning): + CmabBernoulli(actions={"a1": BayesianLogisticRegression.cold_start(n_features=2)}) + with pytest.raises(ValidationError): # predict_with_proba is not an argument of init CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - }, - strategy=ClassicBandit(), - ) - with pytest.raises(TypeError): # predict_with_proba is not an argument of init - CmabBernoulli( - actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, predict_with_proba=True, ) @@ -108,15 +83,22 @@ def test_cmab_can_instantiate(n_features): "a2": None, }, ) + CmabBernoulli( + actions={ + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), + }, + strategy=ClassicBandit(), + ) mab = CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) assert not mab.predict_actions_randomly assert not mab.predict_with_proba mab.predict_with_proba = True @@ -133,17 +115,17 @@ def test_cmab_init_with_wrong_blr_models(a, b): with pytest.raises(AttributeError): CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a), - "a3": create_bayesian_logistic_regression_cold_start(n_betas=b), + "a1": BayesianLogisticRegression.cold_start(n_features=a), + "a2": BayesianLogisticRegression.cold_start(n_features=a), + "a3": BayesianLogisticRegression.cold_start(n_features=b), } ) else: CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=b), - "a3": create_bayesian_logistic_regression_cold_start(n_betas=b), + "a1": BayesianLogisticRegression.cold_start(n_features=a), + "a2": BayesianLogisticRegression.cold_start(n_features=b), + "a3": BayesianLogisticRegression.cold_start(n_features=b), } ) @@ 
-153,13 +135,13 @@ def test_cmab_update(n_samples=100, n_features=3): rewards = np.random.choice([0, 1], size=n_samples).tolist() def run_update(context): - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) assert all( - [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [mab.actions[a] == BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( - [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [mab.actions[a] != BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)] ) assert not mab.predict_actions_randomly @@ -183,13 +165,13 @@ def test_cmab_update_not_all_actions(n_samples=100, n_feat=3): actions = np.random.choice(["a3", "a4"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_feat)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat) mab.update(context=context, actions=actions, rewards=rewards) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a3"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a4"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_feat) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_feat) + assert mab.actions["a3"] != BayesianLogisticRegression.cold_start(n_features=n_feat) + assert mab.actions["a4"] != BayesianLogisticRegression.cold_start(n_features=n_feat) @settings(deadline=500) @@ -198,7 +180,7 @@ def test_cmab_update_shape_mismatch(n_samples, n_features): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) with pytest.raises(AttributeError): # actions shape mismatch mab.update(context=context, actions=actions[1:], rewards=rewards) @@ -216,7 +198,7 @@ def test_cmab_update_shape_mismatch(n_samples, n_features): @given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) def test_cmab_predict_cold_start(n_samples, n_features): def run_predict(context): - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -247,7 +229,7 @@ def run_predict(context): mab = CmabBernoulli( actions={ "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=n_features * [StudentT()]), - "a2": 
create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, ) assert not mab.predict_actions_randomly @@ -274,7 +256,7 @@ def run_predict(context): @given(st.integers(min_value=1, max_value=10)) def test_cmab_predict_shape_mismatch(a_int): context = np.random.uniform(low=-1.0, high=1.0, size=(100, a_int - 1)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) with pytest.raises(AttributeError): mab.predict(context=context) with pytest.raises(AttributeError): @@ -299,20 +281,20 @@ def run_predict(mab): assert set(mab.predict(n_samples=1000, forbidden_actions={"a5", "a4", "a2", "a3", "a1"})[0]) # cold start mab - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) run_predict(mab=mab) # not cold start mab mab = CmabBernoulli( actions={ "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=[StudentT(), StudentT(), StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a3": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), + "a3": BayesianLogisticRegression.cold_start(n_features=n_features), "a4": BayesianLogisticRegression(alpha=StudentT(mu=4, sigma=5), betas=[StudentT(), StudentT(), StudentT()]), - "a5": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a5": BayesianLogisticRegression.cold_start(n_features=n_features), }, ) - assert mab != create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) + assert mab != CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) run_predict(mab=mab) @@ -321,7 +303,7 @@ def run_predict(mab): def test_cmab_get_state(mu, sigma, n_features): actions: dict = { "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } cmab = CmabBernoulli(actions=actions) @@ -402,25 +384,25 @@ def test_create_cmab_bernoulli_bai_cold_start(a_int): # n_features must be > 0 if a_int <= 0: with pytest.raises(ValidationError): - create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) else: # default exploit_p - mab1 = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) mab2 = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a_int), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a_int), + "a1": BayesianLogisticRegression.cold_start(n_features=a_int), + "a2": BayesianLogisticRegression.cold_start(n_features=a_int), } ) mab2.predict_actions_randomly = True assert mab1 == mab2 # set exploit_p - mab1 = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=a_int, exploit_p=0.42) + mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int, exploit_p=0.42) mab2 = CmabBernoulliBAI( actions={ - "a1": 
create_bayesian_logistic_regression_cold_start(n_betas=a_int), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a_int), + "a1": BayesianLogisticRegression.cold_start(n_features=a_int), + "a2": BayesianLogisticRegression.cold_start(n_features=a_int), }, exploit_p=0.42, ) @@ -435,21 +417,13 @@ def test_cmab_bai_can_instantiate(n_features): CmabBernoulliBAI() with pytest.raises(AttributeError): CmabBernoulliBAI(actions={}) - with pytest.raises(AttributeError): - CmabBernoulliBAI(actions={"a1": create_bayesian_logistic_regression_cold_start(n_betas=2)}) - with pytest.raises(TypeError): # strategy is not an argument of init - CmabBernoulliBAI( - actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - }, - strategy=BestActionIdentification(), - ) - with pytest.raises(TypeError): # predict_with_proba is not an argument of init + with pytest.warns(UserWarning): + CmabBernoulliBAI(actions={"a1": BayesianLogisticRegression.cold_start(n_features=2)}) + with pytest.raises(ValidationError): # predict_with_proba is not an argument of init CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, predict_with_proba=True, ) @@ -460,27 +434,34 @@ def test_cmab_bai_can_instantiate(n_features): "a2": None, }, ) + CmabBernoulliBAI( + actions={ + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), + }, + strategy=BestActionIdentification(), + ) mab = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) assert not mab.predict_actions_randomly assert not mab.predict_with_proba assert mab.strategy == BestActionIdentification() mab = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, exploit_p=0.42, ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) assert not mab.predict_actions_randomly assert not mab.predict_with_proba assert mab.strategy == BestActionIdentification(exploit_p=0.42) @@ -492,7 +473,7 @@ def 
test_cmab_bai_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) # cold start - mab = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features) selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -503,8 +484,8 @@ def test_cmab_bai_predict(n_samples, n_features): # not cold start mab = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, exploit_p=0.42, ) @@ -517,15 +498,11 @@ def test_cmab_bai_update(n_samples=100, n_features=3): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features) assert mab.predict_actions_randomly - assert all( - [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] - ) + assert all([mab.actions[a] == BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)]) mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] - ) + assert all([mab.actions[a] != BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)]) assert not mab.predict_actions_randomly @@ -539,7 +516,7 @@ def test_cmab_bai_update(n_samples=100, n_features=3): def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): actions: dict = { "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } cmab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p) @@ -606,9 +583,7 @@ def test_cmab_bai_from_state(state): expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict assert expected_actions == actual_actions - expected_exploit_p = ( - state["strategy"].get("exploit_p", 0.5) if state["strategy"].get("exploit_p") is not None else 0.5 - ) # Covers both not existing and existing + None + expected_exploit_p = cmab.strategy.get_expected_value_from_state(state, "exploit_p") actual_exploit_p = cmab.strategy.exploit_p assert expected_exploit_p == actual_exploit_p @@ -630,27 +605,25 @@ def test_create_cmab_bernoulli_cc_cold_start(a_int): # n_features must be > 0 if a_int <= 0: with pytest.raises(ValidationError): - create_cmab_bernoulli_cc_cold_start(action_ids_cost=action_ids_cost, n_features=a_int) + CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) else: # default subsidy_factor - mab1 = create_cmab_bernoulli_cc_cold_start(action_ids_cost=action_ids_cost, n_features=a_int) + mab1 = 
CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) mab2 = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a1"]), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a2"]), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), } ) mab2.predict_actions_randomly = True assert mab1 == mab2 # set subsidy_factor - mab1 = create_cmab_bernoulli_cc_cold_start( - action_ids_cost=action_ids_cost, n_features=a_int, subsidy_factor=0.42 - ) + mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int, subsidy_factor=0.42) mab2 = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a1"]), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a2"]), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), }, subsidy_factor=0.42, ) @@ -665,21 +638,13 @@ def test_cmab_cc_can_instantiate(n_features): CmabBernoulliCC() with pytest.raises(AttributeError): CmabBernoulliCC(actions={}) - with pytest.raises(AttributeError): - CmabBernoulliCC(actions={"a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10)}) - with pytest.raises(TypeError): # strategy is not an argument of init + with pytest.warns(UserWarning): + CmabBernoulliCC(actions={"a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10)}) + with pytest.raises(ValidationError): # predict_with_proba is not an argument of init CmabBernoulliCC( actions={ - create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - }, - strategy=CostControlBandit(), - ) - with pytest.raises(TypeError): # predict_with_proba is not an argument of init - CmabBernoulliCC( - actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), }, predict_with_proba=True, ) @@ -690,27 +655,34 @@ def test_cmab_cc_can_instantiate(n_features): "a2": None, }, ) + CmabBernoulliCC( + actions={ + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + }, + strategy=CostControlBandit(), + ) mab = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), } ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + assert mab.actions["a1"] == 
BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) + assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) assert not mab.predict_actions_randomly assert mab.predict_with_proba assert mab.strategy == CostControlBandit() mab = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), }, subsidy_factor=0.42, ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) + assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) assert not mab.predict_actions_randomly assert mab.predict_with_proba assert mab.strategy == CostControlBandit(subsidy_factor=0.42) @@ -722,7 +694,7 @@ def test_cmab_cc_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) # cold start - mab = create_cmab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features) + mab = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features) selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -733,8 +705,8 @@ def test_cmab_cc_predict(n_samples, n_features): # not cold start mab = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=20.5), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=20.5), }, subsidy_factor=0.42, ) @@ -747,18 +719,18 @@ def test_cmab_cc_update(n_samples=100, n_features=3): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features) + mab = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features) assert mab.predict_actions_randomly assert all( [ - mab.actions[a] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + mab.actions[a] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) for a in set(actions) ] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( [ - mab.actions[a] != create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + mab.actions[a] != BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) for a in set(actions) ] ) @@ -781,7 +753,7 @@ def test_cmab_cc_get_state( "a1": BayesianLogisticRegressionCC( alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()], cost=cost_1 ), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost_2), 
+ "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=cost_2), } cmab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) @@ -849,9 +821,7 @@ def test_cmab_cc_from_state(state): expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict assert expected_actions == actual_actions - expected_subsidy_factor = ( - state["strategy"].get("subsidy_factor", 0.5) if state["strategy"].get("subsidy_factor") is not None else 0.5 - ) # Covers both not existing and existing + None + expected_subsidy_factor = cmab.strategy.get_expected_value_from_state(state, "subsidy_factor") actual_subsidy_factor = cmab.strategy.subsidy_factor assert expected_subsidy_factor == actual_subsidy_factor @@ -871,9 +841,7 @@ def test_cmab_cc_from_state(state): def test_epsilon_greedy_cmab_predict_cold_start(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cold_start( - action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1" - ) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -887,9 +855,7 @@ def test_epsilon_greedy_cmab_predict_cold_start(n_samples, n_features): def test_epsilon_greedy_cmab_bai_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_bai_cold_start( - action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1" - ) + mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -904,7 +870,7 @@ def test_epsilon_greedy_cmab_cc_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) # cold start - mab = create_cmab_bernoulli_cc_cold_start( + mab = CmabBernoulliCC.cold_start( action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features, epsilon=0.1, default_action="a1" ) selected_actions, probs, weighted_sums = mab.predict(context=context) diff --git a/tests/test_base.py b/tests/test_mab.py similarity index 54% rename from tests/test_base.py rename to tests/test_mab.py index dbec460..a1ea652 100644 --- a/tests/test_base.py +++ b/tests/test_mab.py @@ -20,16 +20,18 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
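For orientation, the unified cold_start classmethods exercised throughout the tests above replace the removed create_*_cold_start helpers; a condensed sketch with illustrative action ids, costs, and hyperparameters:

from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC
from pybandits.smab import SmabBernoulli

# plain contextual bandit: one cold-start Bayesian logistic regression per action
cmab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=3)
# best action identification variant with a custom exploit_p
cmab_bai = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=3, exploit_p=0.42)
# cost control variant: per-action costs and an optional subsidy_factor
cmab_cc = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=3, subsidy_factor=0.42)
# stochastic bandit with epsilon-greedy fallback to a default action
smab = SmabBernoulli.cold_start(action_ids={"a1", "a2"}, epsilon=0.1, default_action="a1")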
-from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Union import hypothesis.strategies as st import numpy as np import pytest from hypothesis import given -from pydantic import NonNegativeInt, ValidationError +from pydantic import ValidationError from pytest_mock import MockerFixture -from pybandits.base import ActionId, BaseMab, Float01, Probability +from pybandits.base import ActionId, BinaryReward, Float01, Probability +from pybandits.consts import ACTION_IDS_PREFIX +from pybandits.mab import BaseMab from pybandits.model import Beta from pybandits.strategy import ClassicBandit @@ -38,9 +40,8 @@ class DummyMab(BaseMab): epsilon: Optional[Float01] = None default_action: Optional[ActionId] = None - def update(self, actions: List[ActionId], rewards: List[NonNegativeInt]): - super().update(actions=actions, rewards=rewards) - pass + def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): + self._validate_update_params(actions=actions, rewards=rewards) def predict( self, @@ -56,17 +57,17 @@ def get_state(self) -> (str, dict): def test_base_mab_raise_on_less_than_2_actions(): - with pytest.raises(ValidationError): + with pytest.raises(TypeError): DummyMab(actions={"a1": Beta(), "a2": Beta()}) with pytest.raises(ValidationError): DummyMab(actions={"": Beta(), "a2": Beta()}, strategy=ClassicBandit()) with pytest.raises(AttributeError): DummyMab(actions={}, strategy=ClassicBandit()) - with pytest.raises(AttributeError): + with pytest.raises(ValidationError): DummyMab(actions={"a1": None}, strategy=ClassicBandit()) with pytest.raises(ValidationError): DummyMab(actions={"a1": None, "a2": None}, strategy=ClassicBandit()) - with pytest.raises(AttributeError): + with pytest.warns(UserWarning): DummyMab(actions={"a1": Beta()}, strategy=ClassicBandit()) @@ -74,12 +75,12 @@ def test_base_mab_check_update_params(): dummy_mab = DummyMab(actions={"a1": Beta(), "a2": Beta()}, strategy=ClassicBandit()) with pytest.raises(AttributeError): # actionId doesn't exist - dummy_mab._check_update_params(actions=["a1", "a3"], rewards=[1, 1]) + dummy_mab._validate_update_params(actions=["a1", "a3"], rewards=[1, 1]) with pytest.raises(AttributeError): # actionId cannot be empty - dummy_mab._check_update_params(actions=[""], rewards=[1]) + dummy_mab._validate_update_params(actions=[""], rewards=[1]) with pytest.raises(AttributeError): - dummy_mab._check_update_params(actions=["a1", "a2"], rewards=[1]) + dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1]) @given(r1=st.integers(min_value=0, max_value=1), r2=st.integers(min_value=0, max_value=1)) @@ -92,6 +93,108 @@ def test_base_mab_update_ok(r1, r2): ######################################################################################################################## +# BaseMab._extract_action_specific_kwargs functionality tests + + +def test_returns_empty_dict_when_no_action_specific_kwargs(): + kwargs = {"param1": 1, "param2": 2} + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == {} + + +def test_processes_kwargs_with_non_dict_values(): + kwargs = { + f"{ACTION_IDS_PREFIX}param1": "not_a_dict", + } + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == {} + + +def test_manages_kwargs_with_empty_dicts(): + kwargs = {f"{ACTION_IDS_PREFIX}param1": {}, f"{ACTION_IDS_PREFIX}param2": {}} + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == {} + + +def 
test_extracts_action_specific_kwargs_with_valid_keys(): + kwargs = { + f"{ACTION_IDS_PREFIX}param1": {"action1": 1, "action2": 2}, + f"{ACTION_IDS_PREFIX}param2": {"action1": 3, "action2": 4}, + } + expected_output = {"action1": {"param1": 1, "param2": 3}, "action2": {"param1": 2, "param2": 4}} + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == expected_output + + +######################################################################################################################## + + +# BaseMab._extract_action_model_class_and_attributes functionality tests + + +def test_extracts_action_model_class_and_attributes_with_valid_kwargs(mocker: MockerFixture): + class MockActionModel: + def __init__(self, param1, param2): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=["param1", "param2"]) + + kwargs = {"param1": 1, "param2": 2} + action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert action_model_cold_start == MockActionModel + assert action_general_kwargs == {"param1": 1, "param2": 2} + + +def test_returns_callable_for_action_model_cold_start_instantiation(mocker: MockerFixture): + class MockActionModel: + @classmethod + def cold_start(cls): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) + + kwargs = {} + action_model_cold_start, _ = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert callable(action_model_cold_start) + + +def test_handles_empty_kwargs_gracefully(mocker: MockerFixture): + class MockActionModel: + def __init__(self): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) + + kwargs = {} + action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert action_model_cold_start == MockActionModel + assert action_general_kwargs == {} + + +def test_handles_kwargs_with_no_matching_action_model_attributes(mocker: MockerFixture): + class MockActionModel: + def __init__(self): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) + + kwargs = {"irrelevant_param": 1} + action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert action_model_cold_start == MockActionModel + assert action_general_kwargs == {} + + +######################################################################################################################## + + # Epsilon-greedy functionality tests diff --git a/tests/test_model.py b/tests/test_model.py index ed8827a..2041cf2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -28,7 +28,6 @@ from pydantic import ValidationError from pybandits.model import ( - BaseBetaMO, BayesianLogisticRegression, BayesianLogisticRegressionCC, Beta, @@ -36,8 +35,6 @@ BetaMO, BetaMOCC, StudentT, - create_bayesian_logistic_regression_cc_cold_start, - create_bayesian_logistic_regression_cold_start, ) ######################################################################################################################## @@ -110,26 +107,26 @@ def 
test_can_init_betaCC(a_float):
 ########################################################################################################################
-# BaseBetaMO
+# BetaMO


 def test_can_init_base_beta_mo():
     # init with default params
-    b = BaseBetaMO(counters=[Beta(), Beta()])
+    b = BetaMO(counters=[Beta(), Beta()])
     assert b.counters[0].n_successes == 1 and b.counters[0].n_failures == 1
     assert b.counters[1].n_successes == 1 and b.counters[1].n_failures == 1

     # init with empty dict
-    b = BaseBetaMO(counters=[{}, {}])
+    b = BetaMO(counters=[{}, {}])
     assert b.counters[0] == Beta()

     # invalid init with BetaCC instead of Beta
     with pytest.raises(ValidationError):
-        BaseBetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)])
+        BetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)])


 def test_calculate_proba_beta_mo():
-    b = BaseBetaMO(counters=[Beta(), Beta()])
+    b = BetaMO(counters=[Beta(), Beta()])
     b.sample_proba()

@@ -142,11 +139,11 @@ def test_beta_update_mo(rewards1, rewards2):
     rewards1, rewards2 = rewards1[:min_len], rewards2[:min_len]
     rewards = [[a, b] for a, b in zip(rewards1, rewards2)]

-    b = BaseBetaMO(counters=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)])
+    b = BetaMO(counters=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)])

     b.update(rewards=rewards)

-    assert b == BaseBetaMO(
+    assert b == BetaMO(
         counters=[
             Beta(n_successes=11 + sum(rewards1), n_failures=22 + len(rewards1) - sum(rewards1)),
             Beta(n_successes=33 + sum(rewards2), n_failures=44 + len(rewards2) - sum(rewards2)),
@@ -247,15 +244,15 @@ def test_create_default_instance_bayesian_logistic_regression(a_int):
     # at least one beta must be specified
     if a_int <= 0:
         with pytest.raises(ValidationError):
-            create_bayesian_logistic_regression_cold_start(n_betas=a_int)
+            BayesianLogisticRegression.cold_start(n_features=a_int)
     else:
-        blr = create_bayesian_logistic_regression_cold_start(n_betas=a_int)
+        blr = BayesianLogisticRegression.cold_start(n_features=a_int)
         assert blr == BayesianLogisticRegression(alpha=StudentT(), betas=[StudentT() for _ in range(a_int)])


 @given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100))
 def test_check_context_matrix(n_samples, n_features):
-    blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+    blr = BayesianLogisticRegression.cold_start(n_features=n_features)

     # context is numpy array
     context = np.random.uniform(low=-100.0, high=100.0, size=(n_samples, n_features))
@@ -276,7 +273,7 @@ def test_check_context_matrix(n_samples, n_features):
         with pytest.raises(AttributeError):
             blr.check_context_matrix(context=context.loc[:, 1:])

-    blr = create_bayesian_logistic_regression_cold_start(n_betas=2)
+    blr = BayesianLogisticRegression.cold_start(n_features=2)
     with pytest.raises(AttributeError):
         blr.check_context_matrix(context=[[1], [2, 3]])  # context has shape mismatch
@@ -294,10 +291,10 @@ def sample_proba(context):
         prob, weighted_sum = blr.sample_proba(context=context)

         assert type(prob) is type(weighted_sum) is np.ndarray  # type of the returns must be np.ndarray
-        assert len(prob) == len(weighted_sum) == n_samples  # return 1 sampled proba and ws per each sample
+        assert len(prob) == len(weighted_sum) == n_samples  # return 1 sampled probability and ws per each sample
         assert all([0 <= p <= 1 for p in prob])  # probs must be in the interval [0, 1]

-    blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+    blr = BayesianLogisticRegression.cold_start(n_features=n_features)

     # context is numpy array
     context = np.random.uniform(low=-100.0, high=100.0, size=(n_samples, n_features))
@@ -317,7 +314,7 @@ def sample_proba(context):

 def test_blr_update(n_samples=100, n_features=3):
     def update(context, rewards):
-        blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+        blr = BayesianLogisticRegression.cold_start(n_features=n_features)
         assert blr.alpha == StudentT(mu=0.0, sigma=10.0, nu=5.0)
         assert blr.betas == [
             StudentT(mu=0.0, sigma=10.0, nu=5.0),
@@ -353,7 +350,7 @@ def update(context, rewards):

     # raise an error if len(context) != len(rewards)
     with pytest.raises(ValueError):
-        blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
+        blr = BayesianLogisticRegression.cold_start(n_features=n_features)
         blr.update(context=context, rewards=rewards[1:])

@@ -379,9 +376,9 @@ def test_create_default_instance_bayesian_logistic_regression_cc(n_betas, cost):
     # at least one beta must be specified
     if n_betas <= 0 or cost < 0:
         with pytest.raises(ValidationError):
-            create_bayesian_logistic_regression_cc_cold_start(n_betas=n_betas, cost=cost)
+            BayesianLogisticRegressionCC.cold_start(n_features=n_betas, cost=cost)
     else:
-        blr = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_betas, cost=cost)
+        blr = BayesianLogisticRegressionCC.cold_start(n_features=n_betas, cost=cost)
         assert blr == BayesianLogisticRegressionCC(
             alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)], cost=cost
         )
diff --git a/tests/test_smab.py b/tests/test_smab.py
index 369d016..2f4c949 100644
--- a/tests/test_smab.py
+++ b/tests/test_smab.py
@@ -30,27 +30,17 @@
 from pybandits.base import BinaryReward, Float01
 from pybandits.model import Beta, BetaCC, BetaMO, BetaMOCC
-from pybandits.smab import (
-    SmabBernoulli,
-    SmabBernoulliBAI,
-    SmabBernoulliCC,
-    SmabBernoulliMO,
-    SmabBernoulliMOCC,
-    create_smab_bernoulli_bai_cold_start,
-    create_smab_bernoulli_cc_cold_start,
-    create_smab_bernoulli_cold_start,
-    create_smab_bernoulli_mo_cc_cold_start,
-    create_smab_bernoulli_mo_cold_start,
-)
-from pybandits.strategy import (
-    ClassicBandit,
-    CostControlBandit,
-    MultiObjectiveBandit,
-    MultiObjectiveCostControlBandit,
-)
+from pybandits.smab import SmabBernoulli, SmabBernoulliBAI, SmabBernoulliCC, SmabBernoulliMO, SmabBernoulliMOCC
+from pybandits.strategy import ClassicBandit, CostControlBandit, MultiObjectiveBandit, MultiObjectiveCostControlBandit
 from pybandits.utils import to_serializable_dict
 from tests.test_utils import is_serializable
+
+@pytest.fixture(scope="session")
+def n_samples() -> int:
+    return 1000
+
+
 ########################################################################################################################
@@ -58,7 +48,7 @@ def test_create_smab_bernoulli_cold_start():
-    assert create_smab_bernoulli_cold_start(action_ids={"a1", "a2"}) == SmabBernoulli(
+    assert SmabBernoulli.cold_start(action_ids={"a1", "a2"}) == SmabBernoulli(
         actions={"a1": Beta(), "a2": Beta()},
     )

@@ -75,16 +65,8 @@ def test_can_instantiate_smab():
         SmabBernoulli()
     with pytest.raises(AttributeError):
         SmabBernoulli(actions={})
-    with pytest.raises(AttributeError):
+    with pytest.warns(UserWarning):
         SmabBernoulli(actions={"action1": Beta()})
-    with pytest.raises(TypeError):  # strategy is not an argument of init
-        SmabBernoulli(
-            actions={
-                "action1": Beta(),
-                "action2": Beta(),
-            },
-            strategy=ClassicBandit(),
-        )
     with pytest.raises(ValidationError):
         SmabBernoulli(
             actions={
@@ -92,11 +74,18 @@ def test_can_instantiate_smab():
                 "action2": None,
             },
         )
-    smab = SmabBernoulli(
+    SmabBernoulli(
         actions={
             "action1": Beta(),
             "action2": Beta(),
         },
+        strategy=ClassicBandit(),
+    )
+    smab = SmabBernoulli(
+        actions={
+            "action1": Beta(),
+            "action2": Beta(),
+        }
     )

     assert smab.actions["action1"] == Beta()
@@ -131,8 +120,7 @@ def test_smab_predict_raise_when_all_actions_forbidden():
         s.predict(n_samples=10, forbidden_actions=["a1", "a2"])


-def test_smab_predict():
-    n_samples = 1000
+def test_smab_predict(n_samples: int):
     s = SmabBernoulli(
         actions={
             "a0": Beta(),
@@ -244,7 +232,7 @@ def test_smab_from_state(state):
     assert isinstance(smab, SmabBernoulli)

     expected_actions = state["actions"]
-    actual_actions = json.loads(json.dumps(smab.actions, default=dict))  # Normalize the dict
+    actual_actions = to_serializable_dict(smab.actions)  # Normalize the dict
     assert expected_actions == actual_actions

     # Ensure get_state and from_state compatibility
@@ -260,11 +248,11 @@ def test_smab_from_state(state):
 def test_create_smab_bernoulli_bai():
     # default exploit_p
-    assert create_smab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}) == SmabBernoulliBAI(
+    assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}) == SmabBernoulliBAI(
         actions={"a1": Beta(), "a2": Beta()},
     )
     # set exploit_p
-    assert create_smab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, exploit_p=0.2) == SmabBernoulliBAI(
+    assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, exploit_p=0.2) == SmabBernoulliBAI(
         actions={"a1": Beta(), "a2": Beta()},
         exploit_p=0.2,
     )
@@ -296,8 +284,7 @@ def test_can_init_smabbai():
     assert s.strategy.exploit_p == 0.3


-def test_smabbai_predict():
-    n_samples = 1000
+def test_smabbai_predict(n_samples: int):
     s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()})
     _, _ = s.predict(n_samples=n_samples)

@@ -370,11 +357,9 @@ def test_smab_bai_from_state(state):
     assert isinstance(smab, SmabBernoulliBAI)

     expected_actions = state["actions"]
-    actual_actions = json.loads(json.dumps(smab.actions, default=dict))  # Normalize the dict
+    actual_actions = to_serializable_dict(smab.actions)  # Normalize the dict
     assert expected_actions == actual_actions
-    expected_exploit_p = (
-        state["strategy"].get("exploit_p", 0.5) if state["strategy"].get("exploit_p") is not None else 0.5
-    )  # Covers both not existing and existing + None
+    expected_exploit_p = smab.strategy.get_expected_value_from_state(state, "exploit_p")
     actual_exploit_p = smab.strategy.exploit_p
     assert expected_exploit_p == actual_exploit_p

@@ -390,7 +375,7 @@ def test_smab_bai_from_state(state):

 def test_create_smab_bernoulli_cc():
-    assert create_smab_bernoulli_cc_cold_start(
+    assert SmabBernoulliCC.cold_start(
         action_ids_cost={"a1": 10, "a2": 20},
         subsidy_factor=0.2,
     ) == SmabBernoulliCC(
@@ -398,7 +383,7 @@ def test_create_smab_bernoulli_cc():
         subsidy_factor=0.2,
     )

-    assert create_smab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 20}) == SmabBernoulliCC(
+    assert SmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20}) == SmabBernoulliCC(
         actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=20)},
     )

@@ -429,8 +414,7 @@ def test_can_init_smabcc():
     assert s.strategy.subsidy_factor == 0.7


-def test_smabcc_predict():
-    n_samples = 1000
+def test_smabcc_predict(n_samples: int):
     s = SmabBernoulliCC(
         actions={
             "a1": BetaCC(n_successes=1, n_failures=2, cost=10),
@@ -508,9 +492,7 @@ def test_smab_cc_from_state(state):
     expected_actions = state["actions"]
     actual_actions = json.loads(json.dumps(smab.actions, default=dict))  # Normalize the dict
     assert expected_actions == actual_actions
-    expected_subsidy_factor = (
-        state["strategy"].get("subsidy_factor", 0.5) if state["strategy"].get("subsidy_factor") is not None else 0.5
-    )  # Covers both not existing and existing + None
+    expected_subsidy_factor = smab.strategy.get_expected_value_from_state(state, "subsidy_factor")
     actual_subsidy_factor = smab.strategy.subsidy_factor
     assert expected_subsidy_factor == actual_subsidy_factor

@@ -568,17 +550,15 @@ def test_all_actions_must_have_same_number_of_objectives_smab_mo():
     with pytest.raises(ValueError):
         SmabBernoulliMO(
             actions={
-                "action 1": BetaMO(counters=[Beta(), Beta()]),
-                "action 2": BetaMO(counters=[Beta(), Beta()]),
-                "action 3": BetaMO(counters=[Beta(), Beta(), Beta()]),
+                "a1": BetaMO(counters=[Beta(), Beta()]),
+                "a2": BetaMO(counters=[Beta(), Beta()]),
+                "a3": BetaMO(counters=[Beta(), Beta(), Beta()]),
             },
         )


-def test_smab_mo_predict():
-    n_samples = 1000
-
-    s = create_smab_bernoulli_mo_cold_start(action_ids={"a1", "a2"}, n_objectives=3)
+def test_smab_mo_predict(n_samples: int, n_objectives=3):
+    s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=n_objectives)

     forbidden = None
     s.predict(n_samples=n_samples, forbidden_actions=forbidden)
@@ -601,9 +581,13 @@ def test_smab_mo_predict():
     s.predict(n_samples=n_samples, forbidden_actions=forbidden)


-def test_smab_mo_update():
-    mab = create_smab_bernoulli_mo_cold_start(action_ids={"a1", "a2"}, n_objectives=3)
-    mab.update(actions=["a1", "a1"], rewards=[[1, 0, 1], [1, 1, 0]])
+def test_smab_mo_update(n_objectives=3):
+    action_ids = {"a1", "a2"}
+    mab = SmabBernoulliMO.cold_start(action_ids=action_ids, n_objectives=n_objectives)
+    assert all([mab.actions[a] == BetaMO.cold_start(n_objectives=n_objectives) for a in action_ids])
+
+    mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]])
+    assert all([mab.actions[a] != BetaMO.cold_start(n_objectives=n_objectives) for a in set(action_ids)])


 @given(st.lists(st.integers(min_value=1), min_size=6, max_size=6))
@@ -741,10 +725,10 @@ def test_all_actions_must_have_same_number_of_objectives_smab_mo_cc():
     )


-def test_smab_mo_cc_predict():
+def test_smab_mo_cc_predict(n_samples: int):
     n_samples = 1000

-    s = create_smab_bernoulli_mo_cc_cold_start(action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2)
+    s = SmabBernoulliMOCC.cold_start(action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2)

     forbidden = None
     s.predict(n_samples=n_samples, forbidden_actions=forbidden)
@@ -767,8 +751,27 @@ def test_smab_mo_cc_predict():
     s.predict(n_samples=n_samples, forbidden_actions=forbidden)


+def test_smab_mo_cc_update(n_objectives=3):
+    action_ids_cost = {"a1": 1, "a2": 2}
+    mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=n_objectives)
+    assert all(
+        [
+            mab.actions[a] == BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a])
+            for a in action_ids_cost.keys()
+        ]
+    )
+
+    mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]])
+    assert all(
+        [
+            mab.actions[a] != BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a])
+            for a in action_ids_cost.keys()
+        ]
+    )
+
+
 @given(st.lists(st.integers(min_value=1), min_size=8, max_size=8))
-def test_smab_mocc_get_state(a_list):
+def test_smab_mo_cc_get_state(a_list):
     a, b, c, d, e, f, g, h = a_list

     actions = {
@@ -837,7 +840,7 @@ def test_smab_mo_cc_from_state(state):
     assert isinstance(smab, SmabBernoulliMOCC)

     expected_actions = state["actions"]
-    actual_actions = json.loads(json.dumps(smab.actions, default=dict))  # Normalize the dict
+    actual_actions = to_serializable_dict(smab.actions)  # Normalize the dict
     assert expected_actions == actual_actions

     # Ensure get_state and from_state compatibility
@@ -868,7 +871,7 @@ def test_can_instantiate_epsilon_greddy_smab_with_params(a, b):
     assert s.actions["action1"] == s.actions["action2"]


-def test_epsilon_greedy_smab_predict():
+def test_epsilon_greedy_smab_predict(n_samples: int):
     n_samples = 1000

     s = SmabBernoulli(
@@ -888,13 +891,13 @@ def test_epsilon_greedy_smab_predict():
     _, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden_actions)


-def test_epsilon_greddy_smabbai_predict():
+def test_epsilon_greddy_smabbai_predict(n_samples: int):
     n_samples = 1000
     s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}, epsilon=0.1, default_action="a1")
     _, _ = s.predict(n_samples=n_samples)


-def test_epsilon_greddy_smabcc_predict():
+def test_epsilon_greddy_smabcc_predict(n_samples: int):
     n_samples = 1000
     s = SmabBernoulliCC(
         actions={
@@ -908,19 +911,19 @@ def test_epsilon_greddy_smabcc_predict():
     _, _ = s.predict(n_samples=n_samples)


-def test_epsilon_greddy_smab_mo_predict():
+def test_epsilon_greddy_smab_mo_predict(n_samples: int):
     n_samples = 1000

-    s = create_smab_bernoulli_mo_cold_start(action_ids={"a1", "a2"}, n_objectives=3, epsilon=0.1, default_action="a1")
+    s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=3, epsilon=0.1, default_action="a1")

     forbidden = None
     s.predict(n_samples=n_samples, forbidden_actions=forbidden)


-def test_epsilon_greddy_smab_mo_cc_predict():
+def test_epsilon_greddy_smab_mo_cc_predict(n_samples: int):
     n_samples = 1000

-    s = create_smab_bernoulli_mo_cc_cold_start(
+    s = SmabBernoulliMOCC.cold_start(
         action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2, epsilon=0.1, default_action="a1"
     )
diff --git a/tests/test_strategy.py b/tests/test_strategy.py
index 3df84e6..26b46ac 100644
--- a/tests/test_strategy.py
+++ b/tests/test_strategy.py
@@ -36,7 +36,7 @@
     CostControlBandit,
     MultiObjectiveBandit,
     MultiObjectiveCostControlBandit,
-    get_pareto_front,
+    MultiObjectiveStrategy,
 )

 ########################################################################################################################
@@ -81,17 +81,19 @@ def test_can_init_best_action_identification(a_float):


 @given(st.floats())
-def test_set_exploit_p(a_float):
+def test_with_exploit_p(a_float):
     b = BestActionIdentification()

     # set with invalid float
     if a_float < 0 or a_float > 1 or np.isnan(a_float) or np.isinf(a_float):
         with pytest.raises(ValidationError):
-            b.set_exploit_p(exploit_p=a_float)
+            b.with_exploit_p(exploit_p=a_float)
     # set with valid float
     else:
-        b.set_exploit_p(exploit_p=a_float)
-        assert b.exploit_p == a_float
+        mutated_b = b.with_exploit_p(exploit_p=a_float)
+        assert b.exploit_p != a_float
+        assert mutated_b.exploit_p == a_float
+        assert mutated_b is not b


 @given(
@@ -117,9 +119,9 @@ def test_select_action_logic(a_float1, a_float2, a_float3):
     assert max(p, key=p.get) == b.select_action(p=p)

     # if exploit_p factor is 0 => return the action with 2nd highest prob (not 1st highest prob)
-    b.set_exploit_p(exploit_p=0)
-    assert max(p, key=p.get) != b.select_action(p=p)
-    assert sorted(p.items(), key=lambda x: x[1], reverse=True)[1][0] == b.select_action(p=p)
+    mutated_b = b.with_exploit_p(exploit_p=0)
+    assert max(p, key=p.get) != mutated_b.select_action(p=p)
+    assert sorted(p.items(), key=lambda x: x[1], reverse=True)[1][0] == mutated_b.select_action(p=p)


 def test_select_action_logic_all_probs_equal():
@@ -130,8 +132,8 @@ def test_select_action_logic_all_probs_equal():
     assert "a1" == b.select_action(p=p)

     # if exploit_p is 0 => return the action with 2nd highest prob (not 1st highest prob)
-    b.set_exploit_p(exploit_p=0)
-    assert "a2" == b.select_action(p=p)
+    mutated_b = b.with_exploit_p(exploit_p=0)
+    assert "a2" == mutated_b.select_action(p=p)


 @given(st.builds(Beta), st.builds(Beta), st.builds(Beta))
@@ -166,17 +168,19 @@ def test_can_init_cost_control(a_float):


 @given(st.floats())
-def test_set_subsidy_factor(a_float):
+def test_with_subsidy_factor(a_float):
     c = CostControlBandit()

     # set with invalid float
     if a_float < 0 or a_float > 1 or np.isnan(a_float) or np.isinf(a_float):
         with pytest.raises(ValidationError):
-            c.set_subsidy_factor(subsidy_factor=a_float)
+            c.with_subsidy_factor(subsidy_factor=a_float)
     # set with valid float
     else:
-        c.set_subsidy_factor(subsidy_factor=a_float)
-        assert c.subsidy_factor == a_float
+        mutated_c = c.with_subsidy_factor(subsidy_factor=a_float)
+        assert c.subsidy_factor != a_float
+        assert mutated_c.subsidy_factor == a_float
+        assert mutated_c is not c


 @given(
@@ -210,12 +214,12 @@ def test_select_action_logic_cc():
     assert "a4" == c.select_action(p=p, actions=actions)

     # if subsidy_factor is 0 => return the action with highest p (classic bandit)
-    c.set_subsidy_factor(subsidy_factor=0)
-    assert "a2" == c.select_action(p=p, actions=actions)
+    mutated_c = c.with_subsidy_factor(subsidy_factor=0)
+    assert "a2" == mutated_c.select_action(p=p, actions=actions)

     # otherwise, return the cheapest feasible action with the highest sampled probability
-    c.set_subsidy_factor(subsidy_factor=0.5)
-    assert "a5" == c.select_action(p=p, actions=actions)
+    mutated_c = c.with_subsidy_factor(subsidy_factor=0.5)
+    assert "a5" == mutated_c.select_action(p=p, actions=actions)


 @given(
@@ -247,20 +251,20 @@ def test_select_action_logic_corner_cases(a_list_p, a_list_cost):
     assert sorted(actions_cost_proba)[0][-1] == c.select_action(p=p, actions=actions)

     # if cost factor is 0:
-    c.set_subsidy_factor(subsidy_factor=0)
+    mutated_c = c.with_subsidy_factor(subsidy_factor=0)

     # get the keys of the max p.values() (there might be more max_p_values)
     max_p_values = [k for k, v in p.items() if v == max(p.values())]

     # if cost factor is 0 and only 1 max_value => return the action with highest p (classic bandit)
     # e.g. p={"a1": 0.5, "a2": 0.2} => return always "a1"
     if len(max_p_values) == 1:
-        assert max(p, key=p.get) == c.select_action(p=p, actions=actions)
+        assert max(p, key=p.get) == mutated_c.select_action(p=p, actions=actions)
     # if cost factor is 0 and only 1+ max_values => return the action with highest p and min cost
     # e.g. p={"a1": 0.5, "a2": 0.5} and cost={"a1": 20, "a2": 10} => return always "a2"
     else:
         actions_cost_max = {k: actions_cost[k] for k in max_p_values}
-        min(actions_cost_max, key=actions_cost_max.get) == c.select_action(p=p, actions=actions)
+        assert min(actions_cost_max, key=actions_cost_max.get) == mutated_c.select_action(p=p, actions=actions)


 ########################################################################################################################
@@ -282,7 +286,7 @@ def test_can_init_multiobjective():
 )
 def test_select_action_mo(p: Dict[ActionId, List[Probability]]):
     m = MultiObjectiveBandit()
-    assert m.select_action(p=p) in get_pareto_front(p=p)
+    assert m.select_action(p=p) in m.get_pareto_front(p=p)


 def test_pareto_front():
@@ -310,7 +314,7 @@ def test_pareto_front():
         "a7": [0.1, 0.1],
     }

-    assert get_pareto_front(p2d) == ["a0", "a1", "a4", "a5"]
+    assert MultiObjectiveStrategy.get_pareto_front(p2d) == ["a0", "a1", "a4", "a5"]

     p2d = {
         "a0": [0.1, 0.1],
         "a1": [0.3, 0.1],
         "a2": [0.3, 0.3],
     }

-    assert get_pareto_front(p2d) == ["a1", "a2"]
+    assert MultiObjectiveStrategy.get_pareto_front(p2d) == ["a1", "a2"]

     # works in 3D
     p3d = {
@@ -332,7 +336,7 @@ def test_pareto_front():
         "a7": [0.1, 0.1, 0.3],
     }

-    assert get_pareto_front(p3d) == ["a0", "a1", "a4", "a5", "a7"]
+    assert MultiObjectiveStrategy.get_pareto_front(p3d) == ["a0", "a1", "a4", "a5", "a7"]


 ########################################################################################################################
@@ -363,7 +367,7 @@ def test_select_action_mo_cc():
         "a5": [0.6, 0.1, 0.5],
     }
     # within the pareto front ("a3", "a4", "a5") select the action with min cost ("a4")
-    assert get_pareto_front(p) == ["a3", "a4", "a5"]
+    assert m.get_pareto_front(p) == ["a3", "a4", "a5"]
     assert m.select_action(p=p, actions=actions) == "a4"

     actions = {
@@ -377,5 +381,5 @@ def test_select_action_mo_cc():
         "a3": [0.0, 0.1, 0.9],
     }
     # within the actions with the min cost ("a1" or "a2") select the action the highest mean of probabilities ("a2")
-    assert get_pareto_front(p) == ["a1", "a2", "a3"]
+    assert m.get_pareto_front(p) == ["a1", "a2", "a3"]
     assert m.select_action(p=p, actions=actions) == "a2"