diff --git a/docs/tutorials/mab.ipynb b/docs/tutorials/mab.ipynb index 2f0fb76..22c5666 100644 --- a/docs/tutorials/mab.ipynb +++ b/docs/tutorials/mab.ipynb @@ -20,7 +20,7 @@ "from rich import print\n", "\n", "from pybandits.model import Beta\n", - "from pybandits.smab import SmabBernoulli, create_smab_bernoulli_cold_start" + "from pybandits.smab import SmabBernoulli" ] }, { @@ -73,8 +73,6 @@ "metadata": {}, "outputs": [], "source": [ - "n_objectives = 2\n", - "\n", "mab = SmabBernoulli(\n", " actions={\n", " \"a1\": Beta(n_successes=1, n_failures=1),\n", @@ -137,7 +135,7 @@ "id": "564914fd-73cc-4854-8ec7-548970f794a6", "metadata": {}, "source": [ - "You can initialize the bandit via the utility function `create_smab_bernoulli_mo_cc_cold_start()`. This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." + "You can initialize the bandit via the factory method `SmabBernoulli.cold_start()`. This is particularly useful in a cold start setting when there is no prior knowledge on the Beta distributions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." 
] }, { @@ -148,7 +146,7 @@ "outputs": [], "source": [ "# generate a smab bernoulli in cold start settings\n", - "mab = create_smab_bernoulli_cold_start(action_ids=[\"a1\", \"a2\", \"a3\"])" + "mab = SmabBernoulli.cold_start(action_ids=[\"a1\", \"a2\", \"a3\"])" ] }, { diff --git a/docs/tutorials/smab_mo_cc.ipynb b/docs/tutorials/smab_mo_cc.ipynb index ae4436c..880654c 100644 --- a/docs/tutorials/smab_mo_cc.ipynb +++ b/docs/tutorials/smab_mo_cc.ipynb @@ -20,7 +20,7 @@ "from rich import print\n", "\n", "from pybandits.model import Beta, BetaMOCC\n", - "from pybandits.smab import SmabBernoulliMOCC, create_smab_bernoulli_mo_cc_cold_start" + "from pybandits.smab import SmabBernoulliMOCC" ] }, { @@ -72,8 +72,6 @@ "metadata": {}, "outputs": [], "source": [ - "n_objectives = 2\n", - "\n", "mab = SmabBernoulliMOCC(\n", " actions={\n", " \"a1\": BetaMOCC(counters=[Beta(n_successes=1, n_failures=1), Beta(n_successes=1, n_failures=1)], cost=30),\n", @@ -153,7 +151,7 @@ "id": "564914fd-73cc-4854-8ec7-548970f794a6", "metadata": {}, "source": [ - "You can initialize the bandit via the utility function `create_smab_bernoulli_mo_cc_cold_start()`. This is particulary useful in a cold start setting when there is no prior knowledge on the Beta distruibutions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." + "You can initialize the bandit via the factory method `SmabBernoulliMOCC.cold_start()`. This is particularly useful in a cold start setting when there is no prior knowledge on the Beta distributions. In this case for all Betas `n_successes` and `n_failures` are set to `1`." 
] }, { @@ -165,10 +163,9 @@ "source": [ "# list of action IDs with their cost\n", "action_ids_cost = {\"a1\": 30, \"a2\": 10, \"a3\": 20}\n", - "n_objectives = 2\n", "\n", "# generate a smab bernoulli in cold start settings\n", - "mab = create_smab_bernoulli_mo_cc_cold_start(action_ids_cost=action_ids_cost, n_objectives=n_objectives)" + "mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost)" ] }, { diff --git a/pybandits/base.py b/pybandits/base.py index 97e42d8..cc56a9e 100644 --- a/pybandits/base.py +++ b/pybandits/base.py @@ -21,233 +21,26 @@ # SOFTWARE. -from abc import ABC, abstractmethod -from typing import Any, Dict, List, NewType, Optional, Set, Tuple, Union +from typing import Dict, List, NewType, Tuple, Union -import numpy as np -from pydantic import ( - BaseModel, - NonNegativeInt, - confloat, - conint, - constr, - field_validator, - model_validator, - validate_call, -) +from pydantic import BaseModel, confloat, conint, constr ActionId = NewType("ActionId", constr(min_length=1)) Float01 = NewType("Float_0_1", confloat(ge=0, le=1)) Probability = NewType("Probability", Float01) -Predictions = NewType("Predictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]]) +CmabPredictions = NewType( + "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]] +) +Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions]) BinaryReward = NewType("BinaryReward", conint(ge=0, le=1)) +ActionRewardLikelihood = NewType( + "ActionRewardLikelihood", + Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], +) class PyBanditsBaseModel(BaseModel, extra="forbid"): """ BaseModel of the PyBandits library. """ - - -class Model(PyBanditsBaseModel, ABC): - """ - Class to model the prior distributions. 
- """ - - @abstractmethod - def sample_proba(self) -> Probability: - """ - Sample the probability of getting a positive reward. - """ - - @abstractmethod - def update(self, rewards: List[Any]): - """ - Update the model parameters. - """ - - -class Strategy(PyBanditsBaseModel, ABC): - """ - Strategy to select actions in multi-armed bandits. - """ - - @abstractmethod - def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId: - """ - Select the action. - """ - - -class BaseMab(PyBanditsBaseModel, ABC): - """ - Multi-armed bandit superclass. - - Parameters - ---------- - actions: Dict[ActionId, Model] - The list of possible actions, and their associated Model. - strategy: Strategy - The strategy used to select actions. - epsilon: Optional[Float01] - The probability of selecting a random action. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - """ - - actions: Dict[ActionId, Model] - strategy: Strategy - epsilon: Optional[Float01] - default_action: Optional[ActionId] - - @field_validator("actions", mode="before") - @classmethod - def at_least_2_actions_are_defined(cls, v): - if len(v) < 2: - raise AttributeError("At least 2 actions should be defined.") - return v - - @model_validator(mode="after") - def check_default_action(self): - if not self.epsilon and self.default_action: - raise AttributeError("A default action should only be defined when epsilon is defined.") - if self.default_action and self.default_action not in self.actions: - raise AttributeError("The default action should be defined in the actions.") - return self - - def _get_valid_actions(self, forbidden_actions: Optional[Set[ActionId]]) -> Set[ActionId]: - """ - Given a set of forbidden action IDs, return a set of valid action IDs. 
- - Parameters - ---------- - forbidden_actions: Optional[Set[ActionId]] - The set of forbidden action IDs. - - Returns - ------- - valid_actions: Set[ActionId] - The list of valid (i.e. not forbidden) action IDs. - """ - if forbidden_actions is None: - forbidden_actions = set() - - if not all(a in self.actions.keys() for a in forbidden_actions): - raise ValueError("forbidden_actions contains invalid action IDs.") - valid_actions = set(self.actions.keys()) - forbidden_actions - if len(valid_actions) == 0: - raise ValueError("All actions are forbidden. You must allow at least 1 action.") - if self.default_action and self.default_action not in valid_actions: - raise ValueError("The default action is forbidden.") - - return valid_actions - - def _check_update_params(self, actions: List[ActionId], rewards: List[Union[NonNegativeInt, List[NonNegativeInt]]]): - """ - Verify that the given list of action IDs is a subset of the currently defined actions. - - Parameters - ---------- - actions : List[ActionId] - The selected action for each sample. - rewards: List[Union[BinaryReward, List[BinaryReward]]] - The reward for each sample. - """ - invalid = set(actions) - set(self.actions.keys()) - if invalid: - raise AttributeError(f"The following invalid action(s) were specified: {invalid}.") - if len(actions) != len(rewards): - raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") - - @abstractmethod - @validate_call - def update(self, actions: List[ActionId], rewards: List[Union[BinaryReward, List[BinaryReward]]], *args, **kwargs): - """ - Update the stochastic multi-armed bandit model. - - actions: List[ActionId] - The selected action for each sample. - rewards: List[Union[BinaryReward, List[BinaryReward]]] - The reward for each sample. - """ - - @abstractmethod - @validate_call - def predict(self, forbidden_actions: Optional[Set[ActionId]] = None): - """ - Predict actions. 
- - Parameters - ---------- - forbidden_actions : Optional[Set[ActionId]], default=None - Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only - consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions. - Note that: actions = allowed_actions U forbidden_actions. - - Returns - ------- - actions: List[ActionId] of shape (n_samples,) - The actions selected by the multi-armed bandit model. - probs: List[Dict[ActionId, float]] of shape (n_samples,) - The probabilities of getting a positive reward for each action. - """ - - def get_state(self) -> (str, dict): - """ - Access the complete model internal state, enough to create an exact copy of the same model from it. - Returns - ------- - model_class_name: str - The name of the class of the model. - model_state: dict - The internal state of the model (actions, scores, etc.). - """ - model_name = self.__class__.__name__ - state: dict = self.dict() - return model_name, state - - @validate_call - def _select_epsilon_greedy_action( - self, - p: Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], - actions: Optional[Dict[ActionId, Model]] = None, - ) -> ActionId: - """ - Wraps self.strategy.select_action function with epsilon-greedy strategy, - such that with probability epsilon a default_action is selected, - and with probability 1-epsilon the select_action function is triggered to choose action. - If no default_action is provided, a random action is selected. - - Reference: Reinforcement Learning: An Introduction, Ch. 
2 (Sutton and Burto, 2018) - https://web.stanford.edu/class/psych209/Readings/SuttonBartoIPRLBook2ndEd.pdf&ved=2ahUKEwjMy8WV9N2HAxVe0gIHHVjjG5sQFnoECEMQAQ&usg=AOvVaw3bKK-Y_1kf6XQVwR-UYrBY - - Parameters - ---------- - p: Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] - The dictionary or actions and their sampled probability of getting a positive reward. - For MO strategy, the sampled probability is a list with elements corresponding to the objectives. - actions: Optional[Dict[ActionId, Model]] - The dictionary of actions and their associated Model. - - Returns - ------- - selected_action: ActionId - The selected action. - - Raises - ------ - KeyError - If self.default_action is not present as a key in the probabilities dictionary. - """ - - if self.epsilon: - if self.default_action and self.default_action not in p.keys(): - raise KeyError(f"Default action {self.default_action} not in actions.") - if np.random.binomial(1, self.epsilon): - selected_action = self.default_action if self.default_action else np.random.choice(list(p.keys())) - else: - selected_action = self.strategy.select_action(p=p, actions=actions) - else: - selected_action = self.strategy.select_action(p=p, actions=actions) - return selected_action diff --git a/pybandits/cmab.py b/pybandits/cmab.py index d26a9b9..f34cabc 100644 --- a/pybandits/cmab.py +++ b/pybandits/cmab.py @@ -20,21 +20,16 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Union from numpy import array from numpy.random import choice from numpy.typing import ArrayLike -from pydantic import NonNegativeFloat, PositiveInt, field_validator, validate_call - -from pybandits.base import ActionId, BaseMab, BinaryReward, Float01, Probability -from pybandits.model import ( - BaseBayesianLogisticRegression, - BayesianLogisticRegression, - BayesianLogisticRegressionCC, - create_bayesian_logistic_regression_cc_cold_start, - create_bayesian_logistic_regression_cold_start, -) +from pydantic import field_validator, validate_call + +from pybandits.base import ActionId, BinaryReward, CmabPredictions, Probability +from pybandits.mab import BaseMab +from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC from pybandits.strategy import ( BestActionIdentification, ClassicBandit, @@ -48,7 +43,7 @@ class BaseCmabBernoulli(BaseMab): Parameters ---------- - actions: Dict[ActionId, BaseBayesianLogisticRegression] + actions: Dict[ActionId, BayesianLogisticRegression] The list of possible actions, and their associated Model. strategy: Strategy The strategy used to select actions. @@ -59,7 +54,7 @@ class BaseCmabBernoulli(BaseMab): bandit strategy. """ - actions: Dict[ActionId, BaseBayesianLogisticRegression] + actions: Dict[ActionId, BayesianLogisticRegression] predict_with_proba: bool predict_actions_randomly: bool @@ -77,7 +72,7 @@ def predict( self, context: ArrayLike, forbidden_actions: Optional[Set[ActionId]] = None, - ) -> Tuple[List[ActionId], List[Dict[ActionId, Probability]]]: + ) -> CmabPredictions: """ Predict actions. @@ -96,6 +91,8 @@ def predict( The actions selected by the multi-armed bandit model. probs: List[Dict[ActionId, Probability]] of shape (n_samples,) The probabilities of getting a positive reward for each action. + ws : List[Dict[ActionId, float]] + The weighted sum of logistic regression logits. 
""" valid_actions = self._get_valid_actions(forbidden_actions) @@ -165,7 +162,7 @@ def update( If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] """ - self._check_update_params(actions=actions, rewards=rewards) + self._validate_update_params(actions=actions, rewards=rewards) if len(context) != len(rewards): raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") @@ -209,22 +206,6 @@ class CmabBernoulli(BaseCmabBernoulli): predict_with_proba: bool = False predict_actions_randomly: bool = False - def __init__( - self, - actions: Dict[ActionId, BaseBayesianLogisticRegression], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "CmabBernoulli": - return cls(actions=state["actions"]) - - @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(context=context, actions=actions, rewards=rewards) - class CmabBernoulliBAI(BaseCmabBernoulli): """ @@ -251,26 +232,7 @@ class CmabBernoulliBAI(BaseCmabBernoulli): predict_with_proba: bool = False predict_actions_randomly: bool = False - def __init__( - self, - actions: Dict[ActionId, BayesianLogisticRegression], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - exploit_p: Optional[Float01] = None, - ): - strategy = BestActionIdentification() if exploit_p is None else BestActionIdentification(exploit_p=exploit_p) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "CmabBernoulliBAI": - return cls(actions=state["actions"], 
exploit_p=state["strategy"].get("exploit_p", None)) - @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(context=context, actions=actions, rewards=rewards) - - -# TODO: add tests class CmabBernoulliCC(BaseCmabBernoulli): """ Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control strategy. @@ -303,163 +265,3 @@ class CmabBernoulliCC(BaseCmabBernoulli): strategy: CostControlBandit predict_with_proba: bool = True predict_actions_randomly: bool = False - - def __init__( - self, - actions: Dict[ActionId, BayesianLogisticRegressionCC], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - subsidy_factor: Optional[Float01] = None, - ): - strategy = CostControlBandit() if subsidy_factor is None else CostControlBandit(subsidy_factor=subsidy_factor) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "CmabBernoulliCC": - return cls(actions=state["actions"], subsidy_factor=state["strategy"].get("subsidy_factor", None)) - - @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(context=context, actions=actions, rewards=rewards) - - -@validate_call -def create_cmab_bernoulli_cold_start( - action_ids: Set[ActionId], - n_features: PositiveInt, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> CmabBernoulli: - """ - Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default - parameters. Until the very first update the model will predict actions randomly, where each action has equal - probability to be selected. - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. 
- n_features: PositiveInt - The number of features expected after in the context matrix. This is also the number of betas of the - Bayesian Logistic Regression model. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - cmab: CmabBernoulli - Contextual Multi-Armed Bandit with strategy = ClassicBandit - """ - actions = {} - for a in set(action_ids): - actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features) - mab = CmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action) - mab.predict_actions_randomly = True - return mab - - -@validate_call -def create_cmab_bernoulli_bai_cold_start( - action_ids: Set[ActionId], - n_features: PositiveInt, - exploit_p: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> CmabBernoulliBAI: - """ - Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action - Identification strategy, with default parameters. Until the very first update the model will predict actions - randomly, where each action has equal probability to be selected. - - Reference: Analysis of Thompson Sampling for the Multi-armed Bandit Problem (Agrawal and Goyal, 2012) - http://proceedings.mlr.press/v23/agrawal12/agrawal12.pdf - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - n_features: PositiveInt - The number of features expected after in the context matrix. This is also the number of betas of the - Bayesian Logistic Regression model. - exploit_p: Float_0_1 (default=0.5) - Number in [0, 1] which specifies the amount of exploitation. 
- If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive reward, - (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive - reward. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - cmab: CmabBernoulliBAI - Contextual Multi-Armed Bandit with strategy = BestActionIdentification - """ - actions = {} - for a in set(action_ids): - actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features) - mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon, default_action=default_action) - mab.predict_actions_randomly = True - return mab - - -@validate_call -def create_cmab_bernoulli_cc_cold_start( - action_ids_cost: Dict[ActionId, NonNegativeFloat], - n_features: PositiveInt, - subsidy_factor: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> CmabBernoulliCC: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control - strategy, with default parameters. - - The sMAB is extended to include a control of the action cost. Each action is associated with a predefined "cost". - At prediction time, the model considers the actions whose expected rewards is above a pre-defined lower bound. Among - these actions, the one with the lowest associated cost is recommended. The expected reward interval for feasible - actions is defined as [(1-subsidy_factor) * max_p, max_p], where max_p is the highest expected reward sampled value. 
- - Reference: Thompson Sampling for Contextual Bandit Problems with Auxiliary Safety Constraints (Daulton et al., 2019) - https://arxiv.org/abs/1911.00638 - - Multi-Armed Bandits with Cost Subsidy (Sinha et al., 2021) - https://arxiv.org/abs/2011.01488 - - Parameters - ---------- - action_ids_cost: Dict[ActionId, NonNegativeFloat] - The list of possible actions, and their cost. - n_features: PositiveInt - The number of features expected after in the context matrix. This is also the number of betas of the - Bayesian Logistic Regression model. - subsidy_factor: Optional[Float_0_1], default=0.5 - Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. - If subsidy_factor is 1, the bandits always selects the action with the minimum cost. - If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive - reward (it behaves as a classic Bernoulli bandit). - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. 
- - Returns - ------- - cmab: CmabBernoulliCC - Contextual Multi-Armed Bandit with strategy = CostControl - """ - actions = {} - for a, cost in action_ids_cost.items(): - actions[a] = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost) - mab = CmabBernoulliCC( - actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon, default_action=default_action - ) - mab.predict_actions_randomly = True - return mab diff --git a/pybandits/consts.py b/pybandits/consts.py new file mode 100644 index 0000000..eba43a4 --- /dev/null +++ b/pybandits/consts.py @@ -0,0 +1 @@ +ACTION_IDS_PREFIX = "action_ids_" diff --git a/pybandits/mab.py b/pybandits/mab.py new file mode 100644 index 0000000..ca7c0fc --- /dev/null +++ b/pybandits/mab.py @@ -0,0 +1,382 @@ +# MIT License +# +# Copyright (c) 2023 Playtika Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+import warnings +from abc import ABC, abstractmethod +from collections import defaultdict +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, get_args + +import numpy as np +from pydantic import field_validator, model_validator, validate_call + +from pybandits.base import ( + ActionId, + ActionRewardLikelihood, + BinaryReward, + Float01, + Predictions, + PyBanditsBaseModel, +) +from pybandits.consts import ACTION_IDS_PREFIX +from pybandits.model import Model +from pybandits.strategy import Strategy +from pybandits.utils import extract_argument_names_from_function + + +class BaseMab(PyBanditsBaseModel, ABC): + """ + Multi-armed bandit superclass. + + Parameters + ---------- + actions : Dict[ActionId, Model] + The list of possible actions, and their associated Model. + strategy : Strategy + The strategy used to select actions. + epsilon : Optional[Float01], 0 if not specified. + The probability of selecting a random action. + default_action : Optional[ActionId], None if not specified. + The default action to select with a probability of epsilon when using the epsilon-greedy approach. + If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. 
+ """ + + actions: Dict[ActionId, Model] + strategy: Strategy + epsilon: Optional[Float01] = None + default_action: Optional[ActionId] = None + + def __init__( + self, + actions: Dict[ActionId, Model], + epsilon: Optional[Float01] = None, + default_action: Optional[ActionId] = None, + **strategy_kwargs, + ): + if "strategy" in strategy_kwargs: + strategy = strategy_kwargs["strategy"] + if len(strategy_kwargs) > 1: + raise ValueError("strategy should be the only keyword argument.") + else: + strategy_class = self.model_fields["strategy"].annotation + strategy = strategy_class(**strategy_kwargs) + + super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) + + ############################################ Instance Input Validators ############################################# + + @field_validator("actions", mode="before") + @classmethod + def at_least_2_actions_are_defined(cls, v): + if len(v) == 0: + raise AttributeError("At least one action should be defined.") + elif len(v) == 1: + warnings.warn("Only a single action was supplied. This MAB will be deterministic.") + return v + + @model_validator(mode="after") + def validate_default_action(self): + if not self.epsilon and self.default_action: + raise AttributeError("A default action should only be defined when epsilon is defined.") + if self.default_action and self.default_action not in self.actions: + raise AttributeError("The default action should be defined in the actions.") + return self + + ############################################# Method Input Validators ############################################## + + def _get_valid_actions(self, forbidden_actions: Optional[Set[ActionId]]) -> Set[ActionId]: + """ + Given a set of forbidden action IDs, return a set of valid action IDs. + + Parameters + ---------- + forbidden_actions: Optional[Set[ActionId]] + The set of forbidden action IDs. + + Returns + ------- + valid_actions: Set[ActionId] + The list of valid (i.e. 
not forbidden) action IDs. + """ + if forbidden_actions is None: + forbidden_actions = set() + + if not all(a in self.actions.keys() for a in forbidden_actions): + raise ValueError("forbidden_actions contains invalid action IDs.") + valid_actions = set(self.actions.keys()) - forbidden_actions + if len(valid_actions) == 0: + raise ValueError("All actions are forbidden. You must allow at least 1 action.") + if self.default_action and self.default_action not in valid_actions: + raise ValueError("The default action is forbidden.") + + return valid_actions + + def _validate_update_params( + self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + ): + """ + Verify that the given list of action IDs is a subset of the currently defined actions and that + the rewards type matches the strategy type. + + Parameters + ---------- + actions : List[ActionId] + The selected action for each sample. + rewards: List[Union[BinaryReward, List[BinaryReward]]] + The reward for each sample. + """ + invalid = set(actions) - set(self.actions.keys()) + if invalid: + raise AttributeError(f"The following invalid action(s) were specified: {invalid}.") + if len(actions) != len(rewards): + raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") + + #################################################################################################################### + + @abstractmethod + @validate_call + def update( + self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]], *args, **kwargs + ): + """ + Update the multi-armed bandit model. + + actions: List[ActionId] + The selected action for each sample. + rewards: List[Union[BinaryReward, List[BinaryReward]]] + The reward for each sample. + """ + + @abstractmethod + @validate_call + def predict(self, forbidden_actions: Optional[Set[ActionId]] = None) -> Predictions: + """ + Predict actions. 
+ + Parameters + ---------- + forbidden_actions : Optional[Set[ActionId]], default=None + Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only + consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions. + Note that: actions = allowed_actions U forbidden_actions. + + Returns + ------- + actions: List[ActionId] of shape (n_samples,) + The actions selected by the multi-armed bandit model. + probs: List[Dict[ActionId, Probability]] of shape (n_samples,) + The probabilities of getting a positive reward for each action. + ws : List[Dict[ActionId, float]], only relevant for some of the MABs + The weighted sum of logistic regression logits. + """ + + def get_state(self) -> (str, dict): + """ + Access the complete model internal state, enough to create an exact copy of the same model from it. + Returns + ------- + model_class_name: str + The name of the class of the model. + model_state: dict + The internal state of the model (actions, scores, etc.). + """ + model_name = self.__class__.__name__ + state: dict = self.model_dump() + return model_name, state + + @validate_call + def _select_epsilon_greedy_action( + self, + p: ActionRewardLikelihood, + actions: Optional[Dict[ActionId, Model]] = None, + ) -> ActionId: + """ + Wraps self.strategy.select_action function with epsilon-greedy strategy, + such that with probability epsilon a default_action is selected, + and with probability 1-epsilon the select_action function is triggered to choose action. + If no default_action is provided, a random action is selected. + + Reference: Reinforcement Learning: An Introduction, Ch. 
2 (Sutton and Barto, 2018) + https://web.stanford.edu/class/psych209/Readings/SuttonBartoIPRLBook2ndEd.pdf + + Parameters + ---------- + p: Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] + The dictionary of actions and their sampled probability of getting a positive reward. + For MO strategy, the sampled probability is a list with elements corresponding to the objectives. + actions: Optional[Dict[ActionId, Model]] + The dictionary of actions and their associated Model. + + Returns + ------- + selected_action: ActionId + The selected action. + + Raises + ------ + KeyError + If self.default_action is not present as a key in the probabilities dictionary. + """ + + if self.epsilon: + if self.default_action and self.default_action not in p.keys(): + raise KeyError(f"Default action {self.default_action} not in actions.") + if np.random.binomial(1, self.epsilon): + selected_action = self.default_action or np.random.choice(list(p.keys())) + else: + selected_action = self.strategy.select_action(p=p, actions=actions) + else: + selected_action = self.strategy.select_action(p=p, actions=actions) + return selected_action + + @classmethod + def from_state(cls, state: dict) -> "BaseMab": + """ + Create a new instance of the class from a given model state. + The state can be obtained by applying get_state() to a model. + + Parameters + ---------- + state: dict + The internal state of a model (actions, strategy, etc.) of the same type. + + Returns + ------- + model: BaseMab + The new model instance. 
+ + """ + model_attributes = extract_argument_names_from_function(cls.__init__, True) + strategy_attributes = list(state["strategy"].keys()) + attributes_mapping = {k: state[k] for k in model_attributes if k not in strategy_attributes and k in state} + attributes_mapping.update({k: state["strategy"][k] for k in strategy_attributes}) + return cls(**attributes_mapping) + + @classmethod + def cold_start( + cls, + action_ids: Optional[Set[ActionId]] = None, + epsilon: Optional[Float01] = None, + default_action: Optional[ActionId] = None, + **kwargs, + ) -> "BaseMab": + """ + Factory method to create a Multi-Armed Bandit with Thompson Sampling, with default + parameters. + + Parameters + ---------- + action_ids: Optional[Set[ActionId]] + The list of possible actions. + epsilon: Optional[Float01] + epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. + default_action: Optional[ActionId] + The default action to select with a probability of epsilon when using the epsilon-greedy approach. + If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. + kwargs: Dict[str, Any] + Additional parameters for the mab and for the action model. + + Returns + ------- + mab: BaseMab + Multi-Armed Bandit + """ + action_specific_kwargs, kwargs = cls._extract_action_specific_kwargs(**kwargs) + + # Extract inner_action_ids + inner_action_ids = action_ids or set(action_specific_kwargs.keys()) + if not inner_action_ids: + raise ValueError( + "inner_action_ids should be provided either directly or via keyword argument in the form of " + "action_id_{model argument name} = {action_id: value}." 
+ ) + + # Assign model for each action + action_model_cold_start, action_general_kwargs = cls._extract_action_model_class_and_attributes(**kwargs) + actions = {} + for a in inner_action_ids: + actions[a] = action_model_cold_start(**action_general_kwargs, **action_specific_kwargs.get(a, {})) + + # Instantiate the MAB + strategy_kwargs = {k: kwargs[k] for k in kwargs.keys() if k not in action_general_kwargs.keys()} + strategy_class = cls.model_fields["strategy"].annotation + strategy = strategy_class(**strategy_kwargs) + mab = cls(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) + # For contextual multi-armed bandit, until the very first update the model will predict actions randomly, + # where each action has equal probability to be selected. + if hasattr(mab, "predict_actions_randomly"): + mab.predict_actions_randomly = True + return mab + + @staticmethod + def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str, Any]]: + """ + Utility function to extract kwargs that are specific for each action when constructing the action model. + + Parameters + ---------- + kwargs : Dict[str, Any] + Additional parameters for the mab and for the action model. + + Returns + ------- + action_specific_kwargs : Dict[str, Dict] + Dictionary of actions and the parameters of their associated model. + kwargs : Dict[str, Any] + Dictionary of parameters and their values, without the action_specific_kwargs. 
+ """ + action_specific_kwargs = defaultdict(dict) + for keyword in list(kwargs): + argument = kwargs[keyword] + if keyword.startswith(ACTION_IDS_PREFIX) and type(argument) is dict: + kwargs.pop(keyword) + inner_keyword = keyword.split(ACTION_IDS_PREFIX)[1] + for action_id, value in argument.items(): + action_specific_kwargs[action_id][inner_keyword] = value + return dict(action_specific_kwargs), kwargs + + @classmethod + def _extract_action_model_class_and_attributes(cls, **kwargs) -> Tuple[Callable, Dict[str, Dict]]: + """ + Utility function to extract the action model factory and the kwargs it accepts, shared by all actions. + + Parameters + ---------- + kwargs : Dict[str, Any] + Additional parameters for the mab and for the action model. + + Returns + ------- + action_model_cold_start : Callable + Function handle for creating the required action model. + action_general_kwargs : Dict[str, Any] + Dictionary of parameters and their values for the action model. + """ + action_model_class = get_args(cls.model_fields["actions"].annotation)[1] + if hasattr(action_model_class, "cold_start"): + action_model_cold_start_init = action_model_cold_start = action_model_class.cold_start + else: + action_model_cold_start_init = action_model_class.__init__ + action_model_cold_start = action_model_class + + action_model_attributes = extract_argument_names_from_function(action_model_cold_start_init, True) + + action_general_kwargs = {k: kwargs[k] for k in action_model_attributes if k in kwargs.keys()} + return action_model_cold_start, action_general_kwargs diff --git a/pybandits/model.py b/pybandits/model.py index c94ba1f..29becce 100644 --- a/pybandits/model.py +++ b/pybandits/model.py @@ -21,8 +21,9 @@ # SOFTWARE. 
+from abc import ABC, abstractmethod from random import betavariate -from typing import List, Tuple +from typing import Any, List, Tuple from numpy import array, c_, exp, insert, mean, multiply, ones, sqrt, std from numpy.typing import ArrayLike @@ -41,7 +42,25 @@ from pytensor.tensor import dot from scipy.stats import t -from pybandits.base import BinaryReward, Model, Probability, PyBanditsBaseModel +from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel + + +class Model(PyBanditsBaseModel, ABC): + """ + Class to model the prior distributions. + """ + + @abstractmethod + def sample_proba(self) -> Probability: + """ + Sample the probability of getting a positive reward. + """ + + @abstractmethod + def update(self, rewards: List[Any]): + """ + Update the model parameters. + """ class BaseBeta(Model): @@ -131,7 +150,7 @@ class BetaCC(BaseBeta): cost: NonNegativeFloat -class BaseBetaMO(Model): +class BetaMO(Model): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives. @@ -173,19 +192,36 @@ def update(self, rewards: List[List[BinaryReward]]): for i, counter in enumerate(self.counters): counter.update([r[i] for r in rewards]) + @classmethod + def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": + """ + Utility function to create a multi-objective Beta model or child model with cost control, + with default parameters. -class BetaMO(BaseBetaMO): - """ - Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives. + It is modeled as: - Parameters - ---------- - counters: List[Beta] of shape (n_objectives,) - List of Beta distributions. - """ + counters = [Beta, Beta, ..., Beta] of shape (n_objectives,) + + where each counter is a Beta distribution created with its default parameters. + + Parameters + ---------- + n_objectives : PositiveInt + The number of objectives of the multi-objective Beta model. This is also the number of Beta counters + created for the model. 
+ kwargs: Dict[str, Any] + Additional arguments for the multi-objective Beta child model. + + Returns + ------- + beta_mo: BetaMO + The multi-objective Beta model. + """ + counters = n_objectives * [Beta()] + return cls(counters=counters, **kwargs) -class BetaMOCC(BaseBetaMO): +class BetaMOCC(BetaMO): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives and cost control. @@ -219,7 +255,7 @@ class StudentT(PyBanditsBaseModel): nu: confloat(allow_inf_nan=False) = 5.0 -class BaseBayesianLogisticRegression(Model): +class BayesianLogisticRegression(Model): """ Base Bayesian Logistic Regression model. @@ -240,7 +276,7 @@ class BaseBayesianLogisticRegression(Model): """ alpha: StudentT - betas: List[StudentT] = Field(..., min_items=1) + betas: List[StudentT] = Field(..., min_length=1) @validate_call(config=dict(arbitrary_types_allowed=True)) def check_context_matrix(self, context: ArrayLike): @@ -379,29 +415,35 @@ def update( self.betas[i].mu = mean(trace["beta" + str(i)]) self.betas[i].sigma = std(trace["beta" + str(i)], ddof=1) + @classmethod + def cold_start(cls, n_features: PositiveInt, **kwargs) -> "BayesianLogisticRegression": + """ + Utility function to create a Bayesian Logistic Regression model or child model with cost control, + with default parameters. -class BayesianLogisticRegression(BaseBayesianLogisticRegression): - """ - Bayesian Logistic Regression model. + It is modeled as: - It is modeled as: + y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) - y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) + where the alpha and betas coefficients are Student's t-distributions. - where the alpha and betas coefficients are Student's t-distributions. + Parameters + ---------- + n_features : PositiveInt + The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected + after in the context matrix. 
+ kwargs: Dict[str, Any] + Additional arguments for the Bayesian Logistic Regression child model. - Parameters - ---------- - alpha: StudentT - Student's t-distribution of the alpha coefficient. - betas: StudentT - Student's t-distributions of the betas coefficients. - params_sample: Dict - Parameters for the function pymc.sample() - """ + Returns + ------- + blr: BayesianLogisticRegression + The Bayesian Logistic Regression model. + """ + return cls(alpha=StudentT(), betas=[StudentT() for _ in range(n_features)], **kwargs) -class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression): +class BayesianLogisticRegressionCC(BayesianLogisticRegression): """ Bayesian Logistic Regression model with cost control. @@ -424,55 +466,3 @@ class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression): """ cost: NonNegativeFloat - - -def create_bayesian_logistic_regression_cold_start(n_betas: PositiveInt) -> BayesianLogisticRegression: - """ - Utility function to create a Bayesian Logistic Regression model, with default parameters. - - It is modeled as: - - y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) - - where the alpha and betas coefficients are Student's t-distributions. - - Parameters - ---------- - n_betas : PositiveInt - The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected - after in the context matrix. - - Returns - ------- - blr: BayesianLogisticRegression - The Bayesian Logistic Regression model. - """ - return BayesianLogisticRegression(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)]) - - -def create_bayesian_logistic_regression_cc_cold_start( - n_betas: PositiveInt, cost: NonNegativeFloat -) -> BayesianLogisticRegressionCC: - """ - Utility function to create a Bayesian Logistic Regression model with cost control, with default parameters. - - It is modeled as: - - y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... 
+ betaN * xN) - - where the alpha and betas coefficients are Student's t-distributions. - - Parameters - ---------- - n_betas : PositiveInt - The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected - after in the context matrix. - cost: NonNegativeFloat - Cost associated to the Bayesian Logistic Regression model. - - Returns - ------- - blr: BayesianLogisticRegressionCC - The Bayesian Logistic Regression model. - """ - return BayesianLogisticRegressionCC(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)], cost=cost) diff --git a/pybandits/smab.py b/pybandits/smab.py index 65e4bb1..00ded40 100644 --- a/pybandits/smab.py +++ b/pybandits/smab.py @@ -20,26 +20,27 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + from collections import defaultdict -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Union -from pydantic import NonNegativeFloat, PositiveInt, field_validator, validate_call +from pydantic import PositiveInt, field_validator, validate_call from pybandits.base import ( ActionId, - BaseMab, BinaryReward, - Float01, Probability, - Strategy, + SmabPredictions, ) -from pybandits.model import BaseBeta, BaseBetaMO, Beta, BetaCC, BetaMO, BetaMOCC +from pybandits.mab import BaseMab +from pybandits.model import BaseBeta, Beta, BetaCC, BetaMO, BetaMOCC from pybandits.strategy import ( BestActionIdentification, ClassicBandit, CostControlBandit, MultiObjectiveBandit, MultiObjectiveCostControlBandit, + Strategy, ) @@ -62,7 +63,7 @@ def predict( self, n_samples: PositiveInt = 1, forbidden_actions: Optional[Set[ActionId]] = None, - ) -> Tuple[List[ActionId], List[Dict[ActionId, Probability]]]: + ) -> SmabPredictions: """ Predict actions. 
@@ -95,7 +96,7 @@ def predict( return selected_actions, probs @validate_call - def update(self, actions: List[ActionId], rewards: List[Union[BinaryReward, List[BinaryReward]]]): + def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): """ Update the stochastic Bernoulli bandit given the list of selected actions and their corresponding binary rewards. @@ -111,7 +112,8 @@ def update(self, actions: List[ActionId], rewards: List[Union[BinaryReward, List If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2): rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...] """ - self._check_update_params(actions=actions, rewards=rewards) + + self._validate_update_params(actions=actions, rewards=rewards) rewards_dict = defaultdict(list) @@ -140,22 +142,6 @@ class SmabBernoulli(BaseSmabBernoulli): actions: Dict[ActionId, Beta] strategy: ClassicBandit - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__(actions=actions, strategy=ClassicBandit(), epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulli": - return cls(actions=state["actions"]) - - @validate_call - def update(self, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(actions=actions, rewards=rewards) - class SmabBernoulliBAI(BaseSmabBernoulli): """ @@ -175,24 +161,6 @@ class SmabBernoulliBAI(BaseSmabBernoulli): actions: Dict[ActionId, Beta] strategy: BestActionIdentification - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - exploit_p: Optional[Float01] = None, - ): - strategy = BestActionIdentification() if exploit_p is None else BestActionIdentification(exploit_p=exploit_p) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, 
default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliBAI": - return cls(actions=state["actions"], exploit_p=state["strategy"].get("exploit_p", None)) - - @validate_call - def update(self, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(actions=actions, rewards=rewards) - class SmabBernoulliCC(BaseSmabBernoulli): """ @@ -220,24 +188,6 @@ class SmabBernoulliCC(BaseSmabBernoulli): actions: Dict[ActionId, BetaCC] strategy: CostControlBandit - def __init__( - self, - actions: Dict[ActionId, BetaCC], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - subsidy_factor: Optional[Float01] = None, - ): - strategy = CostControlBandit() if subsidy_factor is None else CostControlBandit(subsidy_factor=subsidy_factor) - super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliCC": - return cls(actions=state["actions"], subsidy_factor=state["strategy"].get("subsidy_factor", None)) - - @validate_call - def update(self, actions: List[ActionId], rewards: List[BinaryReward]): - super().update(actions=actions, rewards=rewards) - class BaseSmabBernoulliMO(BaseSmabBernoulli): """ @@ -252,21 +202,17 @@ class BaseSmabBernoulliMO(BaseSmabBernoulli): The strategy used to select actions. 
""" - actions: Dict[ActionId, BaseBetaMO] + actions: Dict[ActionId, BetaMO] strategy: Strategy @field_validator("actions", mode="after") @classmethod - def all_actions_have_same_number_of_objectives(cls, actions: Dict[ActionId, BaseBetaMO]): + def all_actions_have_same_number_of_objectives(cls, actions: Dict[ActionId, BetaMO]): n_objs_per_action = [len(beta.counters) for beta in actions.values()] if len(set(n_objs_per_action)) != 1: raise ValueError("All actions should have the same number of objectives") return actions - @validate_call - def update(self, actions: List[ActionId], rewards: List[List[BinaryReward]]): - super().update(actions=actions, rewards=rewards) - class SmabBernoulliMO(BaseSmabBernoulliMO): """ @@ -291,20 +237,6 @@ class SmabBernoulliMO(BaseSmabBernoulliMO): actions: Dict[ActionId, BetaMO] strategy: MultiObjectiveBandit - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__( - actions=actions, strategy=MultiObjectiveBandit(), epsilon=epsilon, default_action=default_action - ) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliMO": - return cls(actions=state["actions"]) - class SmabBernoulliMOCC(BaseSmabBernoulliMO): """ @@ -324,217 +256,3 @@ class SmabBernoulliMOCC(BaseSmabBernoulliMO): actions: Dict[ActionId, BetaMOCC] strategy: MultiObjectiveCostControlBandit - - def __init__( - self, - actions: Dict[ActionId, Beta], - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, - ): - super().__init__( - actions=actions, strategy=MultiObjectiveCostControlBandit(), epsilon=epsilon, default_action=default_action - ) - - @classmethod - def from_state(cls, state: dict) -> "SmabBernoulliMOCC": - return cls(actions=state["actions"]) - - -@validate_call -def create_smab_bernoulli_cold_start( - action_ids: Set[ActionId], epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None -) -> 
SmabBernoulli: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, with default - parameters. - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulli - Stochastic Multi-Armed Bandit with strategy = ClassicBandit - """ - actions = {} - for a in set(action_ids): - actions[a] = Beta() - return SmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action) - - -@validate_call -def create_smab_bernoulli_bai_cold_start( - action_ids: Set[ActionId], - exploit_p: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliBAI: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action - Identification strategy, with default parameters. - - Reference: Analysis of Thompson Sampling for the Multi-armed Bandit Problem (Agrawal and Goyal, 2012) - http://proceedings.mlr.press/v23/agrawal12/agrawal12.pdf - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - exploit_p: Float_0_1 (default=0.5) - Number in [0, 1] which specifies the amount of exploitation. - If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive reward, - (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive - reward. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. 
- default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulliBAI - Stochastic Multi-Armed Bandit with strategy = BestActionIdentification - """ - actions = {} - for a in set(action_ids): - actions[a] = Beta() - return SmabBernoulliBAI(actions=actions, epsilon=epsilon, default_action=default_action, exploit_p=exploit_p) - - -@validate_call -def create_smab_bernoulli_cc_cold_start( - action_ids_cost: Dict[ActionId, NonNegativeFloat], - subsidy_factor: Optional[Float01] = None, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliCC: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control - strategy, with default parameters. - - The sMAB is extended to include a control of the action cost. Each action is associated with a predefined "cost". - At prediction time, the model considers the actions whose expected rewards is above a pre-defined lower bound. Among - these actions, the one with the lowest associated cost is recommended. The expected reward interval for feasible - actions is defined as [(1-subsidy_factor) * max_p, max_p], where max_p is the highest expected reward sampled value. - - Reference: Thompson Sampling for Contextual Bandit Problems with Auxiliary Safety Constraints (Daulton et al., 2019) - https://arxiv.org/abs/1911.00638 - - Multi-Armed Bandits with Cost Subsidy (Sinha et al., 2021) - https://arxiv.org/abs/2011.01488 - - Parameters - ---------- - action_ids_cost: Dict[ActionId, NonNegativeFloat] - The list of possible actions, and their cost. - subsidy_factor: Optional[Float_0_1], default=0.5 - Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. 
- If subsidy_factor is 1, the bandits always selects the action with the minimum cost. - If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive - reward (it behaves as a classic Bernoulli bandit). - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulliCC - Stochastic Multi-Armed Bandit with strategy = CostControlBandit - """ - actions = {} - for a, cost in action_ids_cost.items(): - actions[a] = BetaCC(cost=cost) - return SmabBernoulliCC( - actions=actions, epsilon=epsilon, default_action=default_action, subsidy_factor=subsidy_factor - ) - - -@validate_call -def create_smab_bernoulli_mo_cold_start( - action_ids: Set[ActionId], - n_objectives: PositiveInt, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliMO: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Multi-Objectives - strategy, with default parameters. - - The reward pertaining to an action is a multidimensional vector instead of a scalar value. In this setting, - different actions are compared according to Pareto order between their expected reward vectors, and those actions - whose expected rewards are not inferior to that of any other actions are called Pareto optimal actions, all of which - constitute the Pareto front. 
- - Reference: Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015) - https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem - - Parameters - ---------- - action_ids: Set[ActionId] - The list of possible actions. - n_objectives: PositiveInt - The number of objectives to optimize. The bandit assumes the same number of objectives for all actions. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - Returns - ------- - smab: SmabBernoulliMO - Stochastic Multi-Armed Bandit with strategy = MultiObjectiveBandit - """ - actions = {} - for a in set(action_ids): - actions[a] = BetaMO(counters=n_objectives * [Beta()]) - return SmabBernoulliMO(actions=actions, epsilon=epsilon, default_action=default_action) - - -@validate_call -def create_smab_bernoulli_mo_cc_cold_start( - action_ids_cost: Dict[ActionId, NonNegativeFloat], - n_objectives: PositiveInt, - epsilon: Optional[Float01] = None, - default_action: Optional[ActionId] = None, -) -> SmabBernoulliMOCC: - """ - Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling implementation for - Multi-Objective (MO) with Cost Control (CC) strategy, with default parameters. - - This Bandit allows the reward to be a multidimensional vector and include a control of the action cost. It merges - the Multi-Objective and Cost Control strategies. - - Parameters - ---------- - action_ids_cost: Dict[ActionId, NonNegativeFloat] - The list of possible actions, and their cost. - n_objectives: PositiveInt - The number of objectives to optimize. 
The bandit assumes the same number of objectives for all actions. - epsilon: Optional[Float01] - epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] - The default action to select with a probability of epsilon when using the epsilon-greedy approach. - If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - - - Returns - ------- - smab: SmabBernoulliMO - Stochastic Multi-Armed Bandit with strategy = MultiObjectiveBandit - """ - actions = {} - for a, cost in action_ids_cost.items(): - actions[a] = BetaMOCC(counters=n_objectives * [Beta()], cost=cost) - return SmabBernoulliMOCC(actions=actions, epsilon=epsilon, default_action=default_action) diff --git a/pybandits/strategy.py b/pybandits/strategy.py index bc225f2..383b514 100644 --- a/pybandits/strategy.py +++ b/pybandits/strategy.py @@ -20,15 +20,59 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from abc import ABC, abstractmethod from random import random -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional, TypeVar, Union import numpy as np -from pydantic import validate_call +from pydantic import field_validator, validate_call from scipy.stats import ttest_ind_from_stats -from pybandits.base import ActionId, Float01, Model, Probability, Strategy -from pybandits.model import Beta, BetaMOCC +from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel +from pybandits.model import Beta, BetaMOCC, Model + +_Self = TypeVar("_Self", bound="Strategy") + + +class Strategy(PyBanditsBaseModel, ABC): + """ + Strategy to select actions in multi-armed bandits. + """ + + def _with_argument(self, argument_name: str, argument_value: Any) -> _Self: + """ + Instantiate a mutated strategy with an altered argument_value for argument_name. + + Parameters + ---------- + argument_name: str + The name of the argument. 
+ argument_value: Any + The value of the argument. + + Returns + ------- + mutated_strategy: Strategy + The mutated strategy. + """ + mutated_strategy = self.model_copy(update={argument_name: argument_value}, deep=True) + return mutated_strategy + + @abstractmethod + def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId: + """ + Select the action. + """ + + @classmethod + @validate_call + def numerize_field(cls, v, field_name: str): + return v if v is not None else cls.model_fields[field_name].default + + @classmethod + @validate_call + def get_expected_value_from_state(cls, state: Dict[str, Any], field_name: str) -> float: + return cls.numerize_field(state["strategy"].get(field_name), field_name) class ClassicBandit(Strategy): @@ -53,8 +97,10 @@ def select_action( Parameters ---------- - p: Dict[ActionId, Probability] - The dictionary or actions and their sampled probability of getting a positive reward. + p : Dict[ActionId, Probability] + The dictionary of actions and their sampled probability of getting a positive reward. + actions : Optional[Dict[ActionId, Model]] + The dictionary of actions and their associated model. Returns ------- @@ -73,32 +119,44 @@ class BestActionIdentification(Strategy): Parameters ---------- - exploit_p: Float_0_1 (default=0.5) + exploit_p: Optional[Float01], 0.5 if not specified Tuning parameter taking value in [0, 1] which specifies the probability of selecting the best or an alternative action. - If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive reward, - (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive + If exploit_p is 1, the bandit always selects the action with the highest probability of + getting a positive reward. That is, it behaves as a Greedy strategy. 
+ If exploit_p is 0, the bandit always selects the action with 2nd highest probability of getting a positive reward. """ - exploit_p: Float01 = 0.5 + exploit_p: Optional[Float01] = 0.5 + + @field_validator("exploit_p", mode="before") + @classmethod + def numerize_exploit_p(cls, v): + return cls.numerize_field(v, "exploit_p") @validate_call - def set_exploit_p(self, exploit_p: Float01): + def with_exploit_p(self, exploit_p: Optional[Float01]) -> _Self: """ - Set exploit_p. + Instantiate a mutated best action identification strategy with an altered exploit_p. Parameters ---------- - exploit_p: Float_0_1 (default=0.5) - Number in [0, 1] which specifies the amount of exploitation. - If exploit_p is 1, the bandits always selects the action with highest probability of getting a positive - reward (it behaves as a Greedy strategy). - If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive + exploit_p: Optional[Float01], 0.5 if not specified + Tuning parameter taking value in [0, 1] which specifies the probability of selecting the best or an alternative + action. + If exploit_p is 1, the bandit always selects the action with the highest probability of + getting a positive reward. That is, it behaves as a Greedy strategy. + If exploit_p is 0, the bandit always selects the action with 2nd highest probability of getting a positive reward. + + Returns + ------- + mutated_best_action_identification : BestActionIdentification + The mutated best action identification strategy. """ - self.exploit_p = exploit_p + mutated_best_action_identification = self._with_argument("exploit_p", exploit_p) + return mutated_best_action_identification @validate_call def select_action( @@ -113,8 +171,10 @@ def select_action( Parameters ---------- - p: Dict[ActionId, Probability] - The dictionary or actions and their sampled probability of getting a positive reward. 
+ p : Dict[ActionId, Probability] + The dictionary of actions and their sampled probability of getting a positive reward. + actions : Optional[Dict[ActionId, Model]] + The dictionary of actions and their associated model. Returns ------- @@ -167,7 +227,56 @@ def compare_best_actions(self, actions: Dict[ActionId, Beta]) -> float: return pvalue -class CostControlBandit(Strategy): +class CostControlStrategy(Strategy, ABC): + """ + Cost Control (CC) strategy for multi-armed bandits. + + Bandits are extended to include a control of the action cost. Each action is associated with a predefined "cost". + """ + + @classmethod + @validate_call + def _average(cls, p_of_action: Union[Probability, List[Probability]]): + return np.mean(p_of_action) + + @classmethod + @validate_call + def _evaluate_and_select( + cls, + p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], + actions: Dict[ActionId, Model], + feasible_actions: List[ActionId], + ) -> ActionId: + """ + Evaluate the feasible actions and select the one with the minimum cost. + + Parameters + ---------- + p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] + The dictionary of actions and their sampled probability of getting a positive reward. + actions: Dict[ActionId, Model] + The dictionary of actions and their associated model. + feasible_actions: List[ActionId] + The list of feasible actions. + + Returns + ------- + selected_action: ActionId + The selected action. 
+ """ + # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id) + # the negative probability ensures that if we order the actions based on their minimum values the one with + # higher probability will be selected + sortable_actions = [(actions[a].cost, -cls._average(p[a]), a) for a in feasible_actions] + + # select the action with the min cost (and the highest mean of probabilities in case of cost equality) + _, _, selected_action = sorted(sortable_actions)[0] + + # return cheapest action from the set of feasible actions + return selected_action + + +class CostControlBandit(CostControlStrategy): """ Cost Control (CC) strategy for multi-armed bandits. @@ -185,18 +294,40 @@ class CostControlBandit(Strategy): Parameters ---------- - subsidy_factor: Optional[Float_0_1], default=0.5 + subsidy_factor: Optional[Float01], 0.5 if not specified Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. If subsidy_factor is 1, the bandits always selects the action with the minimum cost. If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive reward (it behaves as a classic Bernoulli bandit). """ - subsidy_factor: Float01 = 0.5 + subsidy_factor: Optional[Float01] = 0.5 + + @field_validator("subsidy_factor", mode="before") + @classmethod + def numerize_subsidy_factor(cls, v): + return cls.numerize_field(v, "subsidy_factor") @validate_call - def set_subsidy_factor(self, subsidy_factor: Float01): - self.subsidy_factor = subsidy_factor + def with_subsidy_factor(self, subsidy_factor: Optional[Float01]) -> _Self: + """ + Instantiate a mutated cost control bandit strategy with an altered subsidy factor. + + Parameters + ---------- + subsidy_factor : Optional[Float01], 0.5 if not specified + Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions. 
+ If subsidy_factor is 1, the bandits always selects the action with the minimum cost. + If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive + reward (it behaves as a classic Bernoulli bandit). + + Returns + ------- + mutated_cost_control_bandit : CostControlBandit + The mutated cost control bandit strategy. + """ + mutated_cost_control_bandit = self._with_argument("subsidy_factor", subsidy_factor) + return mutated_cost_control_bandit @validate_call def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, Model]) -> ActionId: @@ -223,65 +354,63 @@ def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, # define the set of feasible actions feasible_actions = [a for a in p.keys() if p[a] >= (1 - self.subsidy_factor) * max_p] - # feasible actions enriched with their characteristics (cost, -probability, action_id) - # the negative probability ensures that if we order the actions based on their minimum values the one with - # higher proba will be selected - sortable_actions = [(actions[a].cost, -p[a], a) for a in feasible_actions] - - # select the action with the cheapest cost (and the highest probability in case of cost equality) - _, _, selected_action = sorted(sortable_actions)[0] - - # return cheapest action from the set of feasible actions + selected_action = self._evaluate_and_select(p, actions, feasible_actions) return selected_action -@validate_call -def get_pareto_front(p: Dict[ActionId, List[Probability]]) -> List[ActionId]: +class MultiObjectiveStrategy(Strategy, ABC): """ - Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by any action - out of the set A*. - - Parameters: - ----------- - p: Dict[ActionId, Probability] - The dictionary or actions and their sampled probability of getting a positive reward for each objective. 
- - Return - ------ - pareto_front: set - The list of Pareto optimal actions + Multi Objective Strategy to select actions in multi-armed bandits. """ - # store non dominated actions - pareto_front = [] - for this_action in p.keys(): - is_pareto = True # we assume that action is Pareto Optimal until proven otherwise - other_actions = [a for a in p.keys() if a != this_action] + @classmethod + @validate_call + def get_pareto_front(cls, p: Dict[ActionId, List[Probability]]) -> List[ActionId]: + """ + Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by + any action out of the set A*. + + Parameters + ---------- + p: Dict[ActionId, List[Probability]] + The dictionary of actions and their sampled probability of getting a positive reward for each objective. + + Returns + ------- + pareto_front: List[ActionId] + The list of Pareto optimal actions + """ + # store non dominated actions + pareto_front = [] + + for this_action in p.keys(): + is_pareto = True # we assume that action is Pareto Optimal until proven otherwise + other_actions = [a for a in p.keys() if a != this_action] - for other_action in other_actions: - # check if this_action is not dominated by other_action based on - # multiple objectives reward prob vectors - is_dominated = not ( - # an action cannot be dominated by an identical one - (p[this_action] == p[other_action]) - # otherwise, apply the classical definition - or any(p[this_action][i] > p[other_action][i] for i in range(len(p[this_action]))) - ) + for other_action in other_actions: + # check if this_action is not dominated by other_action based on + # multiple objectives reward prob vectors + is_dominated = not ( + # an action cannot be dominated by an identical one + (p[this_action] == p[other_action]) + # otherwise, apply the classical definition + or any(p[this_action][i] > p[other_action][i] for i in range(len(p[this_action]))) + ) - if is_dominated: - # this_action dominated by at least one other_action, - # 
this_action is not pareto optimal - is_pareto = False - break + if is_dominated: + # this_action dominated by at least one other_action, + # this_action is not pareto optimal + is_pareto = False + break - if is_pareto: - # this_action is pareto optimal - pareto_front.append(this_action) + if is_pareto: + # this_action is pareto optimal + pareto_front.append(this_action) - return pareto_front + return pareto_front -class MultiObjectiveBandit(Strategy): +class MultiObjectiveBandit(MultiObjectiveStrategy): """ Multi-Objective (MO) strategy for multi-armed bandits. @@ -292,11 +421,6 @@ class MultiObjectiveBandit(Strategy): Reference: Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015) https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem - - Parameters - ---------- - n_objectives: int - Number of objectives to be solved by the bandit (n_objectives must be >= 1). """ @validate_call @@ -316,20 +440,15 @@ def select_action(self, p: Dict[ActionId, List[Probability]], **kwargs) -> Actio selected_action: ActionId The selected action. """ - return np.random.choice(get_pareto_front(p=p)) + return np.random.choice(self.get_pareto_front(p=p)) -class MultiObjectiveCostControlBandit(Strategy): +class MultiObjectiveCostControlBandit(MultiObjectiveStrategy, CostControlStrategy): """ Multi-Objective (MO) with Cost Control (CC) strategy for multi-armed bandits. This strategy allows the reward to be a multidimensional vector and include a control of the action cost. It merges the Multi-Objective and Cost Control strategies. - - Parameters - ---------- - n_objectives: int - Number of objectives to be solved by the bandit (n_objectives must be >= 1) """ @validate_call @@ -349,13 +468,7 @@ def select_action(self, p: Dict[ActionId, List[Probability]], actions: Dict[Acti selected_action: ActionId The selected action. 
""" - pareto_set = get_pareto_front(p=p) + pareto_set = self.get_pareto_front(p=p) - # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id) - sortable_actions = [(actions[a].cost, -np.mean(p[a]), a) for a in pareto_set] - - # select the action with the min cost (and the highest mean of probabilities in case of cost equality) - _, _, selected_action = sorted(sortable_actions)[0] - - # return cheapest action from the set of feasible actions + selected_action = self._evaluate_and_select(p, actions, pareto_set) return selected_action diff --git a/pybandits/utils.py b/pybandits/utils.py index 45a69a6..62e6af7 100644 --- a/pybandits/utils.py +++ b/pybandits/utils.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, List, Union +from typing import Any, Callable, Dict, List, Union from pydantic import validate_call @@ -19,3 +19,26 @@ def to_serializable_dict(d: Dict[str, Any]) -> Dict[str, JSONSerializable]: """ return json.loads(json.dumps(d, default=dict)) + + +@validate_call +def extract_argument_names_from_function(function_handle: Callable, is_class_method: bool = False) -> List[str]: + """ + Extract the argument names from a function handle. 
+ + Parameters + ---------- + function_handle : Callable + Handle of a function to extract the argument names from + + is_class_method : bool, defaults to False + Whether the function is a class method + + Returns + ------- + argument_names : List[str] + List of argument names + """ + start_index = int(is_class_method) + argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount] + return argument_names diff --git a/pyproject.toml b/pyproject.toml index 565164e..38022e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pybandits" -version = "0.5.1" +version = "1.0.0" description = "Python Multi-Armed Bandit Library" authors = [ "Dario d'Andrea ", diff --git a/tests/test_cmab.py b/tests/test_cmab.py index 5fe15e4..96099c9 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -28,26 +28,9 @@ from pydantic import NonNegativeFloat, ValidationError from pybandits.base import Float01 -from pybandits.cmab import ( - CmabBernoulli, - CmabBernoulliBAI, - CmabBernoulliCC, - create_cmab_bernoulli_bai_cold_start, - create_cmab_bernoulli_cc_cold_start, - create_cmab_bernoulli_cold_start, -) -from pybandits.model import ( - BayesianLogisticRegression, - BayesianLogisticRegressionCC, - StudentT, - create_bayesian_logistic_regression_cc_cold_start, - create_bayesian_logistic_regression_cold_start, -) -from pybandits.strategy import ( - BestActionIdentification, - ClassicBandit, - CostControlBandit, -) +from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC +from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC, StudentT +from pybandits.strategy import BestActionIdentification, ClassicBandit, CostControlBandit from pybandits.utils import to_serializable_dict from tests.test_utils import is_serializable @@ -63,13 +46,13 @@ def test_create_cmab_bernoulli_cold_start(a_int): # n_features must be > 0 if a_int <= 0: with 
pytest.raises(ValidationError): - create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) else: - mab1 = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + mab1 = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) mab2 = CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a_int), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a_int), + "a1": BayesianLogisticRegression.cold_start(n_features=a_int), + "a2": BayesianLogisticRegression.cold_start(n_features=a_int), } ) mab2.predict_actions_randomly = True @@ -83,21 +66,13 @@ def test_cmab_can_instantiate(n_features): CmabBernoulli() with pytest.raises(AttributeError): CmabBernoulli(actions={}) - with pytest.raises(AttributeError): - CmabBernoulli(actions={"a1": create_bayesian_logistic_regression_cold_start(n_betas=2)}) - with pytest.raises(TypeError): # strategy is not an argument of init + with pytest.warns(UserWarning): + CmabBernoulli(actions={"a1": BayesianLogisticRegression.cold_start(n_features=2)}) + with pytest.raises(ValidationError): # predict_with_proba is not an argument of init CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - }, - strategy=ClassicBandit(), - ) - with pytest.raises(TypeError): # predict_with_proba is not an argument of init - CmabBernoulli( - actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, predict_with_proba=True, ) @@ -108,15 +83,22 @@ def test_cmab_can_instantiate(n_features): "a2": None, }, ) + CmabBernoulli( + actions={ + 
"a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), + }, + strategy=ClassicBandit(), + ) mab = CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) assert not mab.predict_actions_randomly assert not mab.predict_with_proba mab.predict_with_proba = True @@ -133,17 +115,17 @@ def test_cmab_init_with_wrong_blr_models(a, b): with pytest.raises(AttributeError): CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a), - "a3": create_bayesian_logistic_regression_cold_start(n_betas=b), + "a1": BayesianLogisticRegression.cold_start(n_features=a), + "a2": BayesianLogisticRegression.cold_start(n_features=a), + "a3": BayesianLogisticRegression.cold_start(n_features=b), } ) else: CmabBernoulli( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=b), - "a3": create_bayesian_logistic_regression_cold_start(n_betas=b), + "a1": BayesianLogisticRegression.cold_start(n_features=a), + "a2": BayesianLogisticRegression.cold_start(n_features=b), + "a3": BayesianLogisticRegression.cold_start(n_features=b), } ) @@ -153,13 +135,13 @@ def test_cmab_update(n_samples=100, n_features=3): 
rewards = np.random.choice([0, 1], size=n_samples).tolist() def run_update(context): - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) assert all( - [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [mab.actions[a] == BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( - [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [mab.actions[a] != BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)] ) assert not mab.predict_actions_randomly @@ -183,13 +165,13 @@ def test_cmab_update_not_all_actions(n_samples=100, n_feat=3): actions = np.random.choice(["a3", "a4"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_feat)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat) mab.update(context=context, actions=actions, rewards=rewards) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a3"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a4"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_feat) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_feat) + assert mab.actions["a3"] != BayesianLogisticRegression.cold_start(n_features=n_feat) + assert 
mab.actions["a4"] != BayesianLogisticRegression.cold_start(n_features=n_feat) @settings(deadline=500) @@ -198,7 +180,7 @@ def test_cmab_update_shape_mismatch(n_samples, n_features): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) with pytest.raises(AttributeError): # actions shape mismatch mab.update(context=context, actions=actions[1:], rewards=rewards) @@ -216,7 +198,7 @@ def test_cmab_update_shape_mismatch(n_samples, n_features): @given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) def test_cmab_predict_cold_start(n_samples, n_features): def run_predict(context): - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -247,7 +229,7 @@ def run_predict(context): mab = CmabBernoulli( actions={ "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=n_features * [StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, ) assert not mab.predict_actions_randomly @@ -274,7 +256,7 @@ def run_predict(context): @given(st.integers(min_value=1, max_value=10)) def test_cmab_predict_shape_mismatch(a_int): context = np.random.uniform(low=-1.0, high=1.0, size=(100, a_int - 1)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, 
n_features=a_int) with pytest.raises(AttributeError): mab.predict(context=context) with pytest.raises(AttributeError): @@ -299,20 +281,20 @@ def run_predict(mab): assert set(mab.predict(n_samples=1000, forbidden_actions={"a5", "a4", "a2", "a3", "a1"})[0]) # cold start mab - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) run_predict(mab=mab) # not cold start mab mab = CmabBernoulli( actions={ "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=[StudentT(), StudentT(), StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a3": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), + "a3": BayesianLogisticRegression.cold_start(n_features=n_features), "a4": BayesianLogisticRegression(alpha=StudentT(mu=4, sigma=5), betas=[StudentT(), StudentT(), StudentT()]), - "a5": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a5": BayesianLogisticRegression.cold_start(n_features=n_features), }, ) - assert mab != create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) + assert mab != CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) run_predict(mab=mab) @@ -321,7 +303,7 @@ def run_predict(mab): def test_cmab_get_state(mu, sigma, n_features): actions: dict = { "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } cmab = CmabBernoulli(actions=actions) @@ -402,25 +384,25 @@ def test_create_cmab_bernoulli_bai_cold_start(a_int): # n_features must be > 0 if a_int <= 0: with 
pytest.raises(ValidationError): - create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) else: # default exploit_p - mab1 = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=a_int) + mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) mab2 = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a_int), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a_int), + "a1": BayesianLogisticRegression.cold_start(n_features=a_int), + "a2": BayesianLogisticRegression.cold_start(n_features=a_int), } ) mab2.predict_actions_randomly = True assert mab1 == mab2 # set exploit_p - mab1 = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=a_int, exploit_p=0.42) + mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int, exploit_p=0.42) mab2 = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=a_int), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=a_int), + "a1": BayesianLogisticRegression.cold_start(n_features=a_int), + "a2": BayesianLogisticRegression.cold_start(n_features=a_int), }, exploit_p=0.42, ) @@ -435,21 +417,13 @@ def test_cmab_bai_can_instantiate(n_features): CmabBernoulliBAI() with pytest.raises(AttributeError): CmabBernoulliBAI(actions={}) - with pytest.raises(AttributeError): - CmabBernoulliBAI(actions={"a1": create_bayesian_logistic_regression_cold_start(n_betas=2)}) - with pytest.raises(TypeError): # strategy is not an argument of init - CmabBernoulliBAI( - actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - }, - strategy=BestActionIdentification(), - ) - with pytest.raises(TypeError): # predict_with_proba is not an argument of init + with pytest.warns(UserWarning): + 
CmabBernoulliBAI(actions={"a1": BayesianLogisticRegression.cold_start(n_features=2)}) + with pytest.raises(ValidationError): # predict_with_proba is not an argument of init CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, predict_with_proba=True, ) @@ -460,27 +434,34 @@ def test_cmab_bai_can_instantiate(n_features): "a2": None, }, ) + CmabBernoulliBAI( + actions={ + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), + }, + strategy=BestActionIdentification(), + ) mab = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) assert not mab.predict_actions_randomly assert not mab.predict_with_proba assert mab.strategy == BestActionIdentification() mab = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, 
exploit_p=0.42, ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) + assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) + assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) assert not mab.predict_actions_randomly assert not mab.predict_with_proba assert mab.strategy == BestActionIdentification(exploit_p=0.42) @@ -492,7 +473,7 @@ def test_cmab_bai_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) # cold start - mab = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features) selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -503,8 +484,8 @@ def test_cmab_bai_predict(n_samples, n_features): # not cold start mab = CmabBernoulliBAI( actions={ - "a1": create_bayesian_logistic_regression_cold_start(n_betas=n_features), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a1": BayesianLogisticRegression.cold_start(n_features=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), }, exploit_p=0.42, ) @@ -517,15 +498,11 @@ def test_cmab_bai_update(n_samples=100, n_features=3): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features) assert mab.predict_actions_randomly - assert all( - [mab.actions[a] == 
create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] - ) + assert all([mab.actions[a] == BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)]) mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] - ) + assert all([mab.actions[a] != BayesianLogisticRegression.cold_start(n_features=n_features) for a in set(actions)]) assert not mab.predict_actions_randomly @@ -539,7 +516,7 @@ def test_cmab_bai_update(n_samples=100, n_features=3): def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): actions: dict = { "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": create_bayesian_logistic_regression_cold_start(n_betas=n_features), + "a2": BayesianLogisticRegression.cold_start(n_features=n_features), } cmab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p) @@ -606,9 +583,7 @@ def test_cmab_bai_from_state(state): expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict assert expected_actions == actual_actions - expected_exploit_p = ( - state["strategy"].get("exploit_p", 0.5) if state["strategy"].get("exploit_p") is not None else 0.5 - ) # Covers both not existing and existing + None + expected_exploit_p = cmab.strategy.get_expected_value_from_state(state, "exploit_p") actual_exploit_p = cmab.strategy.exploit_p assert expected_exploit_p == actual_exploit_p @@ -630,27 +605,25 @@ def test_create_cmab_bernoulli_cc_cold_start(a_int): # n_features must be > 0 if a_int <= 0: with pytest.raises(ValidationError): - create_cmab_bernoulli_cc_cold_start(action_ids_cost=action_ids_cost, n_features=a_int) + CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) else: # default subsidy_factor - mab1 = 
create_cmab_bernoulli_cc_cold_start(action_ids_cost=action_ids_cost, n_features=a_int) + mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) mab2 = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a1"]), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a2"]), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), } ) mab2.predict_actions_randomly = True assert mab1 == mab2 # set subsidy_factor - mab1 = create_cmab_bernoulli_cc_cold_start( - action_ids_cost=action_ids_cost, n_features=a_int, subsidy_factor=0.42 - ) + mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int, subsidy_factor=0.42) mab2 = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a1"]), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=a_int, cost=action_ids_cost["a2"]), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), }, subsidy_factor=0.42, ) @@ -665,21 +638,13 @@ def test_cmab_cc_can_instantiate(n_features): CmabBernoulliCC() with pytest.raises(AttributeError): CmabBernoulliCC(actions={}) - with pytest.raises(AttributeError): - CmabBernoulliCC(actions={"a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10)}) - with pytest.raises(TypeError): # strategy is not an argument of init + with pytest.warns(UserWarning): + CmabBernoulliCC(actions={"a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10)}) + with pytest.raises(ValidationError): # predict_with_proba is not an argument of init CmabBernoulliCC( actions={ - 
create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - }, - strategy=CostControlBandit(), - ) - with pytest.raises(TypeError): # predict_with_proba is not an argument of init - CmabBernoulliCC( - actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), }, predict_with_proba=True, ) @@ -690,27 +655,34 @@ def test_cmab_cc_can_instantiate(n_features): "a2": None, }, ) + CmabBernoulliCC( + actions={ + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + }, + strategy=CostControlBandit(), + ) mab = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), } ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) + assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) assert not mab.predict_actions_randomly assert mab.predict_with_proba assert mab.strategy == CostControlBandit() mab = CmabBernoulliCC( actions={ - "a1": 
create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), }, subsidy_factor=0.42, ) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) + assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) assert not mab.predict_actions_randomly assert mab.predict_with_proba assert mab.strategy == CostControlBandit(subsidy_factor=0.42) @@ -722,7 +694,7 @@ def test_cmab_cc_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) # cold start - mab = create_cmab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features) + mab = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features) selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -733,8 +705,8 @@ def test_cmab_cc_predict(n_samples, n_features): # not cold start mab = CmabBernoulliCC( actions={ - "a1": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=20.5), + "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=20.5), }, subsidy_factor=0.42, ) @@ -747,18 +719,18 @@ def test_cmab_cc_update(n_samples=100, 
n_features=3): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features) + mab = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features) assert mab.predict_actions_randomly assert all( [ - mab.actions[a] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + mab.actions[a] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) for a in set(actions) ] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( [ - mab.actions[a] != create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + mab.actions[a] != BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) for a in set(actions) ] ) @@ -781,7 +753,7 @@ def test_cmab_cc_get_state( "a1": BayesianLogisticRegressionCC( alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()], cost=cost_1 ), - "a2": create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost_2), + "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=cost_2), } cmab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) @@ -849,9 +821,7 @@ def test_cmab_cc_from_state(state): expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict assert expected_actions == actual_actions - expected_subsidy_factor = ( - state["strategy"].get("subsidy_factor", 0.5) if state["strategy"].get("subsidy_factor") is not None else 0.5 - ) # Covers both not existing and existing + None + expected_subsidy_factor = cmab.strategy.get_expected_value_from_state(state, "subsidy_factor") actual_subsidy_factor = cmab.strategy.subsidy_factor assert expected_subsidy_factor == 
actual_subsidy_factor @@ -871,9 +841,7 @@ def test_cmab_cc_from_state(state): def test_epsilon_greedy_cmab_predict_cold_start(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cold_start( - action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1" - ) + mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -887,9 +855,7 @@ def test_epsilon_greedy_cmab_predict_cold_start(n_samples, n_features): def test_epsilon_greedy_cmab_bai_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_bai_cold_start( - action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1" - ) + mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") selected_actions, probs, weighted_sums = mab.predict(context=context) assert mab.predict_actions_randomly assert all([a in ["a1", "a2"] for a in selected_actions]) @@ -904,7 +870,7 @@ def test_epsilon_greedy_cmab_cc_predict(n_samples, n_features): context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) # cold start - mab = create_cmab_bernoulli_cc_cold_start( + mab = CmabBernoulliCC.cold_start( action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features, epsilon=0.1, default_action="a1" ) selected_actions, probs, weighted_sums = mab.predict(context=context) diff --git a/tests/test_base.py b/tests/test_mab.py similarity index 54% rename from tests/test_base.py rename to tests/test_mab.py index dbec460..a1ea652 100644 --- a/tests/test_base.py +++ b/tests/test_mab.py @@ -20,16 +20,18 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
OTHER DEALINGS IN THE # SOFTWARE. -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional, Set, Union import hypothesis.strategies as st import numpy as np import pytest from hypothesis import given -from pydantic import NonNegativeInt, ValidationError +from pydantic import ValidationError from pytest_mock import MockerFixture -from pybandits.base import ActionId, BaseMab, Float01, Probability +from pybandits.base import ActionId, BinaryReward, Float01, Probability +from pybandits.consts import ACTION_IDS_PREFIX +from pybandits.mab import BaseMab from pybandits.model import Beta from pybandits.strategy import ClassicBandit @@ -38,9 +40,8 @@ class DummyMab(BaseMab): epsilon: Optional[Float01] = None default_action: Optional[ActionId] = None - def update(self, actions: List[ActionId], rewards: List[NonNegativeInt]): - super().update(actions=actions, rewards=rewards) - pass + def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): + self._validate_update_params(actions=actions, rewards=rewards) def predict( self, @@ -56,17 +57,17 @@ def get_state(self) -> (str, dict): def test_base_mab_raise_on_less_than_2_actions(): - with pytest.raises(ValidationError): + with pytest.raises(TypeError): DummyMab(actions={"a1": Beta(), "a2": Beta()}) with pytest.raises(ValidationError): DummyMab(actions={"": Beta(), "a2": Beta()}, strategy=ClassicBandit()) with pytest.raises(AttributeError): DummyMab(actions={}, strategy=ClassicBandit()) - with pytest.raises(AttributeError): + with pytest.raises(ValidationError): DummyMab(actions={"a1": None}, strategy=ClassicBandit()) with pytest.raises(ValidationError): DummyMab(actions={"a1": None, "a2": None}, strategy=ClassicBandit()) - with pytest.raises(AttributeError): + with pytest.warns(UserWarning): DummyMab(actions={"a1": Beta()}, strategy=ClassicBandit()) @@ -74,12 +75,12 @@ def test_base_mab_check_update_params(): dummy_mab = DummyMab(actions={"a1": Beta(), 
"a2": Beta()}, strategy=ClassicBandit()) with pytest.raises(AttributeError): # actionId doesn't exist - dummy_mab._check_update_params(actions=["a1", "a3"], rewards=[1, 1]) + dummy_mab._validate_update_params(actions=["a1", "a3"], rewards=[1, 1]) with pytest.raises(AttributeError): # actionId cannot be empty - dummy_mab._check_update_params(actions=[""], rewards=[1]) + dummy_mab._validate_update_params(actions=[""], rewards=[1]) with pytest.raises(AttributeError): - dummy_mab._check_update_params(actions=["a1", "a2"], rewards=[1]) + dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1]) @given(r1=st.integers(min_value=0, max_value=1), r2=st.integers(min_value=0, max_value=1)) @@ -92,6 +93,108 @@ def test_base_mab_update_ok(r1, r2): ######################################################################################################################## +# BaseMab._extract_action_specific_kwargs functionality tests + + +def test_returns_empty_dict_when_no_action_specific_kwargs(): + kwargs = {"param1": 1, "param2": 2} + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == {} + + +def test_processes_kwargs_with_non_dict_values(): + kwargs = { + f"{ACTION_IDS_PREFIX}param1": "not_a_dict", + } + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == {} + + +def test_manages_kwargs_with_empty_dicts(): + kwargs = {f"{ACTION_IDS_PREFIX}param1": {}, f"{ACTION_IDS_PREFIX}param2": {}} + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == {} + + +def test_extracts_action_specific_kwargs_with_valid_keys(): + kwargs = { + f"{ACTION_IDS_PREFIX}param1": {"action1": 1, "action2": 2}, + f"{ACTION_IDS_PREFIX}param2": {"action1": 3, "action2": 4}, + } + expected_output = {"action1": {"param1": 1, "param2": 3}, "action2": {"param1": 2, "param2": 4}} + result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + assert result == expected_output + + 
+######################################################################################################################## + + +# BaseMab._extract_action_model_class_and_attributes functionality tests + + +def test_extracts_action_model_class_and_attributes_with_valid_kwargs(mocker: MockerFixture): + class MockActionModel: + def __init__(self, param1, param2): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=["param1", "param2"]) + + kwargs = {"param1": 1, "param2": 2} + action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert action_model_cold_start == MockActionModel + assert action_general_kwargs == {"param1": 1, "param2": 2} + + +def test_returns_callable_for_action_model_cold_start_instantiation(mocker: MockerFixture): + class MockActionModel: + @classmethod + def cold_start(cls): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) + + kwargs = {} + action_model_cold_start, _ = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert callable(action_model_cold_start) + + +def test_handles_empty_kwargs_gracefully(mocker: MockerFixture): + class MockActionModel: + def __init__(self): + pass + + mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) + + kwargs = {} + action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert action_model_cold_start == MockActionModel + assert action_general_kwargs == {} + + +def test_handles_kwargs_with_no_matching_action_model_attributes(mocker: MockerFixture): + class MockActionModel: + def __init__(self): + pass + + 
mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) + mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) + + kwargs = {"irrelevant_param": 1} + action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + + assert action_model_cold_start == MockActionModel + assert action_general_kwargs == {} + + +######################################################################################################################## + + # Epsilon-greedy functionality tests diff --git a/tests/test_model.py b/tests/test_model.py index ed8827a..2041cf2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -28,7 +28,6 @@ from pydantic import ValidationError from pybandits.model import ( - BaseBetaMO, BayesianLogisticRegression, BayesianLogisticRegressionCC, Beta, @@ -36,8 +35,6 @@ BetaMO, BetaMOCC, StudentT, - create_bayesian_logistic_regression_cc_cold_start, - create_bayesian_logistic_regression_cold_start, ) ######################################################################################################################## @@ -110,26 +107,26 @@ def test_can_init_betaCC(a_float): ######################################################################################################################## -# BaseBetaMO +# BetaMO def test_can_init_base_beta_mo(): # init with default params - b = BaseBetaMO(counters=[Beta(), Beta()]) + b = BetaMO(counters=[Beta(), Beta()]) assert b.counters[0].n_successes == 1 and b.counters[0].n_failures == 1 assert b.counters[1].n_successes == 1 and b.counters[1].n_failures == 1 # init with empty dict - b = BaseBetaMO(counters=[{}, {}]) + b = BetaMO(counters=[{}, {}]) assert b.counters[0] == Beta() # invalid init with BetaCC instead of Beta with pytest.raises(ValidationError): - BaseBetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)]) + BetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)]) def test_calculate_proba_beta_mo(): - b = 
BaseBetaMO(counters=[Beta(), Beta()]) + b = BetaMO(counters=[Beta(), Beta()]) b.sample_proba() @@ -142,11 +139,11 @@ def test_beta_update_mo(rewards1, rewards2): rewards1, rewards2 = rewards1[:min_len], rewards2[:min_len] rewards = [[a, b] for a, b in zip(rewards1, rewards2)] - b = BaseBetaMO(counters=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)]) + b = BetaMO(counters=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)]) b.update(rewards=rewards) - assert b == BaseBetaMO( + assert b == BetaMO( counters=[ Beta(n_successes=11 + sum(rewards1), n_failures=22 + len(rewards1) - sum(rewards1)), Beta(n_successes=33 + sum(rewards2), n_failures=44 + len(rewards2) - sum(rewards2)), @@ -247,15 +244,15 @@ def test_create_default_instance_bayesian_logistic_regression(a_int): # at least one beta must be specified if a_int <= 0: with pytest.raises(ValidationError): - create_bayesian_logistic_regression_cold_start(n_betas=a_int) + BayesianLogisticRegression.cold_start(n_features=a_int) else: - blr = create_bayesian_logistic_regression_cold_start(n_betas=a_int) + blr = BayesianLogisticRegression.cold_start(n_features=a_int) assert blr == BayesianLogisticRegression(alpha=StudentT(), betas=[StudentT() for _ in range(a_int)]) @given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) def test_check_context_matrix(n_samples, n_features): - blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features) + blr = BayesianLogisticRegression.cold_start(n_features=n_features) # context is numpy array context = np.random.uniform(low=-100.0, high=100.0, size=(n_samples, n_features)) @@ -276,7 +273,7 @@ def test_check_context_matrix(n_samples, n_features): with pytest.raises(AttributeError): blr.check_context_matrix(context=context.loc[:, 1:]) - blr = create_bayesian_logistic_regression_cold_start(n_betas=2) + blr = BayesianLogisticRegression.cold_start(n_features=2) with pytest.raises(AttributeError): 
blr.check_context_matrix(context=[[1], [2, 3]]) # context has shape mismatch @@ -294,10 +291,10 @@ def sample_proba(context): prob, weighted_sum = blr.sample_proba(context=context) assert type(prob) is type(weighted_sum) is np.ndarray # type of the returns must be np.ndarray - assert len(prob) == len(weighted_sum) == n_samples # return 1 sampled proba and ws per each sample + assert len(prob) == len(weighted_sum) == n_samples # return 1 sampled probability and ws per each sample assert all([0 <= p <= 1 for p in prob]) # probs must be in the interval [0, 1] - blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features) + blr = BayesianLogisticRegression.cold_start(n_features=n_features) # context is numpy array context = np.random.uniform(low=-100.0, high=100.0, size=(n_samples, n_features)) @@ -317,7 +314,7 @@ def sample_proba(context): def test_blr_update(n_samples=100, n_features=3): def update(context, rewards): - blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features) + blr = BayesianLogisticRegression.cold_start(n_features=n_features) assert blr.alpha == StudentT(mu=0.0, sigma=10.0, nu=5.0) assert blr.betas == [ StudentT(mu=0.0, sigma=10.0, nu=5.0), @@ -353,7 +350,7 @@ def update(context, rewards): # raise an error if len(context) != len(rewards) with pytest.raises(ValueError): - blr = create_bayesian_logistic_regression_cold_start(n_betas=n_features) + blr = BayesianLogisticRegression.cold_start(n_features=n_features) blr.update(context=context, rewards=rewards[1:]) @@ -379,9 +376,9 @@ def test_create_default_instance_bayesian_logistic_regression_cc(n_betas, cost): # at least one beta must be specified if n_betas <= 0 or cost < 0: with pytest.raises(ValidationError): - create_bayesian_logistic_regression_cc_cold_start(n_betas=n_betas, cost=cost) + BayesianLogisticRegressionCC.cold_start(n_features=n_betas, cost=cost) else: - blr = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_betas, cost=cost) + blr = 
BayesianLogisticRegressionCC.cold_start(n_features=n_betas, cost=cost) assert blr == BayesianLogisticRegressionCC( alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)], cost=cost ) diff --git a/tests/test_smab.py b/tests/test_smab.py index 369d016..2f4c949 100644 --- a/tests/test_smab.py +++ b/tests/test_smab.py @@ -30,27 +30,17 @@ from pybandits.base import BinaryReward, Float01 from pybandits.model import Beta, BetaCC, BetaMO, BetaMOCC -from pybandits.smab import ( - SmabBernoulli, - SmabBernoulliBAI, - SmabBernoulliCC, - SmabBernoulliMO, - SmabBernoulliMOCC, - create_smab_bernoulli_bai_cold_start, - create_smab_bernoulli_cc_cold_start, - create_smab_bernoulli_cold_start, - create_smab_bernoulli_mo_cc_cold_start, - create_smab_bernoulli_mo_cold_start, -) -from pybandits.strategy import ( - ClassicBandit, - CostControlBandit, - MultiObjectiveBandit, - MultiObjectiveCostControlBandit, -) +from pybandits.smab import SmabBernoulli, SmabBernoulliBAI, SmabBernoulliCC, SmabBernoulliMO, SmabBernoulliMOCC +from pybandits.strategy import ClassicBandit, CostControlBandit, MultiObjectiveBandit, MultiObjectiveCostControlBandit from pybandits.utils import to_serializable_dict from tests.test_utils import is_serializable + +@pytest.fixture(scope="session") +def n_samples() -> int: + return 1000 + + ######################################################################################################################## @@ -58,7 +48,7 @@ def test_create_smab_bernoulli_cold_start(): - assert create_smab_bernoulli_cold_start(action_ids={"a1", "a2"}) == SmabBernoulli( + assert SmabBernoulli.cold_start(action_ids={"a1", "a2"}) == SmabBernoulli( actions={"a1": Beta(), "a2": Beta()}, ) @@ -75,16 +65,8 @@ def test_can_instantiate_smab(): SmabBernoulli() with pytest.raises(AttributeError): SmabBernoulli(actions={}) - with pytest.raises(AttributeError): + with pytest.warns(UserWarning): SmabBernoulli(actions={"action1": Beta()}) - with pytest.raises(TypeError): # strategy is not an 
argument of init - SmabBernoulli( - actions={ - "action1": Beta(), - "action2": Beta(), - }, - strategy=ClassicBandit(), - ) with pytest.raises(ValidationError): SmabBernoulli( actions={ @@ -92,11 +74,18 @@ def test_can_instantiate_smab(): "action2": None, }, ) - smab = SmabBernoulli( + SmabBernoulli( actions={ "action1": Beta(), "action2": Beta(), }, + strategy=ClassicBandit(), + ) + smab = SmabBernoulli( + actions={ + "action1": Beta(), + "action2": Beta(), + } ) assert smab.actions["action1"] == Beta() @@ -131,8 +120,7 @@ def test_smab_predict_raise_when_all_actions_forbidden(): s.predict(n_samples=10, forbidden_actions=["a1", "a2"]) -def test_smab_predict(): - n_samples = 1000 +def test_smab_predict(n_samples: int): s = SmabBernoulli( actions={ "a0": Beta(), @@ -244,7 +232,7 @@ def test_smab_from_state(state): assert isinstance(smab, SmabBernoulli) expected_actions = state["actions"] - actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict + actual_actions = to_serializable_dict(smab.actions) # Normalize the dict assert expected_actions == actual_actions # Ensure get_state and from_state compatibility @@ -260,11 +248,11 @@ def test_smab_from_state(state): def test_create_smab_bernoulli_bai(): # default exploit_p - assert create_smab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}) == SmabBernoulliBAI( + assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}) == SmabBernoulliBAI( actions={"a1": Beta(), "a2": Beta()}, ) # set exploit_p - assert create_smab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, exploit_p=0.2) == SmabBernoulliBAI( + assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, exploit_p=0.2) == SmabBernoulliBAI( actions={"a1": Beta(), "a2": Beta()}, exploit_p=0.2, ) @@ -296,8 +284,7 @@ def test_can_init_smabbai(): assert s.strategy.exploit_p == 0.3 -def test_smabbai_predict(): - n_samples = 1000 +def test_smabbai_predict(n_samples: int): s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}) _, _ 
= s.predict(n_samples=n_samples) @@ -370,11 +357,9 @@ def test_smab_bai_from_state(state): assert isinstance(smab, SmabBernoulliBAI) expected_actions = state["actions"] - actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict + actual_actions = to_serializable_dict(smab.actions) # Normalize the dict assert expected_actions == actual_actions - expected_exploit_p = ( - state["strategy"].get("exploit_p", 0.5) if state["strategy"].get("exploit_p") is not None else 0.5 - ) # Covers both not existing and existing + None + expected_exploit_p = smab.strategy.get_expected_value_from_state(state, "exploit_p") actual_exploit_p = smab.strategy.exploit_p assert expected_exploit_p == actual_exploit_p @@ -390,7 +375,7 @@ def test_smab_bai_from_state(state): def test_create_smab_bernoulli_cc(): - assert create_smab_bernoulli_cc_cold_start( + assert SmabBernoulliCC.cold_start( action_ids_cost={"a1": 10, "a2": 20}, subsidy_factor=0.2, ) == SmabBernoulliCC( @@ -398,7 +383,7 @@ def test_create_smab_bernoulli_cc(): subsidy_factor=0.2, ) - assert create_smab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 20}) == SmabBernoulliCC( + assert SmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20}) == SmabBernoulliCC( actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=20)}, ) @@ -429,8 +414,7 @@ def test_can_init_smabcc(): assert s.strategy.subsidy_factor == 0.7 -def test_smabcc_predict(): - n_samples = 1000 +def test_smabcc_predict(n_samples: int): s = SmabBernoulliCC( actions={ "a1": BetaCC(n_successes=1, n_failures=2, cost=10), @@ -508,9 +492,7 @@ def test_smab_cc_from_state(state): expected_actions = state["actions"] actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict assert expected_actions == actual_actions - expected_subsidy_factor = ( - state["strategy"].get("subsidy_factor", 0.5) if state["strategy"].get("subsidy_factor") is not None else 0.5 - ) # Covers both not existing and existing + None + 
expected_subsidy_factor = smab.strategy.get_expected_value_from_state(state, "subsidy_factor") actual_subsidy_factor = smab.strategy.subsidy_factor assert expected_subsidy_factor == actual_subsidy_factor @@ -568,17 +550,15 @@ def test_all_actions_must_have_same_number_of_objectives_smab_mo(): with pytest.raises(ValueError): SmabBernoulliMO( actions={ - "action 1": BetaMO(counters=[Beta(), Beta()]), - "action 2": BetaMO(counters=[Beta(), Beta()]), - "action 3": BetaMO(counters=[Beta(), Beta(), Beta()]), + "a1": BetaMO(counters=[Beta(), Beta()]), + "a2": BetaMO(counters=[Beta(), Beta()]), + "a3": BetaMO(counters=[Beta(), Beta(), Beta()]), }, ) -def test_smab_mo_predict(): - n_samples = 1000 - - s = create_smab_bernoulli_mo_cold_start(action_ids={"a1", "a2"}, n_objectives=3) +def test_smab_mo_predict(n_samples: int, n_objectives=3): + s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=n_objectives) forbidden = None s.predict(n_samples=n_samples, forbidden_actions=forbidden) @@ -601,9 +581,13 @@ def test_smab_mo_predict(): s.predict(n_samples=n_samples, forbidden_actions=forbidden) -def test_smab_mo_update(): - mab = create_smab_bernoulli_mo_cold_start(action_ids={"a1", "a2"}, n_objectives=3) - mab.update(actions=["a1", "a1"], rewards=[[1, 0, 1], [1, 1, 0]]) +def test_smab_mo_update(n_objectives=3): + action_ids = {"a1", "a2"} + mab = SmabBernoulliMO.cold_start(action_ids=action_ids, n_objectives=n_objectives) + assert all([mab.actions[a] == BetaMO.cold_start(n_objectives=n_objectives) for a in action_ids]) + + mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]]) + assert all([mab.actions[a] != BetaMO.cold_start(n_objectives=n_objectives) for a in set(action_ids)]) @given(st.lists(st.integers(min_value=1), min_size=6, max_size=6)) @@ -741,10 +725,10 @@ def test_all_actions_must_have_same_number_of_objectives_smab_mo_cc(): ) -def test_smab_mo_cc_predict(): +def test_smab_mo_cc_predict(n_samples: int): n_samples = 1000 - s = 
create_smab_bernoulli_mo_cc_cold_start(action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2) + s = SmabBernoulliMOCC.cold_start(action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2) forbidden = None s.predict(n_samples=n_samples, forbidden_actions=forbidden) @@ -767,8 +751,27 @@ def test_smab_mo_cc_predict(): s.predict(n_samples=n_samples, forbidden_actions=forbidden) +def test_smab_mo_cc_update(n_objectives=3): + action_ids_cost = {"a1": 1, "a2": 2} + mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=n_objectives) + assert all( + [ + mab.actions[a] == BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a]) + for a in action_ids_cost.keys() + ] + ) + + mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]]) + assert all( + [ + mab.actions[a] != BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a]) + for a in action_ids_cost.keys() + ] + ) + + @given(st.lists(st.integers(min_value=1), min_size=8, max_size=8)) -def test_smab_mocc_get_state(a_list): +def test_smab_mo_cc_get_state(a_list): a, b, c, d, e, f, g, h = a_list actions = { @@ -837,7 +840,7 @@ def test_smab_mo_cc_from_state(state): assert isinstance(smab, SmabBernoulliMOCC) expected_actions = state["actions"] - actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict + actual_actions = to_serializable_dict(smab.actions) # Normalize the dict assert expected_actions == actual_actions # Ensure get_state and from_state compatibility @@ -868,7 +871,7 @@ def test_can_instantiate_epsilon_greddy_smab_with_params(a, b): assert s.actions["action1"] == s.actions["action2"] -def test_epsilon_greedy_smab_predict(): +def test_epsilon_greedy_smab_predict(n_samples: int): n_samples = 1000 s = SmabBernoulli( @@ -888,13 +891,13 @@ def test_epsilon_greedy_smab_predict(): _, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden_actions) -def test_epsilon_greddy_smabbai_predict(): +def 
test_epsilon_greddy_smabbai_predict(n_samples: int): n_samples = 1000 s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}, epsilon=0.1, default_action="a1") _, _ = s.predict(n_samples=n_samples) -def test_epsilon_greddy_smabcc_predict(): +def test_epsilon_greddy_smabcc_predict(n_samples: int): n_samples = 1000 s = SmabBernoulliCC( actions={ @@ -908,19 +911,19 @@ def test_epsilon_greddy_smabcc_predict(): _, _ = s.predict(n_samples=n_samples) -def test_epsilon_greddy_smab_mo_predict(): +def test_epsilon_greddy_smab_mo_predict(n_samples: int): n_samples = 1000 - s = create_smab_bernoulli_mo_cold_start(action_ids={"a1", "a2"}, n_objectives=3, epsilon=0.1, default_action="a1") + s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=3, epsilon=0.1, default_action="a1") forbidden = None s.predict(n_samples=n_samples, forbidden_actions=forbidden) -def test_epsilon_greddy_smab_mo_cc_predict(): +def test_epsilon_greddy_smab_mo_cc_predict(n_samples: int): n_samples = 1000 - s = create_smab_bernoulli_mo_cc_cold_start( + s = SmabBernoulliMOCC.cold_start( action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2, epsilon=0.1, default_action="a1" ) diff --git a/tests/test_strategy.py b/tests/test_strategy.py index 3df84e6..5dc1ddb 100644 --- a/tests/test_strategy.py +++ b/tests/test_strategy.py @@ -36,7 +36,7 @@ CostControlBandit, MultiObjectiveBandit, MultiObjectiveCostControlBandit, - get_pareto_front, + MultiObjectiveStrategy, ) ######################################################################################################################## @@ -81,17 +81,18 @@ def test_can_init_best_action_identification(a_float): @given(st.floats()) -def test_set_exploit_p(a_float): +def test_with_exploit_p(a_float): b = BestActionIdentification() # set with invalid float if a_float < 0 or a_float > 1 or np.isnan(a_float) or np.isinf(a_float): with pytest.raises(ValidationError): - b.set_exploit_p(exploit_p=a_float) + b.with_exploit_p(exploit_p=a_float) # set with valid 
float else: - b.set_exploit_p(exploit_p=a_float) - assert b.exploit_p == a_float + mutated_b = b.with_exploit_p(exploit_p=a_float) + assert mutated_b.exploit_p == a_float + assert mutated_b is not b @given( @@ -117,9 +118,9 @@ def test_select_action_logic(a_float1, a_float2, a_float3): assert max(p, key=p.get) == b.select_action(p=p) # if exploit_p factor is 0 => return the action with 2nd highest prob (not 1st highest prob) - b.set_exploit_p(exploit_p=0) - assert max(p, key=p.get) != b.select_action(p=p) - assert sorted(p.items(), key=lambda x: x[1], reverse=True)[1][0] == b.select_action(p=p) + mutated_b = b.with_exploit_p(exploit_p=0) + assert max(p, key=p.get) != mutated_b.select_action(p=p) + assert sorted(p.items(), key=lambda x: x[1], reverse=True)[1][0] == mutated_b.select_action(p=p) def test_select_action_logic_all_probs_equal(): @@ -130,8 +131,8 @@ def test_select_action_logic_all_probs_equal(): assert "a1" == b.select_action(p=p) # if exploit_p is 0 => return the action with 2nd highest prob (not 1st highest prob) - b.set_exploit_p(exploit_p=0) - assert "a2" == b.select_action(p=p) + mutated_b = b.with_exploit_p(exploit_p=0) + assert "a2" == mutated_b.select_action(p=p) @given(st.builds(Beta), st.builds(Beta), st.builds(Beta)) @@ -166,17 +167,18 @@ def test_can_init_cost_control(a_float): @given(st.floats()) -def test_set_subsidy_factor(a_float): +def test_with_subsidy_factor(a_float): c = CostControlBandit() # set with invalid float if a_float < 0 or a_float > 1 or np.isnan(a_float) or np.isinf(a_float): with pytest.raises(ValidationError): - c.set_subsidy_factor(subsidy_factor=a_float) + c.with_subsidy_factor(subsidy_factor=a_float) # set with valid float else: - c.set_subsidy_factor(subsidy_factor=a_float) - assert c.subsidy_factor == a_float + mutated_c = c.with_subsidy_factor(subsidy_factor=a_float) + assert mutated_c.subsidy_factor == a_float + assert mutated_c is not c @given( @@ -210,12 +212,12 @@ def test_select_action_logic_cc(): assert "a4" 
== c.select_action(p=p, actions=actions) # if subsidy_factor is 0 => return the action with highest p (classic bandit) - c.set_subsidy_factor(subsidy_factor=0) - assert "a2" == c.select_action(p=p, actions=actions) + mutated_c = c.with_subsidy_factor(subsidy_factor=0) + assert "a2" == mutated_c.select_action(p=p, actions=actions) # otherwise, return the cheapest feasible action with the highest sampled probability - c.set_subsidy_factor(subsidy_factor=0.5) - assert "a5" == c.select_action(p=p, actions=actions) + mutated_c = c.with_subsidy_factor(subsidy_factor=0.5) + assert "a5" == mutated_c.select_action(p=p, actions=actions) @given( @@ -247,20 +249,20 @@ def test_select_action_logic_corner_cases(a_list_p, a_list_cost): assert sorted(actions_cost_proba)[0][-1] == c.select_action(p=p, actions=actions) # if cost factor is 0: - c.set_subsidy_factor(subsidy_factor=0) + mutated_c = c.with_subsidy_factor(subsidy_factor=0) # get the keys of the max p.values() (there might be more max_p_values) max_p_values = [k for k, v in p.items() if v == max(p.values())] # if cost factor is 0 and only 1 max_value => return the action with highest p (classic bandit) # e.g. p={"a1": 0.5, "a2": 0.2} => return always "a1" if len(max_p_values) == 1: - assert max(p, key=p.get) == c.select_action(p=p, actions=actions) + assert max(p, key=p.get) == mutated_c.select_action(p=p, actions=actions) # if cost factor is 0 and only 1+ max_values => return the action with highest p and min cost # e.g. 
p={"a1": 0.5, "a2": 0.5} and cost={"a1": 20, "a2": 10} => return always "a2" else: actions_cost_max = {k: actions_cost[k] for k in max_p_values} - min(actions_cost_max, key=actions_cost_max.get) == c.select_action(p=p, actions=actions) + assert min(actions_cost_max, key=actions_cost_max.get) == mutated_c.select_action(p=p, actions=actions) ######################################################################################################################## @@ -282,7 +284,7 @@ def test_can_init_multiobjective(): ) def test_select_action_mo(p: Dict[ActionId, List[Probability]]): m = MultiObjectiveBandit() - assert m.select_action(p=p) in get_pareto_front(p=p) + assert m.select_action(p=p) in m.get_pareto_front(p=p) def test_pareto_front(): @@ -310,7 +312,7 @@ def test_pareto_front(): "a7": [0.1, 0.1], } - assert get_pareto_front(p2d) == ["a0", "a1", "a4", "a5"] + assert MultiObjectiveStrategy.get_pareto_front(p2d) == ["a0", "a1", "a4", "a5"] p2d = { "a0": [0.1, 0.1], @@ -318,7 +320,7 @@ def test_pareto_front(): "a2": [0.3, 0.3], } - assert get_pareto_front(p2d) == ["a1", "a2"] + assert MultiObjectiveStrategy.get_pareto_front(p2d) == ["a1", "a2"] # works in 3D p3d = { @@ -332,7 +334,7 @@ def test_pareto_front(): "a7": [0.1, 0.1, 0.3], } - assert get_pareto_front(p3d) == ["a0", "a1", "a4", "a5", "a7"] + assert MultiObjectiveStrategy.get_pareto_front(p3d) == ["a0", "a1", "a4", "a5", "a7"] ######################################################################################################################## @@ -363,7 +365,7 @@ def test_select_action_mo_cc(): "a5": [0.6, 0.1, 0.5], } # within the pareto front ("a3", "a4", "a5") select the action with min cost ("a4") - assert get_pareto_front(p) == ["a3", "a4", "a5"] + assert m.get_pareto_front(p) == ["a3", "a4", "a5"] assert m.select_action(p=p, actions=actions) == "a4" actions = { @@ -377,5 +379,5 @@ def test_select_action_mo_cc(): "a3": [0.0, 0.1, 0.9], } # within the actions with the min cost ("a1" or "a2") select 
the action the highest mean of probabilities ("a2") - assert get_pareto_front(p) == ["a1", "a2", "a3"] + assert m.get_pareto_front(p) == ["a1", "a2", "a3"] assert m.select_action(p=p, actions=actions) == "a2"