secondmind-labs · AlexGoodallAI · Nov 22, 2022 · Nov 23, 2022 · Nov 23, 2022 · Nov 24, 2022
diff --git a/tests/integration/test_multifidelity_bayesian_optimization.py b/tests/integration/test_multifidelity_bayesian_optimization.py
@@ -0,0 +1,108 @@
+import pytest
+import tensorflow as tf
+import trieste
+from trieste.acquisition.combination import Product
+from trieste.acquisition.function.entropy import MUMBO, CostWeighting
+from trieste.acquisition.optimizer import generate_continuous_optimizer
+from trieste.bayesian_optimizer import FrozenRecord
+from trieste.data import Dataset, add_fidelity_column, check_and_extract_fidelity_query_points
+from trieste.objectives import (
+    Linear2Fidelity,
+    Linear3Fidelity,
+    Linear5Fidelity,
+    SingleObjectiveMultifidelityTestProblem,
+)
+from trieste.objectives.utils import mk_observer, Observer
+from trieste.types import TensorType
+from trieste.models.gpflow import (
+    MultifidelityAutoregressive,
+    build_multifidelity_autoregressive_models,
+)
+
+# def noisy_linear_2_fidelity(x: TensorType) -> TensorType:
+
+#     _, fidelities = check_and_extract_fidelity_query_points(x)
+#     y = linear_two_fidelity(x)
+#     not_lowest_fidelity = fidelities > 0
+#     noise = tf.random.normal(y.shape, stddev=1e-1, dtype=y.dtype)
+#     y = tf.where(not_lowest_fidelity, y + noise, y)
+#     return y
+
+
+def _build_observer(problem: SingleObjectiveMultifidelityTestProblem) -> Observer:
+
+    objective_function = problem.objective
+
+    def noisy_objective(x: TensorType) -> TensorType:
+
+        _, fidelities = check_and_extract_fidelity_query_points(x)
+        y = objective_function(x)
+        not_lowest_fidelity = fidelities > 0
+        noise = tf.random.normal(y.shape, stddev=1e-1, dtype=y.dtype)
+        y = tf.where(not_lowest_fidelity, y + noise, y)
+        return y
+
+    return mk_observer(noisy_objective)
+
+
+def _build_nested_multifidelity_dataset(
+    problem: SingleObjectiveMultifidelityTestProblem, observer: Observer
+) -> Dataset:
+
+    num_fidelities = problem.num_fidelities
+    initial_sample_sizes = [2 + 2 * (num_fidelities - i) for i in range(num_fidelities)]
+    fidelity_samples = list()
+    lowest_fidelity_sample = problem.input_search_space.sample(initial_sample_sizes[0])
+    lowest_fidelity_sample = add_fidelity_column(lowest_fidelity_sample, 0)
+    fidelity_samples.append(lowest_fidelity_sample)
+
+    for i in range(1, num_fidelities):
+        previous_fidelity_points = fidelity_samples[i - 1][:, :-1]
+        indices = tf.range(tf.shape(previous_fidelity_points)[0])
+        random_indices = tf.random.shuffle(indices)[: initial_sample_sizes[i]]
+        random_points = tf.gather(previous_fidelity_points, random_indices)
+        sample_points = add_fidelity_column(random_points, i)
+        fidelity_samples.append(sample_points)
+
+    query_points = tf.concat(fidelity_samples, axis=0)
+    dataset = observer(query_points)
+    print(dataset)
+
+    return dataset
+
+
+@pytest.mark.parametrize("problem", ((Linear2Fidelity), (Linear3Fidelity), (Linear5Fidelity)))
+def test_multifidelity_bo_finds_minima_of_linear_problem(
+    problem: SingleObjectiveMultifidelityTestProblem,
+):
+
+    observer = _build_observer(problem)
+    initial_data = _build_nested_multifidelity_dataset(problem, observer)
+    low_cost = 1.0
+    high_cost = 4.0
+    input_search_space = problem.input_search_space  # Does not include fidelities
+    search_space = problem.search_space  # Includes fidelities
+
+    model = MultifidelityAutoregressive(
+        build_multifidelity_autoregressive_models(
+            initial_data,
+            num_fidelities=problem.num_fidelities,
+            input_search_space=input_search_space,
+        )
+    )
+
+    model.update(initial_data)
+    model.optimize(initial_data)
+
+    bo = trieste.bayesian_optimizer.BayesianOptimizer(observer, search_space)
+    acq_builder = Product(
+        MUMBO(search_space).using("OBJECTIVE"),
+        CostWeighting(low_cost, high_cost).using("OBJECTIVE"),
+    )
+    optimizer = generate_continuous_optimizer(num_initial_samples=10_000, num_optimization_runs=10)
+    rule = trieste.acquisition.rule.EfficientGlobalOptimization(builder=acq_builder)
+
+    num_steps = 1
+    result = bo.optimize(num_steps, initial_data, model, acquisition_rule=rule)
+
+    dataset = result.try_get_final_dataset()
diff --git a/trieste/acquisition/function/entropy.py b/trieste/acquisition/function/entropy.py
@@ -15,6 +15,7 @@
 This module contains entropy-based acquisition function builders.
 """
 from __future__ import annotations
+import math
 
 from typing import Optional, TypeVar, cast, overload
 
@@ -25,7 +26,11 @@
 from ...data import Dataset
 from ...models import ProbabilisticModel
 from ...models.gpflow.interface import SupportsCovarianceBetweenPoints
-from ...models.interfaces import HasTrajectorySampler, SupportsGetObservationNoise
+from ...models.interfaces import (
+    HasTrajectorySampler,
+    SupportsGetObservationNoise,
+    SupportsCovarianceWithTopFidelity,
+)
 from ...space import SearchSpace
 from ...types import TensorType
 from ..interface import (
@@ -617,3 +622,147 @@ def __call__(self, x: TensorType) -> TensorType:
             repulsion_weight = 1.0
 
         return repulsion_weight * repulsion
+
+
+class MUMBO(MinValueEntropySearch):
+    def __repr__(self) -> str:
+        return "MUMBO()"
+
+    def prepare_acquisition_function(
+        self,
+        model: SupportsCovarianceWithTopFidelity,
+        dataset: Optional[Dataset] = None,
+    ) -> AcquisitionFunction:
+        """
+        :param model: The multifidelity model.
+        :param dataset: The data from the observer.
+        :return: The max-value entropy search acquisition function modified for objective
+            minimisation. This function will raise :exc:`ValueError` or
+            :exc:`~tf.errors.InvalidArgumentError` if used with a batch size greater than one.
+        :raise tf.errors.InvalidArgumentError: If ``dataset`` is empty.
+        """
+        tf.debugging.Assert(dataset is not None, [])
+        dataset = cast(Dataset, dataset)
+        tf.debugging.assert_positive(len(dataset), message="Dataset must be populated.")
+        min_value_samples = self.get_min_value_samples_on_top_fidelity(model, dataset)
+        return mumbo(model, min_value_samples)
+
+    def update_acquisition_function(
+        self,
+        function: AcquisitionFunction,
+        model: ProbabilisticModelType,
+        dataset: Optional[Dataset] = None,
+    ) -> AcquisitionFunction:
+        """
+        :param function: The acquisition function to update.
+        :param model: The model.
+        :param dataset: The data from the observer.
+        """
+        tf.debugging.Assert(dataset is not None, [])
+        dataset = cast(Dataset, dataset)
+        tf.debugging.assert_positive(len(dataset), message="Dataset must be populated.")
+        min_value_samples = self.get_min_value_samples_on_top_fidelity(model, dataset)
+        function.update(min_value_samples)  # type: ignore
+        return function
+
+    def get_min_value_samples_on_top_fidelity(
+        self, model: ProbabilisticModelType, dataset: Dataset
+    ):
+        """
+        :param model: The model.
+        :param dataset: The data from the observer.
+        """
+        query_points = self._search_space.sample(num_samples=self._grid_size)
+        tf.debugging.assert_same_float_dtype([dataset.query_points, query_points])
+        query_points = tf.concat([dataset.query_points, query_points], 0)
+        query_points_on_top_fidelity = tf.concat(
+            [query_points[:, :-1], tf.ones_like(query_points[:, -1:])], -1
+        )
+        return self._min_value_sampler.sample(
+            model, self._num_samples, query_points_on_top_fidelity
+        )
+
+
+class mumbo(min_value_entropy_search):
+    # @tf.function
+    def __call__(self, x: TensorType) -> TensorType:
+        tf.debugging.assert_shapes(
+            [(x, [..., 1, None])],
+            message="This acquisition function only supports batch sizes of one.",
+        )
+
+        x_on_top_fidelity = tf.concat(
+            [tf.squeeze(x, -2)[:, :-1], tf.ones_like(tf.squeeze(x, -2)[:, -1:])], -1
+        )
+        fmean, fvar = self._model.predict(x_on_top_fidelity)
+        fsd = tf.math.sqrt(fvar)
+        ymean, yvar = self._model.predict_y(tf.squeeze(x, -2))
+        cov = self._model.covariance_with_top_fidelity(tf.squeeze(x, -2))
+        correlations = cov / tf.math.sqrt(fvar * yvar)
+        correlations = tf.clip_by_value(correlations, -1.0, 1.0)
+
+        # Calculate moments of extended skew Gaussian distributions (ESG)
+        # These will be used to define reasonable ranges for the numerical
+        # intergration of the ESG's differential entropy.
+        gamma = (tf.squeeze(self._samples) - fmean) / fsd
+        normal = tfp.distributions.Normal(tf.cast(0, fmean.dtype), tf.cast(1, fmean.dtype))
+        log_minus_cdf = normal.log_cdf(-gamma)
+        ratio = tf.math.exp(normal.log_prob(gamma) - log_minus_cdf)
+        ESGmean = correlations * ratio
+        ESGvar = 1 + correlations * ESGmean * (gamma - ratio)
+        ESGvar = tf.math.maximum(ESGvar, 0)  # Clip  to improve numerical stability
+
+        # get upper limits for numerical integration
+        # we need this range to contain almost all of the ESG's probability density
+        # we found +-5 standard deviations provides a tight enough approximation
+        upper_limit = ESGmean + 5 * tf.math.sqrt(ESGvar)
+        lower_limit = ESGmean - 5 * tf.math.sqrt(ESGvar)
+
+        # perform numerical integrations
+        z = tf.linspace(lower_limit, upper_limit, num=1000)  # build discretisation
+        minus_correlations = tf.math.sqrt(
+            1 - correlations ** 2
+        )  # calculate ESG density at these points
+        minus_correlations = tf.math.maximum(
+            minus_correlations, 1e-10
+        )  # clip below for numerical stability
+
+        density = tf.math.exp(
+            normal.log_prob(z)
+            - log_minus_cdf
+            + normal.log_cdf(-(gamma - correlations * z) / minus_correlations)
+        )
+        # calculate point-wise entropy function contributions (carefuly where density is 0)
+        entropy_function = -density * tf.where(density != 0, tf.math.log(density), 0.0)
+        approximate_entropy = tfp.math.trapz(
+            entropy_function, z, axis=0
+        )  # perform integration over ranges
+
+        approximate_entropy = tf.reduce_mean(
+            approximate_entropy, axis=-1
+        )  # build monte-carlo estimate over the gumbel samples
+        f_acqu_x = (
+            tf.cast(0.5 * tf.math.log(2.0 * math.pi * math.e), tf.float64) - approximate_entropy
+        )
+        return f_acqu_x[:, None]
+
+
+class CostWeighting(SingleModelAcquisitionBuilder):
+    def __init__(self, low_fidelity_cost, high_fidelity_cost):
+        self._low_fidelity_cost = low_fidelity_cost
+        self._high_fidelity_cost = high_fidelity_cost
+
+    def prepare_acquisition_function(self, model, dataset=None):
+        def acquisition(x):
+            tf.debugging.assert_shapes(
+                [(x, [..., 1, None])],
+                message="This acquisition function only supports batch sizes of one.",
+            )
+            fidelities = x[..., -1:]  # [..., 1]
+            costs = tf.where(fidelities == 0.0, self._low_fidelity_cost, self._high_fidelity_cost)
+            return tf.cast(1.0 / costs, x.dtype)[:, 0, :]
+
+        return acquisition
+
+    def update_acquisition_function(self, function, model, dataset=None):
+        return function
diff --git a/trieste/models/interfaces.py b/trieste/models/interfaces.py
@@ -661,3 +661,20 @@ def get_inducing_variables(self) -> tuple[TensorType, TensorType, TensorType, bo
             or not whitened representations.
         """
         raise NotImplementedError
+
+
+@runtime_checkable
+class SupportsCovarianceWithTopFidelity(ProbabilisticModel, Protocol):
+    """A probabilistic model is multifidelity and has access to a method to calculate the
+    covariance between a point and the same point at the top fidelity"""
+
+    @abstractmethod
+    def covariance_with_top_fidelity(self, query_points: TensorType) -> TensorType:
+        """
+        Calculate the covariance of the output at `query_point` and a given fidelity with the
+        highest fidelity output at the same `query_point`.
+        :param query_points: The query points to calculate the covariance for, of shape [N, D+1],
+            where the final column of the final dimension contains the fidelity of the query point
+        :return: The covariance with the top fidelity for the `query_points`, of shape [N, P]
+        """
+        raise NotImplementedError
diff --git a/trieste/objectives/__init__.py b/trieste/objectives/__init__.py
@@ -36,3 +36,9 @@
     SingleObjectiveTestProblem,
     Trid10,
 )
+from .multifidelity_objectives import (
+    Linear2Fidelity,
+    Linear3Fidelity,
+    Linear5Fidelity,
+    SingleObjectiveMultifidelityTestProblem,
+)
diff --git a/trieste/objectives/multifidelity_objectives.py b/trieste/objectives/multifidelity_objectives.py
@@ -0,0 +1,83 @@
+import tensorflow as tf
+import numpy as np
+from dataclasses import dataclass
+
+from .single_objectives import SingleObjectiveTestProblem
+from ..types import TensorType
+from ..space import Box, DiscreteSearchSpace, TaggedProductSearchSpace, SearchSpace
+
+
+@dataclass(frozen=True)
+class SingleObjectiveMultifidelityTestProblem(SingleObjectiveTestProblem):
+
+    num_fidelities: int
+    """The number of fidelities of test function"""
+
+    input_search_space: SearchSpace
+    """The search space of the inputs, ignoring fidelities"""
+
+
+def linear_multifidelity(x: TensorType):
+
+    x_input = x[..., :-1]
+    x_fidelity = x[..., -1:]
+
+    f = 0.5 * ((6.0 * x_input - 2.0) ** 2) * tf.math.sin(12.0 * x_input - 4.0) + 10.0 * (
+        x_input - 1.0
+    )
+    f = f + x_fidelity * (f - 20.0 * (x_input - 1.0))
+
+    return f
+
+
+_LINEAR_MULTIFIDELITY_MINIMIZERS = {2: 0.75724875, 3: 0.76333767, 5: 0.76801846}
+
+_LINEAR_MULTIFIDELITY_MINIMA = {
+    2: -6.020740055,
+    3: -6.634287061,
+    5: -7.933019704,
+}
+
+
+def _linear_multifidelity_search_space_builder(
+    n_fidelities: int, input_search_space
+) -> TaggedProductSearchSpace:
+
+    fidelity_search_space = DiscreteSearchSpace(
+        np.array([np.arange(n_fidelities, dtype=float)]).reshape(-1, 1)
+    )
+    search_space = TaggedProductSearchSpace(
+        [input_search_space, fidelity_search_space], ["input", "fidelity"]
+    )
+    return search_space
+
+
+Linear2Fidelity = SingleObjectiveMultifidelityTestProblem(
+    name="Linear 2 Fidelity",
+    objective=linear_multifidelity,
+    input_search_space=Box(np.zeros(1), np.ones(1)),
+    search_space=_linear_multifidelity_search_space_builder(2, Box(np.zeros(1), np.ones(1))),
+    minimizers=_LINEAR_MULTIFIDELITY_MINIMIZERS[2],
+    minimum=_LINEAR_MULTIFIDELITY_MINIMA[2],
+    num_fidelities=2,
+)
+
+Linear3Fidelity = SingleObjectiveMultifidelityTestProblem(
+    name="Linear 3 Fidelity",
+    objective=linear_multifidelity,
+    input_search_space=Box(np.zeros(1), np.ones(1)),
+    search_space=_linear_multifidelity_search_space_builder(3, Box(np.zeros(1), np.ones(1))),
+    minimizers=_LINEAR_MULTIFIDELITY_MINIMIZERS[3],
+    minimum=_LINEAR_MULTIFIDELITY_MINIMA[3],
+    num_fidelities=3,
+)
+
+Linear5Fidelity = SingleObjectiveMultifidelityTestProblem(
+    name="Linear 5 Fidelity",
+    objective=linear_multifidelity,
+    input_search_space=Box(np.zeros(1), np.ones(1)),
+    search_space=_linear_multifidelity_search_space_builder(5, Box(np.zeros(1), np.ones(1))),
+    minimizers=_LINEAR_MULTIFIDELITY_MINIMIZERS[5],
+    minimum=_LINEAR_MULTIFIDELITY_MINIMA[5],
+    num_fidelities=5,
+)