From d38bd2314ed3311db444c4468eb75d0d5753c407 Mon Sep 17 00:00:00 2001 From: Brandon Trabucco Date: Mon, 29 Jan 2024 14:04:09 -0500 Subject: [PATCH] merging changes from Young into hopper-v1 task --- design_bench/__init__.py | 16 ++ design_bench/oracles/exact/__init__.py | 5 + .../oracles/exact/hopper_controller_oracle.py | 36 ++-- .../hopper_controller_stochastic_oracle.py | 193 ++++++++++++++++++ setup.py | 2 +- 5 files changed, 231 insertions(+), 21 deletions(-) create mode 100644 design_bench/oracles/exact/hopper_controller_stochastic_oracle.py diff --git a/design_bench/__init__.py b/design_bench/__init__.py index adbad6f..c466f75 100644 --- a/design_bench/__init__.py +++ b/design_bench/__init__.py @@ -1209,6 +1209,22 @@ noise_std=0.0)) +register('HopperController-Exact-v1', + 'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset', + 'design_bench.oracles.exact:HopperControllerStochasticOracle', + + # keyword arguments for building the dataset + dataset_kwargs=dict( + max_samples=None, + distribution=None, + max_percentile=100, + min_percentile=0), + + # keyword arguments for building the exact oracle + oracle_kwargs=dict( + noise_std=0.0)) + + register('HopperController-GP-v0', 'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset', 'design_bench.oracles.sklearn:GaussianProcessOracle', diff --git a/design_bench/oracles/exact/__init__.py b/design_bench/oracles/exact/__init__.py index 5c103b9..d5df5ee 100644 --- a/design_bench/oracles/exact/__init__.py +++ b/design_bench/oracles/exact/__init__.py @@ -18,6 +18,11 @@ except ImportError as e: print("Skipping HopperControllerOracle import:", e) +try: + from .hopper_controller_stochastic_oracle import HopperControllerStochasticOracle +except ImportError as e: + print("Skipping HopperControllerStochasticOracle import:", e) + try: from .nas_bench_oracle import NASBenchOracle except ImportError as e: diff --git 
a/design_bench/oracles/exact/hopper_controller_oracle.py b/design_bench/oracles/exact/hopper_controller_oracle.py index e591134..f1e3485 100644 --- a/design_bench/oracles/exact/hopper_controller_oracle.py +++ b/design_bench/oracles/exact/hopper_controller_oracle.py @@ -117,43 +117,40 @@ def protected_predict(self, x, render=False, **render_kwargs): value 'x' in a model-based optimization problem """ + # extract weights from the vector design weights = [] for s in ((self.obs_dim, self.hidden_dim), - (1, self.hidden_dim,), + (self.hidden_dim,), (self.hidden_dim, self.hidden_dim), - (1, self.hidden_dim,), + (self.hidden_dim,), (self.hidden_dim, self.action_dim), - (1, self.action_dim,), + (self.action_dim,), (1, self.action_dim)): weights.append(x[0:np.prod(s)].reshape(s)) x = x[np.prod(s):] + # the final weight is logstd and is not used + weights.pop(-1) + # create a policy forward pass in numpy def mlp_policy(h): - h = h.reshape(1, -1) h = np.tanh(h @ weights[0] + weights[1]) h = np.tanh(h @ weights[2] + weights[3]) - h = h @ weights[4] + weights[5] + np.random.randn(1, self.action_dim) * np.exp(weights[6]) - return h + return h @ weights[4] + weights[5] # make a copy of the policy and the environment env = gym.make(self.env_name) # perform a single rollout for quick evaluation + obs = env.reset() + done = False path_returns = np.zeros([1], dtype=np.float32) - total_return = 0.0 - for _ in range(self.eval_n_trials): - obs = env.reset() - done = False - for step in range(1000): - obs, rew, done, info = env.step(mlp_policy(obs)) - if render: - env.render(**render_kwargs) - total_return += rew - if done: - break - path_returns[0] = total_return / self.eval_n_trials + while not done: + obs, rew, done, info = env.step(mlp_policy(obs)) + if render: + env.render(**render_kwargs) + path_returns += rew.astype(np.float32) # return the sum of rewards for a single trajectory return path_returns.astype(np.float32) @@ -184,10 +181,9 @@ def __init__(self, dataset: 
ContinuousDataset, **kwargs): self.action_dim = 3 self.hidden_dim = 64 self.env_name = 'Hopper-v2' - self.eval_n_trials = 10 # initialize the oracle using the super class super(HopperControllerOracle, self).__init__( dataset, internal_batch_size=1, is_batched=False, expect_normalized_y=False, - expect_normalized_x=False, expect_logits=None, **kwargs) + expect_normalized_x=False, expect_logits=None, **kwargs) \ No newline at end of file diff --git a/design_bench/oracles/exact/hopper_controller_stochastic_oracle.py b/design_bench/oracles/exact/hopper_controller_stochastic_oracle.py new file mode 100644 index 0000000..92b40d4 --- /dev/null +++ b/design_bench/oracles/exact/hopper_controller_stochastic_oracle.py @@ -0,0 +1,193 @@ +from design_bench.oracles.exact_oracle import ExactOracle +from design_bench.datasets.continuous_dataset import ContinuousDataset +from design_bench.datasets.continuous.hopper_controller_dataset import HopperControllerDataset +import numpy as np +import gym + + +class HopperControllerStochasticOracle(ExactOracle): + """An abstract class for managing the ground truth score functions f(x) + for model-based optimization problems, where the + goal is to find a design 'x' that maximizes a prediction 'y': + + max_x { y = f(x) } + + Public Attributes: + + external_dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class which points to + the mutable task dataset for a model-based optimization problem + + internal_dataset: DatasetBuilder + an instance of a subclass of the DatasetBuilder class which has frozen + statistics and is used for training the oracle + + is_batched: bool + a boolean variable that indicates whether the evaluation function + implemented for a particular oracle is batched, which affects + the scaling coefficient of its computational cost + + internal_batch_size: int + an integer representing the number of design values to process + internally at the same time, if None defaults to the entire + tensor given to 
the self.score method + internal_measurements: int + an integer representing the number of independent measurements of + the prediction made by the oracle, which are subsequently + averaged, and is useful when the oracle is stochastic + + noise_std: float + the standard deviation of gaussian noise added to the prediction + values 'y' coming out of the ground truth score function f(x) + in order to make the optimization problem difficult + + expect_normalized_y: bool + a boolean indicator that specifies whether the outputs of the oracle + score function are expected to be normalized + expect_normalized_x: bool + a boolean indicator that specifies whether the inputs to the oracle + score function are expected to be normalized + expect_logits: bool + a boolean that specifies whether the oracle score function is + expecting logits when the dataset is discrete + + Public Methods: + + predict(np.ndarray) -> np.ndarray: + a function that accepts a batch of design values 'x' as input and for + each design computes a prediction value 'y' which corresponds + to the score in a model-based optimization problem + + check_input_format(DatasetBuilder) -> bool: + a function that accepts a list of integers as input and returns true + when design values 'x' with the shape specified by that list are + compatible with this class of approximate oracle + + """ + + name = "exact_average_return" + + @classmethod + def supported_datasets(cls): + """An attribute that defines the set of dataset classes which this + oracle can be applied to forming a valid ground truth score + function for a model-based optimization problem + + """ + + return {HopperControllerDataset} + + @classmethod + def fully_characterized(cls): + """An attribute that defines whether all possible inputs to the + model-based optimization problem have been evaluated and + are returned via lookup in self.predict + + """ + + return False + + @classmethod + def is_simulated(cls): + """An attribute that defines whether the 
values returned by the oracle + were obtained by running a computer simulation rather than + performing physical experiments with real data + + """ + + return True + + def protected_predict(self, x, render=False, **render_kwargs): + """Score function to be implemented by oracle subclasses, where x is + either a batch of designs if self.is_batched is True or is a + single design when self.is_batched is False + + Arguments: + + x_batch: np.ndarray + a batch or single design 'x' that will be given as input to the + oracle model in order to obtain a prediction value 'y' for + each 'x' which is then returned + + Returns: + + y_batch: np.ndarray + a batch or single prediction 'y' made by the oracle model, + corresponding to the ground truth score for each design + value 'x' in a model-based optimization problem + + """ + # extract weights from the vector design + weights = [] + for s in ((self.obs_dim, self.hidden_dim), + (1, self.hidden_dim,), + (self.hidden_dim, self.hidden_dim), + (1, self.hidden_dim,), + (self.hidden_dim, self.action_dim), + (1, self.action_dim,), + (1, self.action_dim)): + weights.append(x[0:np.prod(s)].reshape(s)) + x = x[np.prod(s):] + + # create a policy forward pass in numpy + def mlp_policy(h): + h = h.reshape(1, -1) + h = np.tanh(h @ weights[0] + weights[1]) + h = np.tanh(h @ weights[2] + weights[3]) + h = h @ weights[4] + weights[5] + np.random.randn(1, self.action_dim) * np.exp(weights[6]) + return h + + # build a fresh environment for this evaluation + env = gym.make(self.env_name) + + # average the return over self.eval_n_trials rollouts + path_returns = np.zeros([1], dtype=np.float32) + total_return = 0.0 + for _ in range(self.eval_n_trials): + obs = env.reset() + done = False + for step in range(1000): + obs, rew, done, info = env.step(mlp_policy(obs)) + if render: + env.render(**render_kwargs) + total_return += rew + if done: + break + path_returns[0] = total_return / self.eval_n_trials + + # return the average return across the evaluation rollouts 
+ return path_returns.astype(np.float32) + + def __init__(self, dataset: ContinuousDataset, **kwargs): + """Initialize the ground truth score function f(x) for a model-based + optimization problem, which involves loading the parameters of an + oracle model and estimating its computational cost + + Arguments: + + dataset: ContinuousDataset + an instance of a subclass of the DatasetBuilder class which has + a set of design values 'x' and prediction values 'y', and defines + batching and sampling methods for those attributes + noise_std: float + the standard deviation of gaussian noise added to the prediction + values 'y' coming out of the ground truth score function f(x) + in order to make the optimization problem difficult + internal_measurements: int + an integer representing the number of independent measurements of + the prediction made by the oracle, which are subsequently + averaged, and is useful when the oracle is stochastic + + """ + + self.obs_dim = 11 + self.action_dim = 3 + self.hidden_dim = 64 + self.env_name = 'Hopper-v2' + self.eval_n_trials = 10 + + # initialize the oracle using the super class + super(HopperControllerStochasticOracle, self).__init__( + dataset, internal_batch_size=1, is_batched=False, + expect_normalized_y=False, + expect_normalized_x=False, expect_logits=None, **kwargs) diff --git a/setup.py b/setup.py index 7767add..40881b6 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ 'brandontrabucco/design-bench/archive/v2_0_20.tar.gz', keywords=['Deep Learning', 'Neural Networks', 'Benchmark', 'Model-Based Optimization'], - extras_require={'all': ['gym[mujoco]'], 'cma': ['cma']}, + extras_require={'all': ['gym[mujoco]<0.26.0'], 'cma': ['cma']}, install_requires=['pandas', 'requests', 'scikit-learn', 'torch', 'torchvision', 'numpy', 'tensorflow>=2.2', 'transformers',