Merge pull request #19 from brandontrabucco/hopper-change
Creating Additional HopperController-Exact-v1 task that averages 10 trials
brandontrabucco authored Jan 29, 2024
2 parents ecbf4e9 + d38bd23 commit e529395
Showing 5 changed files with 216 additions and 2 deletions.
16 changes: 16 additions & 0 deletions design_bench/__init__.py
@@ -1209,6 +1209,22 @@
noise_std=0.0))


register('HopperController-Exact-v1',
'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset',
'design_bench.oracles.exact:HopperControllerStochasticOracle',

# keyword arguments for building the dataset
dataset_kwargs=dict(
max_samples=None,
distribution=None,
max_percentile=100,
min_percentile=0),

# keyword arguments for building the exact oracle
oracle_kwargs=dict(
noise_std=0.0))


register('HopperController-GP-v0',
'design_bench.datasets.continuous.hopper_controller_dataset:HopperControllerDataset',
'design_bench.oracles.sklearn:GaussianProcessOracle',
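With the registration above, the new HopperController-Exact-v1 task should be constructible through the standard design-bench entry point. A minimal sketch, assuming design_bench.make accepts the registered name and that MuJoCo and a compatible gym are installed:

import design_bench

# build the new task backed by the stochastic (10-trial averaged) oracle
task = design_bench.make('HopperController-Exact-v1')

# the dataset of flat policy-weight designs and their logged returns
print(task.x.shape, task.y.shape)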
5 changes: 5 additions & 0 deletions design_bench/oracles/exact/__init__.py
@@ -18,6 +18,11 @@
except ImportError as e:
print("Skipping HopperControllerOracle import:", e)

try:
from .hopper_controller_stochastic_oracle import HopperControllerStochasticOracle
except ImportError as e:
print("Skipping HopperControllerStochasticOracle import:", e)

try:
from .nas_bench_oracle import NASBenchOracle
except ImportError as e:
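Because of this guard, the new oracle only becomes importable from design_bench.oracles.exact when gym and its MuJoCo extras are available; otherwise the import is skipped with a printed message. A quick check, assuming the package is installed:

from design_bench.oracles import exact

# True only when the gym/mujoco imports succeeded and the oracle was exposed
print(hasattr(exact, 'HopperControllerStochasticOracle'))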
2 changes: 1 addition & 1 deletion design_bench/oracles/exact/hopper_controller_oracle.py
@@ -186,4 +186,4 @@ def __init__(self, dataset: ContinuousDataset, **kwargs):
super(HopperControllerOracle, self).__init__(
dataset, internal_batch_size=1, is_batched=False,
expect_normalized_y=False,
expect_normalized_x=False, expect_logits=None, **kwargs)
expect_normalized_x=False, expect_logits=None, **kwargs)
193 changes: 193 additions & 0 deletions design_bench/oracles/exact/hopper_controller_stochastic_oracle.py
@@ -0,0 +1,193 @@
from design_bench.oracles.exact_oracle import ExactOracle
from design_bench.datasets.continuous_dataset import ContinuousDataset
from design_bench.datasets.continuous.hopper_controller_dataset import HopperControllerDataset
import numpy as np
import gym


class HopperControllerStochasticOracle(ExactOracle):
"""An abstract class for managing the ground truth score functions f(x)
for model-based optimization problems, where the
goal is to find a design 'x' that maximizes a prediction 'y':
max_x { y = f(x) }
Public Attributes:
external_dataset: DatasetBuilder
an instance of a subclass of the DatasetBuilder class which points to
the mutable task dataset for a model-based optimization problem
internal_dataset: DatasetBuilder
an instance of a subclass of the DatasetBuilder class which has frozen
statistics and is used for training the oracle
is_batched: bool
a boolean variable that indicates whether the evaluation function
implemented for a particular oracle is batched, which affects
the scaling coefficient of its computational cost
internal_batch_size: int
an integer representing the number of design values to process
internally at the same time, if None defaults to the entire
tensor given to the self.score method
internal_measurements: int
an integer representing the number of independent measurements of
the prediction made by the oracle, which are subsequently
averaged, and is useful when the oracle is stochastic
noise_std: float
the standard deviation of gaussian noise added to the prediction
values 'y' coming out of the ground truth score function f(x)
in order to make the optimization problem difficult
expect_normalized_y: bool
a boolean indicator that specifies whether the prediction values 'y'
returned by the oracle score function are expected to be normalized
expect_normalized_x: bool
a boolean indicator that specifies whether the design values 'x'
given to the oracle score function are expected to be normalized
expect_logits: bool
a boolean that specifies whether the oracle score function is
expecting logits when the dataset is discrete
Public Methods:
predict(np.ndarray) -> np.ndarray:
a function that accepts a batch of design values 'x' as input and for
each design computes a prediction value 'y' which corresponds
to the score in a model-based optimization problem
check_input_format(DatasetBuilder) -> bool:
a function that accepts a dataset builder as input and returns true
when the design values 'x' it provides have a format
compatible with this class of exact oracle
"""

name = "exact_average_return"

@classmethod
def supported_datasets(cls):
"""An attribute the defines the set of dataset classes which this
oracle can be applied to forming a valid ground truth score
function for a model-based optimization problem
"""

return {HopperControllerDataset}

@classmethod
def fully_characterized(cls):
"""An attribute the defines whether all possible inputs to the
model-based optimization problem have been evaluated and
are are returned via lookup in self.predict
"""

return False

@classmethod
def is_simulated(cls):
"""An attribute the defines whether the values returned by the oracle
were obtained by running a computer simulation rather than
performing physical experiments with real data
"""

return True

def protected_predict(self, x, render=False, **render_kwargs):
"""Score function to be implemented by oracle subclasses, where x is
either a batch of designs if self.is_batched is True or is a
single design when self._is_batched is False
Arguments:
x_batch: np.ndarray
a batch or single design 'x' that will be given as input to the
oracle model in order to obtain a prediction value 'y' for
each 'x' which is then returned
Returns:
y_batch: np.ndarray
a batch or single prediction 'y' made by the oracle model,
corresponding to the ground truth score for each design
value 'x' in a model-based optimization problem
"""
# extract weights from the vector design
weights = []
for s in ((self.obs_dim, self.hidden_dim),
(1, self.hidden_dim,),
(self.hidden_dim, self.hidden_dim),
(1, self.hidden_dim,),
(self.hidden_dim, self.action_dim),
(1, self.action_dim,),
(1, self.action_dim)):
weights.append(x[0:np.prod(s)].reshape(s))
x = x[np.prod(s):]

# create a policy forward pass in numpy
def mlp_policy(h):
h = h.reshape(1, -1)
h = np.tanh(h @ weights[0] + weights[1])
h = np.tanh(h @ weights[2] + weights[3])
h = h @ weights[4] + weights[5] + np.random.randn(1, self.action_dim) * np.exp(weights[6])
return h

# create the Hopper environment used to evaluate the policy
env = gym.make(self.env_name)

# perform multiple rollouts and average their episode returns
path_returns = np.zeros([1], dtype=np.float32)
total_return = 0.0
for _ in range(self.eval_n_trials):
obs = env.reset()
done = False
for step in range(1000):
obs, rew, done, info = env.step(mlp_policy(obs))
if render:
env.render(**render_kwargs)
total_return += rew
if done:
break
path_returns[0] = total_return / self.eval_n_trials

# return the average return across the evaluation trials
return path_returns.astype(np.float32)

def __init__(self, dataset: ContinuousDataset, **kwargs):
"""Initialize the ground truth score function f(x) for a model-based
optimization problem, which involves loading the parameters of an
oracle model and estimating its computational cost
Arguments:
dataset: ContinuousDataset
an instance of a subclass of the DatasetBuilder class which has
a set of design values 'x' and prediction values 'y', and defines
batching and sampling methods for those attributes
noise_std: float
the standard deviation of gaussian noise added to the prediction
values 'y' coming out of the ground truth score function f(x)
in order to make the optimization problem difficult
internal_measurements: int
an integer representing the number of independent measurements of
the prediction made by the oracle, which are subsequently
averaged, and is useful when the oracle is stochastic
"""

self.obs_dim = 11
self.action_dim = 3
self.hidden_dim = 64
self.env_name = 'Hopper-v2'
self.eval_n_trials = 10

# initialize the oracle using the super class
super(HopperControllerStochasticOracle, self).__init__(
dataset, internal_batch_size=1, is_batched=False,
expect_normalized_y=False,
expect_normalized_x=False, expect_logits=None, **kwargs)
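As a sanity check on the weight-unpacking loop in protected_predict, the flat design vector has to hold exactly the parameters of the two-hidden-layer tanh policy: two weight matrices, two hidden biases, the output layer and its bias, plus a log-standard-deviation vector for the Gaussian action noise. A small sketch of that bookkeeping, using the dimensions hard-coded in __init__ (11 observations, 64 hidden units, 3 actions):

import numpy as np

obs_dim, hidden_dim, action_dim = 11, 64, 3
shapes = [(obs_dim, hidden_dim), (1, hidden_dim),
          (hidden_dim, hidden_dim), (1, hidden_dim),
          (hidden_dim, action_dim), (1, action_dim),
          (1, action_dim)]  # last entry is the per-action log standard deviation

# total number of entries a flat design vector x must contain
print(sum(int(np.prod(s)) for s in shapes))  # 5126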
2 changes: 1 addition & 1 deletion setup.py
@@ -20,7 +20,7 @@
'brandontrabucco/design-bench/archive/v2_0_20.tar.gz',
keywords=['Deep Learning', 'Neural Networks',
'Benchmark', 'Model-Based Optimization'],
extras_require={'all': ['gym[mujoco]'], 'cma': ['cma']},
extras_require={'all': ['gym[mujoco]<0.26.0'], 'cma': ['cma']},
install_requires=['pandas', 'requests', 'scikit-learn',
'torch', 'torchvision', 'numpy',
'tensorflow>=2.2', 'transformers',
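One note on this pin: the rollout loop in the new oracle relies on the pre-0.26 gym interface, where env.reset() returns only the observation and env.step() returns a four-tuple, which is presumably why gym[mujoco] is now capped below 0.26.0. A minimal sketch of the assumed API (standalone illustration; requires the MuJoCo-backed Hopper-v2 environment):

import gym

env = gym.make('Hopper-v2')
obs = env.reset()  # old API: returns the observation only, not (obs, info)
obs, rew, done, info = env.step(env.action_space.sample())  # old API: 4-tuple
env.close()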
