diff --git a/models/rllab_humanoid.xml b/models/rllab_humanoid.xml
new file mode 100644
index 000000000..33faa6754
--- /dev/null
+++ b/models/rllab_humanoid.xml
@@ -0,0 +1,135 @@
+<!-- MuJoCo model for the rllab simple humanoid (135 lines, elided) -->
diff --git a/models/rllab_swimmer.xml b/models/rllab_swimmer.xml
new file mode 100644
index 000000000..e321031f0
--- /dev/null
+++ b/models/rllab_swimmer.xml
@@ -0,0 +1,43 @@
+<!-- MuJoCo model for the rllab swimmer (43 lines, elided) -->
diff --git a/softlearning/environments/gym/__init__.py b/softlearning/environments/gym/__init__.py
index 00167fec9..ce57b31cd 100644
--- a/softlearning/environments/gym/__init__.py
+++ b/softlearning/environments/gym/__init__.py
@@ -61,6 +61,16 @@
         'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
                         '.image_pusher_2d:BlindForkReacher2dEnv'),
     },
+    {
+        'id': 'Humanoid-RLLab-v0',
+        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
+                        '.rllab_humanoid:RLLabHumanoidEnv'),
+    },
+    {
+        'id': 'Swimmer-RLLab-v0',
+        'entry_point': (f'{MUJOCO_ENVIRONMENTS_PATH}'
+                        '.rllab_swimmer:RLLabSwimmerEnv'),
+    },
 )
 
 GENERAL_ENVIRONMENT_SPECS = (
diff --git a/softlearning/environments/gym/mujoco/rllab_humanoid.py b/softlearning/environments/gym/mujoco/rllab_humanoid.py
new file mode 100644
index 000000000..5d938a907
--- /dev/null
+++ b/softlearning/environments/gym/mujoco/rllab_humanoid.py
@@ -0,0 +1,82 @@
+import os.path as osp
+
+import numpy as np
+from gym.envs.mujoco.mujoco_env import MujocoEnv
+from serializable import Serializable
+
+from softlearning.misc.utils import PROJECT_PATH
+
+
+class RLLabHumanoidEnv(Serializable, MujocoEnv):
+    """Environment based on the HumanoidEnv from rllab.
+
+    https://github.com/rll/rllab/blob/master/rllab/envs/mujoco/simple_humanoid_env.py
+    """
+
+    MODEL_PATH = osp.abspath(
+        osp.join(PROJECT_PATH, 'models', 'rllab_humanoid.xml'))
+
+    def __init__(self,
+                 vel_deviation_cost_coeff=1e-2,
+                 alive_bonus=0.2,
+                 ctrl_cost_coeff=1e-3,
+                 impact_cost_coeff=1e-5,
+                 *args, **kwargs):
+        self._Serializable__initialize(locals())
+        self.vel_deviation_cost_coeff = vel_deviation_cost_coeff
+        self.alive_bonus = alive_bonus
+        self.ctrl_cost_coeff = ctrl_cost_coeff
+        self.impact_cost_coeff = impact_cost_coeff
+        MujocoEnv.__init__(
+            self, model_path=self.MODEL_PATH, frame_skip=5, *args, **kwargs)
+
+    def _get_obs(self):
+        data = self.sim.data
+        return np.concatenate([
+            data.qpos.flat,
+            data.qvel.flat,
+            np.clip(data.cfrc_ext, -1, 1).flat,
+            self.get_body_com("torso").flat,
+        ])
+
+    def _get_com(self):
+        # Mass-weighted average of the body frame positions, i.e. the
+        # center of mass of the whole model.
+        data = self.sim.data
+        mass = self.model.body_mass[:, None]
+        xpos = data.xipos
+        return np.sum(mass * xpos, 0) / np.sum(mass)
+
+    def step(self, action):
+        pos_before = self._get_com()
+        self.do_simulation(action, self.frame_skip)
+        pos_after = self._get_com()
+        next_obs = self._get_obs()
+
+        alive_bonus = self.alive_bonus
+        data = self.sim.data
+
+        # The center-of-mass velocity was originally computed by mujoco
+        # rather than by finite differences.
+        comvel = (pos_after - pos_before) / self.dt
+        lin_vel_reward = comvel[0]
+        lb, ub = self.action_space.low, self.action_space.high
+        scaling = (ub - lb) * 0.5
+        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
+            np.square(action / scaling))
+        impact_cost = 0.5 * self.impact_cost_coeff * np.sum(
+            np.square(np.clip(data.cfrc_ext, -1, 1)))
+        vel_deviation_cost = 0.5 * self.vel_deviation_cost_coeff * np.sum(
+            np.square(comvel[1:]))
+        reward = (lin_vel_reward + alive_bonus - ctrl_cost
+                  - impact_cost - vel_deviation_cost)
+        # Episode terminates when the torso height leaves [0.8, 2.0].
+        done = data.qpos[2] < 0.8 or data.qpos[2] > 2.0
+        env_infos = dict(reward_linvel=lin_vel_reward,
+                         reward_ctrl=-ctrl_cost,
+                         reward_alive=alive_bonus)
+
+        return next_obs, reward, done, env_infos
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos + np.random.normal(size=self.init_qpos.shape) * 0.01,
+            self.init_qvel + np.random.normal(size=self.init_qvel.shape) * 0.1)
+        return self._get_obs()
diff --git a/softlearning/environments/gym/mujoco/rllab_swimmer.py b/softlearning/environments/gym/mujoco/rllab_swimmer.py
new file mode 100644
index 000000000..3d54ecae4
--- /dev/null
+++ b/softlearning/environments/gym/mujoco/rllab_swimmer.py
@@ -0,0 +1,60 @@
+import os.path as osp
+
+import numpy as np
+from gym.envs.mujoco.mujoco_env import MujocoEnv
+from serializable import Serializable
+
+from softlearning.misc.utils import PROJECT_PATH
+
+
+class RLLabSwimmerEnv(MujocoEnv, Serializable):
+    """Environment based on the SwimmerEnv from rllab.
+
+    https://github.com/rll/rllab/blob/master/rllab/envs/mujoco/swimmer_env.py
+    """
+
+    MODEL_PATH = osp.abspath(
+        osp.join(PROJECT_PATH, 'models', 'rllab_swimmer.xml'))
+    ORI_IND = 2  # index of the orientation coordinate in qpos
+
+    def __init__(self, ctrl_cost_coeff=1e-2, *args, **kwargs):
+        self._Serializable__initialize(locals())
+        self.ctrl_cost_coeff = ctrl_cost_coeff
+        MujocoEnv.__init__(
+            self, model_path=self.MODEL_PATH, frame_skip=5, *args, **kwargs)
+
+    def _get_obs(self):
+        return np.concatenate([
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat,
+            # self.get_body_com("torso").flat,
+        ]).reshape(-1)
+
+    def get_ori(self):
+        return self.sim.data.qpos[self.__class__.ORI_IND]
+
+    def reset_model(self):
+        self.set_state(
+            self.init_qpos + np.random.normal(size=self.init_qpos.shape) * 0.01,
+            self.init_qvel + np.random.normal(size=self.init_qvel.shape) * 0.1)
+        return self._get_obs()
+
+    def step(self, action):
+        # pos_before = self.get_body_com("torso")[0]
+        pos_before = self.sim.data.qpos[0]
+        self.do_simulation(action, self.frame_skip)
+        pos_after = self.sim.data.qpos[0]
+        next_obs = self._get_obs()
+
+        lb, ub = self.action_space.low, self.action_space.high
+        scaling = (ub - lb) * 0.5
+        ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
+            np.square(action / scaling))
+        forward_reward = (pos_after - pos_before) / self.dt
+        reward = forward_reward - ctrl_cost
+        done = False
+        env_infos = dict(reward_forward=forward_reward,
+                         reward_ctrl=-ctrl_cost)
+
+        return next_obs, reward, done, env_infos
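
A minimal usage sketch for the two new environment ids, assuming that importing softlearning.environments.gym (the module edited above) triggers the gym registration of these specs; if registration is wired up elsewhere in softlearning, the import would need to be adjusted:

    import gym

    # Assumed to run the registration side effects for the new ids.
    import softlearning.environments.gym  # noqa: F401

    env = gym.make('Swimmer-RLLab-v0')
    observation = env.reset()
    for _ in range(100):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()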