From 19411bd64337d25070c3a24b969cb23847968888 Mon Sep 17 00:00:00 2001 From: Reggie <72816837+reginald-mclean@users.noreply.github.com> Date: Wed, 12 Jul 2023 10:15:40 -0400 Subject: [PATCH] Update mujoco-py to mujoco and gym to gymnasium + more (#421) --- .gitignore | 2 + CONTRIBUTING.md | 30 +- README.md | 40 ++- docker/Dockerfile | 21 +- metaworld/__init__.py | 27 +- .../sawyer_xyz/sawyer_assembly_peg.xml | 2 +- .../sawyer_xyz/sawyer_basketball.xml | 8 +- metaworld/envs/mujoco/env_dict.py | 286 +--------------- metaworld/envs/mujoco/mujoco_env.py | 157 --------- .../envs/mujoco/sawyer_xyz/sawyer_xyz_env.py | 318 +++++++++--------- .../sawyer_xyz/v1/sawyer_assembly_peg.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_basketball.py | 2 +- .../sawyer_xyz/v1/sawyer_bin_picking.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_box_close.py | 2 +- .../sawyer_xyz/v1/sawyer_button_press.py | 2 +- .../v1/sawyer_button_press_topdown.py | 2 +- .../v1/sawyer_button_press_topdown_wall.py | 2 +- .../sawyer_xyz/v1/sawyer_button_press_wall.py | 2 +- .../sawyer_xyz/v1/sawyer_coffee_button.py | 2 +- .../sawyer_xyz/v1/sawyer_coffee_pull.py | 2 +- .../sawyer_xyz/v1/sawyer_coffee_push.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_dial_turn.py | 2 +- .../sawyer_xyz/v1/sawyer_disassemble_peg.py | 2 +- .../envs/mujoco/sawyer_xyz/v1/sawyer_door.py | 6 +- .../mujoco/sawyer_xyz/v1/sawyer_door_lock.py | 2 +- .../sawyer_xyz/v1/sawyer_door_unlock.py | 2 +- .../sawyer_xyz/v1/sawyer_drawer_close.py | 2 +- .../sawyer_xyz/v1/sawyer_drawer_open.py | 2 +- .../sawyer_xyz/v1/sawyer_faucet_close.py | 2 +- .../sawyer_xyz/v1/sawyer_faucet_open.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_hammer.py | 2 +- .../sawyer_xyz/v1/sawyer_hand_insert.py | 2 +- .../sawyer_xyz/v1/sawyer_handle_press.py | 2 +- .../sawyer_xyz/v1/sawyer_handle_press_side.py | 2 +- .../sawyer_xyz/v1/sawyer_handle_pull.py | 2 +- .../sawyer_xyz/v1/sawyer_handle_pull_side.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_lever_pull.py | 2 +- 
.../v1/sawyer_peg_insertion_side.py | 2 +- .../sawyer_xyz/v1/sawyer_peg_unplug_side.py | 2 +- .../sawyer_xyz/v1/sawyer_pick_out_of_hole.py | 2 +- .../sawyer_xyz/v1/sawyer_plate_slide.py | 2 +- .../sawyer_xyz/v1/sawyer_plate_slide_back.py | 2 +- .../v1/sawyer_plate_slide_back_side.py | 2 +- .../sawyer_xyz/v1/sawyer_plate_slide_side.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_push_back.py | 2 +- .../v1/sawyer_reach_push_pick_place.py | 2 +- .../v1/sawyer_reach_push_pick_place_wall.py | 2 +- .../sawyer_xyz/v1/sawyer_shelf_place.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_soccer.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_stick_pull.py | 2 +- .../mujoco/sawyer_xyz/v1/sawyer_stick_push.py | 2 +- .../envs/mujoco/sawyer_xyz/v1/sawyer_sweep.py | 2 +- .../sawyer_xyz/v1/sawyer_sweep_into_goal.py | 2 +- .../sawyer_xyz/v1/sawyer_window_close.py | 2 +- .../sawyer_xyz/v1/sawyer_window_open.py | 2 +- .../envs/mujoco/sawyer_xyz/v2/__init__.py | 144 +++++++- .../sawyer_xyz/v2/sawyer_assembly_peg_v2.py | 48 ++- .../sawyer_xyz/v2/sawyer_basketball_v2.py | 46 ++- .../sawyer_xyz/v2/sawyer_bin_picking_v2.py | 30 +- .../sawyer_xyz/v2/sawyer_box_close_v2.py | 43 ++- .../v2/sawyer_button_press_topdown_v2.py | 43 ++- .../v2/sawyer_button_press_topdown_wall_v2.py | 39 ++- .../sawyer_xyz/v2/sawyer_button_press_v2.py | 36 +- .../v2/sawyer_button_press_wall_v2.py | 37 +- .../sawyer_xyz/v2/sawyer_coffee_button_v2.py | 33 +- .../sawyer_xyz/v2/sawyer_coffee_pull_v2.py | 36 +- .../sawyer_xyz/v2/sawyer_coffee_push_v2.py | 36 +- .../sawyer_xyz/v2/sawyer_dial_turn_v2.py | 52 ++- .../v2/sawyer_disassemble_peg_v2.py | 41 ++- .../sawyer_xyz/v2/sawyer_door_close_v2.py | 83 ++++- .../sawyer_xyz/v2/sawyer_door_lock_v2.py | 41 ++- .../sawyer_xyz/v2/sawyer_door_unlock_v2.py | 36 +- .../mujoco/sawyer_xyz/v2/sawyer_door_v2.py | 59 +++- .../sawyer_xyz/v2/sawyer_drawer_close_v2.py | 34 +- .../sawyer_xyz/v2/sawyer_drawer_open_v2.py | 37 +- .../sawyer_xyz/v2/sawyer_faucet_close_v2.py | 37 +- 
.../sawyer_xyz/v2/sawyer_faucet_open_v2.py | 37 +- .../mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py | 42 ++- .../sawyer_xyz/v2/sawyer_hand_insert_v2.py | 33 +- .../v2/sawyer_handle_press_side_v2.py | 40 ++- .../sawyer_xyz/v2/sawyer_handle_press_v2.py | 44 ++- .../v2/sawyer_handle_pull_side_v2.py | 41 ++- .../sawyer_xyz/v2/sawyer_handle_pull_v2.py | 41 ++- .../sawyer_xyz/v2/sawyer_lever_pull_v2.py | 40 ++- .../v2/sawyer_peg_insertion_side_v2.py | 42 ++- .../v2/sawyer_peg_unplug_side_v2.py | 36 +- .../v2/sawyer_pick_out_of_hole_v2.py | 30 +- .../sawyer_xyz/v2/sawyer_pick_place_v2.py | 35 +- .../v2/sawyer_pick_place_wall_v2.py | 39 ++- .../v2/sawyer_plate_slide_back_side_v2.py | 39 ++- .../v2/sawyer_plate_slide_back_v2.py | 38 ++- .../v2/sawyer_plate_slide_side_v2.py | 36 +- .../sawyer_xyz/v2/sawyer_plate_slide_v2.py | 35 +- .../sawyer_xyz/v2/sawyer_push_back_v2.py | 38 ++- .../mujoco/sawyer_xyz/v2/sawyer_push_v2.py | 33 +- .../sawyer_xyz/v2/sawyer_push_wall_v2.py | 38 ++- .../mujoco/sawyer_xyz/v2/sawyer_reach_v2.py | 38 ++- .../sawyer_xyz/v2/sawyer_reach_wall_v2.py | 32 +- .../sawyer_xyz/v2/sawyer_shelf_place_v2.py | 57 +++- .../mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py | 37 +- .../sawyer_xyz/v2/sawyer_stick_pull_v2.py | 40 ++- .../sawyer_xyz/v2/sawyer_stick_push_v2.py | 41 ++- .../v2/sawyer_sweep_into_goal_v2.py | 31 +- .../mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py | 34 +- .../sawyer_xyz/v2/sawyer_window_close_v2.py | 37 +- .../sawyer_xyz/v2/sawyer_window_open_v2.py | 37 +- .../policies/sawyer_assembly_v2_policy.py | 2 - .../policies/sawyer_basketball_v2_policy.py | 4 - .../policies/sawyer_box_close_v2_policy.py | 1 - .../policies/sawyer_door_close_v2_policy.py | 26 +- metaworld/policies/sawyer_hammer_v2_policy.py | 1 - .../policies/sawyer_hand_insert_v2_policy.py | 1 - .../sawyer_pick_place_wall_v2_policy.py | 4 +- .../policies/sawyer_push_back_v2_policy.py | 2 +- .../policies/sawyer_push_wall_v2_policy.py | 1 - pyproject.toml | 5 +- scripts/policy_testing.py | 49 +++ 
tests/helpers.py | 8 +- tests/integration/helpers.py | 33 ++ tests/integration/test_memory_usage.py | 12 +- tests/integration/test_new_api.py | 6 +- tests/integration/test_single_goal_envs.py | 16 +- .../envs/mujoco/sawyer_xyz/helpers.py | 33 ++ .../mujoco/sawyer_xyz/test_obs_space_hand.py | 4 +- .../mujoco/sawyer_xyz/test_sawyer_xyz_env.py | 10 +- .../sawyer_xyz/test_scripted_policies.py | 230 +++++-------- .../mujoco/sawyer_xyz/test_seeded_rand_vec.py | 6 +- .../metaworld/envs/mujoco/sawyer_xyz/utils.py | 28 +- 128 files changed, 2332 insertions(+), 1267 deletions(-) create mode 100644 scripts/policy_testing.py create mode 100644 tests/integration/helpers.py create mode 100644 tests/metaworld/envs/mujoco/sawyer_xyz/helpers.py diff --git a/.gitignore b/.gitignore index 03b6378d8..a164d9cc3 100644 --- a/.gitignore +++ b/.gitignore @@ -146,3 +146,5 @@ MUJOCO_LOG.TXT # tool Pipfile Pipfile.lock + +mujoco_migration.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 751148e75..62c870818 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ Ensure that your task and pull request: * [ ] Can be performed by a real robot arm * [ ] Is dissimilar from current tasks * [ ] Contains meaningful internal variation (e.g. different object positions, etc.) 
-* [ ] Conforms to the action space, observation space, and reward functions conventions used by metaworld environments +* [ ] Conforms to the action space, observation space, and reward functions conventions used by Meta-World environments * [ ] Uses existing assets if they exist, and that any new assets added are high-quality * [ ] Follows the code quality, style, testing, and documentation guidelines outlined below * [ ] Provides learning curves which show the task can by solved by PPO and SAC, using the implementations linked below @@ -153,12 +153,12 @@ These are Meta-World specific rules which are not part of the aforementioned sty ```python import collections - import gym.spaces + import gymnasium.spaces from garage.tf.models import MLPModel q = collections.deque(10) - d = gym.spaces.Discrete(5) + d = gymnasium.spaces.Discrete(5) m = MLPModel(output_dim=2) ``` @@ -166,7 +166,7 @@ These are Meta-World specific rules which are not part of the aforementioned sty ```python from collections import deque - from gym.spaces import Discrete + from gymnasium.spaces import Discrete import tensorflow as tf from garage.tf.models import MLPModel @@ -239,14 +239,14 @@ Do's and Don'ts for avoiding accidental merge commits and other headaches: * *Don't* use `git merge` * *Don't* use `git pull` (unless git tells you that your branch can be fast-forwarded) * *Don't* make commits in the `master` branch---always use a feature branch -* *Do* fetch upstream (`rlworkgroup/metaworld`) frequently and keep your `master` branch up-to-date with upstream +* *Do* fetch upstream (`Farama-Foundation/Metaworld`) frequently and keep your `master` branch up-to-date with upstream * *Do* rebase your feature branch on `master` frequently * *Do* keep only one or a few commits in your feature branch, and use `git commit --amend` to update your changes. This helps prevent long chains of identical merges during a rebase. 
Please see [this guide](https://gist.github.com/markreid/12e7c2203916b93d23c27a263f6091a0) for a tutorial on the workflow. Note: unlike the guide, we don't use separate `develop`/`master` branches, so all PRs should be based on `master` rather than `develop` ### Commit message format -metaworld follows the git commit message guidelines documented [here](https://gist.github.com/robertpainsi/b632364184e70900af4ab688decf6f53) and [here](https://chris.beams.io/posts/git-commit/). You can also find an in-depth guide to writing great commit messages [here](https://github.com/RomuloOliveira/commit-messages-guide/blob/master/README.md) +Meta-World follows the git commit message guidelines documented [here](https://gist.github.com/robertpainsi/b632364184e70900af4ab688decf6f53) and [here](https://chris.beams.io/posts/git-commit/). You can also find an in-depth guide to writing great commit messages [here](https://github.com/RomuloOliveira/commit-messages-guide/blob/master/README.md) In short: * All commit messages have an informative subject line of 50 characters @@ -257,20 +257,20 @@ In short: These recipes assume you are working out of a private GitHub fork. -If you are working directly as a contributor to `rlworkgroup`, you can replace references to `rlworkgroup` with `origin`. You also, of course, do not need to add `rlworkgroup` as a remote, since it will be `origin` in your repository. +If you are working directly as a contributor to `Farama-Foundation`, you can replace references to `Farama-Foundation` with `origin`. You also, of course, do not need to add `Farama-Foundation` as a remote, since it will be `origin` in your repository. 
-#### Clone your GitHub fork and setup the rlworkgroup remote +#### Clone your GitHub fork and setup the Farama-Foundation remote ```sh git clone git@github.com:/metaworld.git cd metaworld -git remote add rlworkgroup git@github.com:rlworkgroup/metaworld.git -git fetch rlworkgroup +git remote add Farama-Foundation git@github.com:Farama-Foundation/metaworld.git +git fetch Farama-Foundation ``` #### Update your GitHub fork with the latest from upstream ```sh -git fetch rlworkgroup -git reset --hard master rlworkgroup/master +git fetch Farama-Foundation +git reset --hard master Farama-Foundation/master git push -f origin master ``` @@ -287,8 +287,8 @@ git push origin myfeaturebranch #### Rebase a feature branch so it's up-to-date with upstream and push it to your fork ```sh git checkout master -git fetch rlworkgroup -git reset --hard rlworkgroup/master +git fetch Farama-Foundation +git reset --hard Farama-Foundation/master git checkout myfeaturebranch git rebase master # you may need to manually reconcile merge conflicts here. Follow git's instructions. @@ -298,4 +298,4 @@ git push -f origin myfeaturebranch # -f is frequently necessary because rebases ## Release ### Modify CHANGELOG.md -For each release in metaworld, modify [CHANGELOG.md](https://github.com/rlworkgroup/metaworld/blob/master/CHANGELOG.md) with the most relevant changes from the latest release. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), which adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +For each release in metaworld, modify [CHANGELOG.md](https://github.com/Farama-Foundation/Metaworld/blob/master/CHANGELOG.md) with the most relevant changes from the latest release. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), which adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
diff --git a/README.md b/README.md index def96912a..9bdd445a1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Meta-World -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/rlworkgroup/metaworld/blob/master/LICENSE) -![Build Status](https://github.com/rlworkgroup/metaworld/workflows/MetaWorld%20CI/badge.svg) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/Farama-Foundation/metaworld/blob/master/LICENSE) +![Build Status](https://github.com/Farama-Foundation/Metaworld/workflows/MetaWorld%20CI/badge.svg) + +# The current version of Meta-World is a work in progress. If you find any bugs/errors please open an issue. __Meta-World is an open-source simulated benchmark for meta-reinforcement learning and multi-task learning consisting of 50 distinct robotic manipulation tasks.__ We aim to provide task distributions that are sufficiently broad to evaluate meta-RL algorithms' generalization ability to new behaviors. @@ -20,13 +22,15 @@ __Table of Contents__ - [Acknowledgements](#acknowledgements) ## Join the Community + Metaworld is now maintained by the Farama Foundation! You can interact with our community and the new developers in our [Discord server](https://discord.gg/PfR7a79FpQ) ## Maintenance Status The current roadmap for Meta-World can be found [here](https://github.com/Farama-Foundation/Metaworld/issues/409) ## Installation -Meta-World is based on MuJoCo, which has a proprietary dependency we can't set up for you. Please follow the [instructions](https://github.com/openai/mujoco-py#install-mujoco) in the mujoco-py package for help. 
Once you're ready to install everything, run: +To install everything, run: + ``` pip install git+https://github.com/Farama-Foundation/Metaworld.git@master#egg=metaworld @@ -34,9 +38,9 @@ pip install git+https://github.com/Farama-Foundation/Metaworld.git@master#egg=me Alternatively, you can clone the repository and install an editable version locally: -``` -git clone https://github.com/rlworkgroup/metaworld.git -cd metaworld +```sh +git clone https://github.com/Farama-Foundation/Metaworld.git +cd Metaworld pip install -e . ``` @@ -50,11 +54,11 @@ Here is a list of benchmark environments for meta-RL (ML*) and multi-task-RL (MT * [__ML1__](https://meta-world.github.io/figures/ml1.gif) is a meta-RL benchmark environment which tests few-shot adaptation to goal variation within single task. You can choose to test variation within any of [50 tasks](https://meta-world.github.io/figures/ml45-1080p.gif) for this benchmark. * [__ML10__](https://meta-world.github.io/figures/ml10.gif) is a meta-RL benchmark which tests few-shot adaptation to new tasks. It comprises 10 meta-train tasks, and 3 test tasks. * [__ML45__](https://meta-world.github.io/figures/ml45-1080p.gif) is a meta-RL benchmark which tests few-shot adaptation to new tasks. It comprises 45 meta-train tasks and 5 test tasks. -* [__MT10__](https://meta-world.github.io/figures/mt10.gif), __MT1__, and __MT50__ are multi-task-RL benchmark environments for learning a multi-task policy that perform 10, 1, and 50 training tasks respectively. __MT1__ is similar to __ML1__ becau you can choose to test variation within any of [50 tasks](https://meta-world.github.io/figures/ml45-1080p.gif) for this benchmark. In the original Metaworld experiments, we augment MT10 and MT50 environment observations with a one-hot vector which identifies the task. 
We don't enforce how users utilize task one-hot vectors, however one solution would be to use a Gym wrapper such as [this one](https://github.com/rlworkgroup/garage/blob/master/src/garage/envs/multi_env_wrapper.py) +* [__MT10__](https://meta-world.github.io/figures/mt10.gif), __MT1__, and __MT50__ are multi-task-RL benchmark environments for learning a multi-task policy that perform 10, 1, and 50 training tasks respectively. __MT1__ is similar to __ML1__ because you can choose to test variation within any of [50 tasks](https://meta-world.github.io/figures/ml45-1080p.gif) for this benchmark. In the original Meta-World experiments, we augment MT10 and MT50 environment observations with a one-hot vector which identifies the task. We don't enforce how users utilize task one-hot vectors, however one solution would be to use a Gym wrapper such as [this one](https://github.com/rlworkgroup/garage/blob/master/src/garage/envs/multi_env_wrapper.py) ### Basics -We provide a `Benchmark` API, that allows constructing environments following the [`gym.Env`](https://github.com/openai/gym/blob/c33cfd8b2cc8cac6c346bc2182cd568ef33b8821/gym/core.py#L8) interface. +We provide a `Benchmark` API, that allows constructing environments following the [`gymnasium.Env`](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/core.py#L21) interface. To use a `Benchmark`, first construct it (this samples the tasks allowed for one run of an algorithm on the benchmark). Then, construct at least one instance of each environment listed in `benchmark.train_classes` and `benchmark.test_classes`. 
@@ -95,7 +99,7 @@ env.set_task(task) # Set task obs = env.reset() # Reset environment a = env.action_space.sample() # Sample an action -obs, reward, done, info = env.step(a) # Step the environoment with the sampled random action +obs, reward, done, info = env.step(a) # Step the environment with the sampled random action ``` __MT1__ can be run the same way except that it does not contain any `test_tasks` ### Running a benchmark @@ -117,7 +121,7 @@ for name, env_cls in ml10.train_classes.items(): for env in training_envs: obs = env.reset() # Reset environment a = env.action_space.sample() # Sample an action - obs, reward, done, info = env.step(a) # Step the environoment with the sampled random action + obs, reward, done, info = env.step(a) # Step the environment with the sampled random action ``` Create an environment with test tasks (this only works for ML10 and ML45, since MT10 and MT50 don't have a separate set of test tasks): ```python @@ -137,11 +141,11 @@ for name, env_cls in ml10.test_classes.items(): for env in testing_envs: obs = env.reset() # Reset environment a = env.action_space.sample() # Sample an action - obs, reward, done, info = env.step(a) # Step the environoment with the sampled random action + obs, reward, done, info = env.step(a) # Step the environment with the sampled random action ``` ## Accessing Single Goal Environments -You may wish to only access individual environments used in the Metaworld benchmark for your research. +You may wish to only access individual environments used in the Meta-World benchmark for your research. We provide constructors for creating environments where the goal has been hidden (by zeroing out the goal in the observation) and environments where the goal is observable. They are called GoalHidden and GoalObservable environments respectively. 
@@ -161,7 +165,7 @@ door_open_goal_hidden_cls = ALL_V2_ENVIRONMENTS_GOAL_HIDDEN["door-open-v2-goal-h env = door_open_goal_hidden_cls() env.reset() # Reset environment a = env.action_space.sample() # Sample an action -obs, reward, done, info = env.step(a) # Step the environoment with the sampled random action +obs, reward, done, info = env.step(a) # Step the environment with the sampled random action assert (obs[-3:] == np.zeros(3)).all() # goal will be zeroed out because env is HiddenGoal # You can choose to initialize the random seed of the environment. @@ -173,7 +177,8 @@ env1.reset() # Reset environment env2.reset() a1 = env1.action_space.sample() # Sample an action a2 = env2.action_space.sample() -next_obs1, _, _, _ = env1.step(a1) # Step the environoment with the sampled random action +next_obs1, _, _, _ = env1.step(a1) # Step the environment with the sampled random action + next_obs2, _, _, _ = env2.step(a2) assert (next_obs1[-3:] == next_obs2[-3:]).all() # 2 envs initialized with the same seed will have the same goal assert not (next_obs2[-3:] == np.zeros(3)).all() # The env's are goal observable, meaning the goal is not zero'd out @@ -183,7 +188,7 @@ env1.reset() # Reset environment env3.reset() a1 = env1.action_space.sample() # Sample an action a3 = env3.action_space.sample() -next_obs1, _, _, _ = env1.step(a1) # Step the environoment with the sampled random action +next_obs1, _, _, _ = env1.step(a1) # Step the environment with the sampled random action next_obs3, _, _, _ = env3.step(a3) assert not (next_obs1[-3:] == next_obs3[-3:]).all() # 2 envs initialized with different seeds will have different goals @@ -208,11 +213,12 @@ If you use Meta-World for academic research, please kindly cite our CoRL 2019 pa ``` ## Accompanying Baselines -If you're looking for implementations of the baselines algorithms used in the Metaworld conference publication, please look at our sister directory, [Garage](https://github.com/rlworkgroup/garage). 
+If you're looking for implementations of the baselines algorithms used in the Meta-World conference publication, please look at our sister directory, [Garage](https://github.com/rlworkgroup/garage). + Note that these aren't the exact same baselines that were used in the original conference publication, however they are true to the original baselines. ## Become a Contributor -We welcome all contributions to Meta-World. Please refer to the [contributor's guide](https://github.com/rlworkgroup/metaworld/blob/master/CONTRIBUTING.md) for how to prepare your contributions. +We welcome all contributions to Meta-World. Please refer to the [contributor's guide](https://github.com/Farama-Foundation/Metaworld/blob/master/CONTRIBUTING.md) for how to prepare your contributions. ## Acknowledgements Meta-World is a work by [Tianhe Yu (Stanford University)](https://cs.stanford.edu/~tianheyu/), [Deirdre Quillen (UC Berkeley)](https://scholar.google.com/citations?user=eDQsOFMAAAAJ&hl=en), [Zhanpeng He (Columbia University)](https://zhanpenghe.github.io), [Ryan Julian (University of Southern California)](https://ryanjulian.me), [Karol Hausman (Google AI)](https://karolhausman.github.io), [Chelsea Finn (Stanford University)](https://ai.stanford.edu/~cbfinn/) and [Sergey Levine (UC Berkeley)](https://people.eecs.berkeley.edu/~svlevine/). 
diff --git a/docker/Dockerfile b/docker/Dockerfile index ab22e9185..66ceb22e9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -7,26 +7,17 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] RUN apt-get -y update \ && apt-get install --no-install-recommends -y \ libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev \ - xvfb unzip patchelf ffmpeg cmake swig \ + xvfb unzip patchelf ffmpeg cmake swig git\ && apt-get autoremove -y \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - # Download mujoco - && mkdir /root/.mujoco \ - && cd /root/.mujoco \ - && wget -qO- 'https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz' | tar -xzvf - - -ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin" - -# Build mujoco-py from source. Pypi installs wheel packages and Cython won't recompile old file versions in the Github Actions CI. -# Thus generating the following error https://github.com/cython/cython/pull/4428 -RUN git clone https://github.com/openai/mujoco-py.git\ - && cd mujoco-py \ - && pip install -e . + && rm -rf /var/lib/apt/lists/* COPY . 
/usr/local/metaworld/ WORKDIR /usr/local/metaworld/ - +RUN free -g RUN pip install .[testing] +RUN git clone https://github.com/reginald-mclean/Gymnasium.git +RUN pip install -e Gymnasium + ENTRYPOINT ["/usr/local/metaworld/docker/entrypoint"] diff --git a/metaworld/__init__.py b/metaworld/__init__.py index 6d4120ca9..951a96148 100644 --- a/metaworld/__init__.py +++ b/metaworld/__init__.py @@ -5,6 +5,7 @@ from typing import List, NamedTuple, Type import numpy as np +from memory_profiler import profile import metaworld.envs.mujoco.env_dict as _env_dict @@ -85,8 +86,7 @@ def _make_tasks(classes, args_kwargs, kwargs_override, seed=None): tasks = [] for env_name, args in args_kwargs.items(): assert len(args["args"]) == 0 - env_cls = classes[env_name] - env = env_cls() + env = classes[env_name]() env._freeze_rand_vec = False env._set_task_called = True rand_vecs = [] @@ -97,15 +97,17 @@ def _make_tasks(classes, args_kwargs, kwargs_override, seed=None): env.reset() rand_vecs.append(env._last_rand_vec) unique_task_rand_vecs = np.unique(np.array(rand_vecs), axis=0) - assert unique_task_rand_vecs.shape[0] == _N_GOALS - + assert unique_task_rand_vecs.shape[0] == _N_GOALS, unique_task_rand_vecs.shape[ + 0 + ] env.close() for rand_vec in rand_vecs: kwargs = args["kwargs"].copy() del kwargs["task_id"] - kwargs.update(dict(rand_vec=rand_vec, env_cls=env_cls)) + kwargs.update(dict(rand_vec=rand_vec, env_cls=classes[env_name])) kwargs.update(kwargs_override) tasks.append(_encode_task(env_name, kwargs)) + del env if seed is not None: np.random.set_state(st0) return tasks @@ -150,13 +152,13 @@ def __init__(self, env_name, seed=None): raise ValueError(f"{env_name} is not a V2 environment") cls = _env_dict.ALL_V2_ENVIRONMENTS[env_name] self._train_classes = OrderedDict([(env_name, cls)]) - self._test_classes = self._train_classes - self._train_ = OrderedDict([(env_name, cls)]) + self._test_classes = OrderedDict([(env_name, cls)]) args_kwargs = _env_dict.ML1_args_kwargs[env_name] 
self._train_tasks = _make_tasks( self._train_classes, {env_name: args_kwargs}, _MT_OVERRIDE, seed=seed ) + self._test_tasks = [] @@ -166,10 +168,12 @@ def __init__(self, seed=None): self._train_classes = _env_dict.ML10_V2["train"] self._test_classes = _env_dict.ML10_V2["test"] train_kwargs = _env_dict.ml10_train_args_kwargs + + test_kwargs = _env_dict.ml10_test_args_kwargs self._train_tasks = _make_tasks( self._train_classes, train_kwargs, _ML_OVERRIDE, seed=seed ) - test_kwargs = _env_dict.ml10_test_args_kwargs + self._test_tasks = _make_tasks( self._test_classes, test_kwargs, _ML_OVERRIDE, seed=seed ) @@ -181,10 +185,11 @@ def __init__(self, seed=None): self._train_classes = _env_dict.ML45_V2["train"] self._test_classes = _env_dict.ML45_V2["test"] train_kwargs = _env_dict.ml45_train_args_kwargs + test_kwargs = _env_dict.ml45_test_args_kwargs + self._train_tasks = _make_tasks( self._train_classes, train_kwargs, _ML_OVERRIDE, seed=seed ) - test_kwargs = _env_dict.ml45_test_args_kwargs self._test_tasks = _make_tasks( self._test_classes, test_kwargs, _ML_OVERRIDE, seed=seed ) @@ -199,7 +204,9 @@ def __init__(self, seed=None): self._train_tasks = _make_tasks( self._train_classes, train_kwargs, _MT_OVERRIDE, seed=seed ) + self._test_tasks = [] + self._test_classes = [] class MT50(Benchmark): @@ -208,9 +215,11 @@ def __init__(self, seed=None): self._train_classes = _env_dict.MT50_V2 self._test_classes = OrderedDict() train_kwargs = _env_dict.MT50_V2_ARGS_KWARGS + self._train_tasks = _make_tasks( self._train_classes, train_kwargs, _MT_OVERRIDE, seed=seed ) + self._test_tasks = [] diff --git a/metaworld/envs/assets_v2/sawyer_xyz/sawyer_assembly_peg.xml b/metaworld/envs/assets_v2/sawyer_xyz/sawyer_assembly_peg.xml index 7167601b2..d8d9bba31 100644 --- a/metaworld/envs/assets_v2/sawyer_xyz/sawyer_assembly_peg.xml +++ b/metaworld/envs/assets_v2/sawyer_xyz/sawyer_assembly_peg.xml @@ -24,6 +24,6 @@ - + diff --git a/metaworld/envs/assets_v2/sawyer_xyz/sawyer_basketball.xml 
b/metaworld/envs/assets_v2/sawyer_xyz/sawyer_basketball.xml index 351bb5d76..3997fad41 100644 --- a/metaworld/envs/assets_v2/sawyer_xyz/sawyer_basketball.xml +++ b/metaworld/envs/assets_v2/sawyer_xyz/sawyer_basketball.xml @@ -17,10 +17,10 @@ - - - - + + + + diff --git a/metaworld/envs/mujoco/env_dict.py b/metaworld/envs/mujoco/env_dict.py index 385741f3b..d11358451 100644 --- a/metaworld/envs/mujoco/env_dict.py +++ b/metaworld/envs/mujoco/env_dict.py @@ -3,54 +3,6 @@ import numpy as np -from metaworld.envs.mujoco.sawyer_xyz.v1 import ( - SawyerBasketballEnv, - SawyerBinPickingEnv, - SawyerBoxCloseEnv, - SawyerButtonPressEnv, - SawyerButtonPressTopdownEnv, - SawyerButtonPressTopdownWallEnv, - SawyerButtonPressWallEnv, - SawyerCoffeeButtonEnv, - SawyerCoffeePullEnv, - SawyerCoffeePushEnv, - SawyerDialTurnEnv, - SawyerDoorCloseEnv, - SawyerDoorEnv, - SawyerDoorLockEnv, - SawyerDoorUnlockEnv, - SawyerDrawerCloseEnv, - SawyerDrawerOpenEnv, - SawyerFaucetCloseEnv, - SawyerFaucetOpenEnv, - SawyerHammerEnv, - SawyerHandInsertEnv, - SawyerHandlePressEnv, - SawyerHandlePressSideEnv, - SawyerHandlePullEnv, - SawyerHandlePullSideEnv, - SawyerLeverPullEnv, - SawyerNutAssemblyEnv, - SawyerNutDisassembleEnv, - SawyerPegInsertionSideEnv, - SawyerPegUnplugSideEnv, - SawyerPickOutOfHoleEnv, - SawyerPlateSlideBackEnv, - SawyerPlateSlideBackSideEnv, - SawyerPlateSlideEnv, - SawyerPlateSlideSideEnv, - SawyerPushBackEnv, - SawyerReachPushPickPlaceEnv, - SawyerReachPushPickPlaceWallEnv, - SawyerShelfPlaceEnv, - SawyerSoccerEnv, - SawyerStickPullEnv, - SawyerStickPushEnv, - SawyerSweepEnv, - SawyerSweepIntoGoalEnv, - SawyerWindowCloseEnv, - SawyerWindowOpenEnv, -) from metaworld.envs.mujoco.sawyer_xyz.v2 import ( SawyerBasketballEnvV2, SawyerBinPickingEnvV2, @@ -104,61 +56,6 @@ SawyerWindowOpenEnvV2, ) -ALL_V1_ENVIRONMENTS = OrderedDict( - ( - ("reach-v1", SawyerReachPushPickPlaceEnv), - ("push-v1", SawyerReachPushPickPlaceEnv), - ("pick-place-v1", SawyerReachPushPickPlaceEnv), - 
("door-open-v1", SawyerDoorEnv), - ("drawer-open-v1", SawyerDrawerOpenEnv), - ("drawer-close-v1", SawyerDrawerCloseEnv), - ("button-press-topdown-v1", SawyerButtonPressTopdownEnv), - ("peg-insert-side-v1", SawyerPegInsertionSideEnv), - ("window-open-v1", SawyerWindowOpenEnv), - ("window-close-v1", SawyerWindowCloseEnv), - ("door-close-v1", SawyerDoorCloseEnv), - ("reach-wall-v1", SawyerReachPushPickPlaceWallEnv), - ("pick-place-wall-v1", SawyerReachPushPickPlaceWallEnv), - ("push-wall-v1", SawyerReachPushPickPlaceWallEnv), - ("button-press-v1", SawyerButtonPressEnv), - ("button-press-topdown-wall-v1", SawyerButtonPressTopdownWallEnv), - ("button-press-wall-v1", SawyerButtonPressWallEnv), - ("peg-unplug-side-v1", SawyerPegUnplugSideEnv), - ("disassemble-v1", SawyerNutDisassembleEnv), - ("hammer-v1", SawyerHammerEnv), - ("plate-slide-v1", SawyerPlateSlideEnv), - ("plate-slide-side-v1", SawyerPlateSlideSideEnv), - ("plate-slide-back-v1", SawyerPlateSlideBackEnv), - ("plate-slide-back-side-v1", SawyerPlateSlideBackSideEnv), - ("handle-press-v1", SawyerHandlePressEnv), - ("handle-pull-v1", SawyerHandlePullEnv), - ("handle-press-side-v1", SawyerHandlePressSideEnv), - ("handle-pull-side-v1", SawyerHandlePullSideEnv), - ("stick-push-v1", SawyerStickPushEnv), - ("stick-pull-v1", SawyerStickPullEnv), - ("basketball-v1", SawyerBasketballEnv), - ("soccer-v1", SawyerSoccerEnv), - ("faucet-open-v1", SawyerFaucetOpenEnv), - ("faucet-close-v1", SawyerFaucetCloseEnv), - ("coffee-push-v1", SawyerCoffeePushEnv), - ("coffee-pull-v1", SawyerCoffeePullEnv), - ("coffee-button-v1", SawyerCoffeeButtonEnv), - ("sweep-v1", SawyerSweepEnv), - ("sweep-into-v1", SawyerSweepIntoGoalEnv), - ("pick-out-of-hole-v1", SawyerPickOutOfHoleEnv), - ("assembly-v1", SawyerNutAssemblyEnv), - ("shelf-place-v1", SawyerShelfPlaceEnv), - ("push-back-v1", SawyerPushBackEnv), - ("lever-pull-v1", SawyerLeverPullEnv), - ("dial-turn-v1", SawyerDialTurnEnv), - ("bin-picking-v1", SawyerBinPickingEnv), - 
("box-close-v1", SawyerBoxCloseEnv), - ("hand-insert-v1", SawyerHandInsertEnv), - ("door-lock-v1", SawyerDoorLockEnv), - ("door-unlock-v1", SawyerDoorUnlockEnv), - ) -) - ALL_V2_ENVIRONMENTS = OrderedDict( ( ("assembly-v2", SawyerNutAssemblyEnvV2), @@ -217,183 +114,8 @@ ) ) -_NUM_METAWORLD_ENVS = len(ALL_V1_ENVIRONMENTS) - -EASY_MODE_CLS_DICT = OrderedDict( - ( - ("reach-v1", SawyerReachPushPickPlaceEnv), - ("push-v1", SawyerReachPushPickPlaceEnv), - ("pick-place-v1", SawyerReachPushPickPlaceEnv), - ("door-open-v1", SawyerDoorEnv), - ("drawer-open-v1", SawyerDrawerOpenEnv), - ("drawer-close-v1", SawyerDrawerCloseEnv), - ("button-press-topdown-v1", SawyerButtonPressTopdownEnv), - ("peg-insert-side-v1", SawyerPegInsertionSideEnv), - ("window-open-v1", SawyerWindowOpenEnv), - ("window-close-v1", SawyerWindowCloseEnv), - ), -) - -EASY_MODE_ARGS_KWARGS = { - key: dict(args=[], kwargs={"task_id": list(ALL_V1_ENVIRONMENTS.keys()).index(key)}) - for key, _ in EASY_MODE_CLS_DICT.items() -} - -EASY_MODE_ARGS_KWARGS["reach-v1"]["kwargs"]["task_type"] = "reach" -EASY_MODE_ARGS_KWARGS["push-v1"]["kwargs"]["task_type"] = "push" -EASY_MODE_ARGS_KWARGS["pick-place-v1"]["kwargs"]["task_type"] = "pick_place" - -MEDIUM_MODE_CLS_DICT = OrderedDict( - ( - ( - "train", - OrderedDict( - ( - ("reach-v1", SawyerReachPushPickPlaceEnv), - ("push-v1", SawyerReachPushPickPlaceEnv), - ("pick-place-v1", SawyerReachPushPickPlaceEnv), - ("door-open-v1", SawyerDoorEnv), - ("drawer-close-v1", SawyerDrawerCloseEnv), - ("button-press-topdown-v1", SawyerButtonPressTopdownEnv), - ("peg-insert-side-v1", SawyerPegInsertionSideEnv), - ("window-open-v1", SawyerWindowOpenEnv), - ("sweep-v1", SawyerSweepEnv), - ("basketball-v1", SawyerBasketballEnv), - ) - ), - ), - ( - "test", - OrderedDict( - ( - ("drawer-open-v1", SawyerDrawerOpenEnv), - ("door-close-v1", SawyerDoorCloseEnv), - ("shelf-place-v1", SawyerShelfPlaceEnv), - ("sweep-into-v1", SawyerSweepIntoGoalEnv), - ( - "lever-pull-v1", - SawyerLeverPullEnv, 
- ), - ) - ), - ), - ) -) -medium_mode_train_args_kwargs = { - key: dict( - args=[], - kwargs={ - "task_id": list(ALL_V1_ENVIRONMENTS.keys()).index(key), - }, - ) - for key, _ in MEDIUM_MODE_CLS_DICT["train"].items() -} - -medium_mode_test_args_kwargs = { - key: dict(args=[], kwargs={"task_id": list(ALL_V1_ENVIRONMENTS.keys()).index(key)}) - for key, _ in MEDIUM_MODE_CLS_DICT["test"].items() -} - -medium_mode_train_args_kwargs["reach-v1"]["kwargs"]["task_type"] = "reach" -medium_mode_train_args_kwargs["push-v1"]["kwargs"]["task_type"] = "push" -medium_mode_train_args_kwargs["pick-place-v1"]["kwargs"]["task_type"] = "pick_place" - -MEDIUM_MODE_ARGS_KWARGS = dict( - train=medium_mode_train_args_kwargs, - test=medium_mode_test_args_kwargs, -) -""" - ML45 environments and arguments -""" -HARD_MODE_CLS_DICT = OrderedDict( - ( - ( - "train", - OrderedDict( - ( - ("reach-v1", SawyerReachPushPickPlaceEnv), - ("push-v1", SawyerReachPushPickPlaceEnv), - ("pick-place-v1", SawyerReachPushPickPlaceEnv), - ("door-open-v1", SawyerDoorEnv), - ("drawer-open-v1", SawyerDrawerOpenEnv), - ("drawer-close-v1", SawyerDrawerCloseEnv), - ("button-press-topdown-v1", SawyerButtonPressTopdownEnv), - ("peg-insert-side-v1", SawyerPegInsertionSideEnv), - ("window-open-v1", SawyerWindowOpenEnv), - ("window-close-v1", SawyerWindowCloseEnv), - ("door-close-v1", SawyerDoorCloseEnv), - ("reach-wall-v1", SawyerReachPushPickPlaceWallEnv), - ("pick-place-wall-v1", SawyerReachPushPickPlaceWallEnv), - ("push-wall-v1", SawyerReachPushPickPlaceWallEnv), - ("button-press-v1", SawyerButtonPressEnv), - ("button-press-topdown-wall-v1", SawyerButtonPressTopdownWallEnv), - ("button-press-wall-v1", SawyerButtonPressWallEnv), - ("peg-unplug-side-v1", SawyerPegUnplugSideEnv), - ("disassemble-v1", SawyerNutDisassembleEnv), - ("hammer-v1", SawyerHammerEnv), - ("plate-slide-v1", SawyerPlateSlideEnv), - ("plate-slide-side-v1", SawyerPlateSlideSideEnv), - ("plate-slide-back-v1", SawyerPlateSlideBackEnv), - 
("plate-slide-back-side-v1", SawyerPlateSlideBackSideEnv), - ("handle-press-v1", SawyerHandlePressEnv), - ("handle-pull-v1", SawyerHandlePullEnv), - ("handle-press-side-v1", SawyerHandlePressSideEnv), - ("handle-pull-side-v1", SawyerHandlePullSideEnv), - ("stick-push-v1", SawyerStickPushEnv), - ("stick-pull-v1", SawyerStickPullEnv), - ("basketball-v1", SawyerBasketballEnv), - ("soccer-v1", SawyerSoccerEnv), - ("faucet-open-v1", SawyerFaucetOpenEnv), - ("faucet-close-v1", SawyerFaucetCloseEnv), - ("coffee-push-v1", SawyerCoffeePushEnv), - ("coffee-pull-v1", SawyerCoffeePullEnv), - ("coffee-button-v1", SawyerCoffeeButtonEnv), - ("sweep-v1", SawyerSweepEnv), - ("sweep-into-v1", SawyerSweepIntoGoalEnv), - ("pick-out-of-hole-v1", SawyerPickOutOfHoleEnv), - ("assembly-v1", SawyerNutAssemblyEnv), - ("shelf-place-v1", SawyerShelfPlaceEnv), - ("push-back-v1", SawyerPushBackEnv), - ("lever-pull-v1", SawyerLeverPullEnv), - ("dial-turn-v1", SawyerDialTurnEnv), - ) - ), - ), - ( - "test", - OrderedDict( - ( - ("bin-picking-v1", SawyerBinPickingEnv), - ("box-close-v1", SawyerBoxCloseEnv), - ("hand-insert-v1", SawyerHandInsertEnv), - ("door-lock-v1", SawyerDoorLockEnv), - ("door-unlock-v1", SawyerDoorUnlockEnv), - ) - ), - ), - ) -) - - -def _hard_mode_args_kwargs(env_cls_, key_): - del env_cls_ - - kwargs = dict(task_id=list(ALL_V1_ENVIRONMENTS.keys()).index(key_)) - if key_ == "reach-v1" or key_ == "reach-wall-v1": - kwargs["task_type"] = "reach" - elif key_ == "push-v1" or key_ == "push-wall-v1": - kwargs["task_type"] = "push" - elif key_ == "pick-place-v1" or key_ == "pick-place-wall-v1": - kwargs["task_type"] = "pick_place" - return dict(args=[], kwargs=kwargs) - - -HARD_MODE_ARGS_KWARGS = dict(train={}, test={}) -for key, env_cls in HARD_MODE_CLS_DICT["train"].items(): - HARD_MODE_ARGS_KWARGS["train"][key] = _hard_mode_args_kwargs(env_cls, key) -for key, env_cls in HARD_MODE_CLS_DICT["test"].items(): - HARD_MODE_ARGS_KWARGS["test"][key] = _hard_mode_args_kwargs(env_cls, 
key) +_NUM_METAWORLD_ENVS = len(ALL_V2_ENVIRONMENTS) # V2 DICTS MT10_V2 = OrderedDict( @@ -411,6 +133,7 @@ def _hard_mode_args_kwargs(env_cls_, key_): ), ) + MT10_V2_ARGS_KWARGS = { key: dict(args=[], kwargs={"task_id": list(ALL_V2_ENVIRONMENTS.keys()).index(key)}) for key, _ in MT10_V2.items() @@ -453,6 +176,7 @@ def _hard_mode_args_kwargs(env_cls_, key_): ) ) + ml10_train_args_kwargs = { key: dict( args=[], @@ -484,7 +208,6 @@ def _hard_mode_args_kwargs(env_cls_, key_): ) for key, _ in ML1_V2["train"].items() } - MT50_V2 = OrderedDict( ( ("assembly-v2", SawyerNutAssemblyEnvV2), @@ -657,7 +380,7 @@ def initialize(env, seed=None): env.reset() env._freeze_rand_vec = True if seed is not None: - env.seed(seed) + env.seed(seed=seed) np.random.set_state(st0) d["__init__"] = initialize @@ -683,6 +406,7 @@ def initialize(env, seed=None): st0 = np.random.get_state() np.random.seed(seed) super(type(env), env).__init__() + env._partially_observable = False env._freeze_rand_vec = False env._set_task_called = True diff --git a/metaworld/envs/mujoco/mujoco_env.py b/metaworld/envs/mujoco/mujoco_env.py index 26b756b0c..60725666f 100644 --- a/metaworld/envs/mujoco/mujoco_env.py +++ b/metaworld/envs/mujoco/mujoco_env.py @@ -1,23 +1,3 @@ -import abc -import warnings -from os import path - -import glfw -import gym -import numpy as np -from gym import error -from gym.utils import seeding - -try: - import mujoco_py -except ImportError as e: - raise error.DependencyNotInstalled( - "{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format( - e - ) - ) - - def _assert_task_is_set(func): def inner(*args, **kwargs): env = args[0] @@ -28,140 +8,3 @@ def inner(*args, **kwargs): return func(*args, **kwargs) return inner - - -DEFAULT_SIZE = 500 - - -class MujocoEnv(gym.Env, abc.ABC): - """This is a simplified version of the gym MujocoEnv class. 
- - Some differences are: - - Do not automatically set the observation/action space. - """ - - max_path_length = 500 - - def __init__(self, model_path, frame_skip): - if not path.exists(model_path): - raise OSError("File %s does not exist" % model_path) - - self.frame_skip = frame_skip - self.model = mujoco_py.load_model_from_path(model_path) - self.sim = mujoco_py.MjSim(self.model) - self.data = self.sim.data - self.viewer = None - self._viewers = {} - - self.metadata = { - "render.modes": ["human"], - "video.frames_per_second": int(np.round(1.0 / self.dt)), - } - self.init_qpos = self.sim.data.qpos.ravel().copy() - self.init_qvel = self.sim.data.qvel.ravel().copy() - - self._did_see_sim_exception = False - - self.np_random, _ = seeding.np_random(None) - - def seed(self, seed): - assert seed is not None - self.np_random, seed = seeding.np_random(seed) - self.action_space.seed(seed) - self.observation_space.seed(seed) - self.goal_space.seed(seed) - return [seed] - - @abc.abstractmethod - def reset_model(self): - """Reset the robot degrees of freedom (qpos and qvel). - - Implement this in each subclass. - """ - pass - - def viewer_setup(self): - """This method is called when the viewer is initialized and after every reset. - - Optionally implement this method, if you need to tinker with camera position and so forth. 
- """ - pass - - @_assert_task_is_set - def reset(self): - self._did_see_sim_exception = False - self.sim.reset() - ob = self.reset_model() - if self.viewer is not None: - self.viewer_setup() - return ob - - def set_state(self, qpos, qvel): - assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) - old_state = self.sim.get_state() - new_state = mujoco_py.MjSimState( - old_state.time, qpos, qvel, old_state.act, old_state.udd_state - ) - self.sim.set_state(new_state) - self.sim.forward() - - @property - def dt(self): - return self.model.opt.timestep * self.frame_skip - - def do_simulation(self, ctrl, n_frames=None): - if getattr(self, "curr_path_length", 0) > self.max_path_length: - raise ValueError( - "Maximum path length allowed by the benchmark has been exceeded" - ) - if self._did_see_sim_exception: - return - - if n_frames is None: - n_frames = self.frame_skip - self.sim.data.ctrl[:] = ctrl - - for _ in range(n_frames): - try: - self.sim.step() - except mujoco_py.MujocoException as err: - warnings.warn(str(err), category=RuntimeWarning) - self._did_see_sim_exception = True - - def render(self, offscreen=False, camera_name="corner2", resolution=(640, 480)): - assert_string = ( - "camera_name should be one of ", - "corner3, corner, corner2, topview, gripperPOV, behindGripper", - ) - assert camera_name in { - "corner3", - "corner", - "corner2", - "topview", - "gripperPOV", - "behindGripper", - }, assert_string - if not offscreen: - self._get_viewer("human").render() - else: - return self.sim.render( - *resolution, mode="offscreen", camera_name=camera_name - ) - - def close(self): - if self.viewer is not None: - glfw.destroy_window(self.viewer.window) - self.viewer = None - - def _get_viewer(self, mode): - self.viewer = self._viewers.get(mode) - if self.viewer is None: - if mode == "human": - self.viewer = mujoco_py.MjViewer(self.sim) - self.viewer_setup() - self._viewers[mode] = self.viewer - self.viewer_setup() - return self.viewer - - def 
get_body_com(self, body_name): - return self.data.get_body_xpos(body_name) diff --git a/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py b/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py index caa876caa..91242671f 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py +++ b/metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py @@ -1,27 +1,44 @@ -import abc import copy import pickle -import mujoco_py +import mujoco import numpy as np -from gym.spaces import Box, Discrete +from gymnasium.envs.mujoco import MujocoEnv as mjenv_gym +from gymnasium.spaces import Box, Discrete +from gymnasium.utils import seeding +from gymnasium.utils.ezpickle import EzPickle from metaworld.envs import reward_utils -from metaworld.envs.mujoco.mujoco_env import MujocoEnv, _assert_task_is_set +from metaworld.envs.mujoco.mujoco_env import _assert_task_is_set -class SawyerMocapBase(MujocoEnv, metaclass=abc.ABCMeta): +class SawyerMocapBase(mjenv_gym): """Provides some commonly-shared functions for Sawyer Mujoco envs that use mocap for XYZ control.""" mocap_low = np.array([-0.2, 0.5, 0.06]) mocap_high = np.array([0.2, 0.7, 0.6]) - - def __init__(self, model_name, frame_skip=5): - MujocoEnv.__init__(self, model_name, frame_skip=frame_skip) + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + "render_fps": 80, + } + + def __init__(self, model_name, frame_skip=5, render_mode=None): + mjenv_gym.__init__( + self, + model_name, + frame_skip=frame_skip, + observation_space=self.sawyer_observation_space, + render_mode=render_mode, + ) self.reset_mocap_welds() + self.frame_skip = frame_skip def get_endeff_pos(self): - return self.data.get_body_xpos("hand").copy() + return self.data.body("hand").xpos @property def tcp_center(self): @@ -30,53 +47,47 @@ def tcp_center(self): Returns: (np.ndarray): 3-element position """ - right_finger_pos = self._get_site_pos("rightEndEffector") - left_finger_pos = self._get_site_pos("leftEndEffector") - tcp_center = 
(right_finger_pos + left_finger_pos) / 2.0 + right_finger_pos = self.data.site("rightEndEffector") + left_finger_pos = self.data.site("leftEndEffector") + tcp_center = (right_finger_pos.xpos + left_finger_pos.xpos) / 2.0 return tcp_center def get_env_state(self): - joint_state = self.sim.get_state() - mocap_state = self.data.mocap_pos, self.data.mocap_quat - state = joint_state, mocap_state - return copy.deepcopy(state) + qpos = np.copy(self.data.qpos) + qvel = np.copy(self.data.qvel) + return copy.deepcopy((qpos, qvel)) def set_env_state(self, state): - joint_state, mocap_state = state - self.sim.set_state(joint_state) - mocap_pos, mocap_quat = mocap_state - self.data.set_mocap_pos("mocap", mocap_pos) - self.data.set_mocap_quat("mocap", mocap_quat) - self.sim.forward() + mocap_pos, mocap_quat = state + self.set_state(mocap_pos, mocap_quat) def __getstate__(self): state = self.__dict__.copy() - del state["model"] - del state["sim"] - del state["data"] - mjb = self.model.get_mjb() - return {"state": state, "mjb": mjb, "env_state": self.get_env_state()} + # del state['model'] + # del state['data'] + return {"state": state, "mjb": self.model_name, "mocap": self.get_env_state()} def __setstate__(self, state): self.__dict__ = state["state"] - self.model = mujoco_py.load_model_from_mjb(state["mjb"]) - self.sim = mujoco_py.MjSim(self.model) - self.data = self.sim.data - self.set_env_state(state["env_state"]) + mjenv_gym.__init__( + self, + state["mjb"], + frame_skip=self.frame_skip, + observation_space=self.sawyer_observation_space, + ) + self.set_env_state(state["mocap"]) def reset_mocap_welds(self): """Resets the mocap welds that we use for actuation.""" - sim = self.sim - if sim.model.nmocap > 0 and sim.model.eq_data is not None: - for i in range(sim.model.eq_data.shape[0]): - if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD: - sim.model.eq_data[i, :] = np.array( - [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0] + if self.model.nmocap > 0 and self.model.eq_data is not None: + 
for i in range(self.model.eq_data.shape[0]): + if self.model.eq_type[i] == mujoco.mjtEq.mjEQ_WELD: + self.model.eq_data[i] = np.array( + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0] ) - sim.forward() -class SawyerXYZEnv(SawyerMocapBase, metaclass=abc.ABCMeta): +class SawyerXYZEnv(SawyerMocapBase, EzPickle): _HAND_SPACE = Box( np.array([-0.525, 0.348, -0.0525]), np.array([+0.525, 1.025, 0.7]), @@ -86,6 +97,11 @@ class SawyerXYZEnv(SawyerMocapBase, metaclass=abc.ABCMeta): TARGET_RADIUS = 0.05 + current_task = 0 + classes = None + classes_kwargs = None + tasks = None + def __init__( self, model_name, @@ -96,8 +112,8 @@ def __init__( mocap_high=None, action_scale=1.0 / 100, action_rot_scale=1.0, + render_mode=None, ): - super().__init__(model_name, frame_skip=frame_skip) self.action_scale = action_scale self.action_rot_scale = action_rot_scale self.hand_low = np.array(hand_low) @@ -112,6 +128,8 @@ def __init__( self.seeded_rand_vec = False self._freeze_rand_vec = True self._last_rand_vec = None + self.num_resets = 0 + self.current_seed = None # We use continuous goal space by default and # can discretize the goal space by calling @@ -120,6 +138,15 @@ def __init__( self.discrete_goals = [] self.active_discrete_goal = None + self._partially_observable = True + + super().__init__(model_name, frame_skip=frame_skip, render_mode=render_mode) + + mujoco.mj_forward( + self.model, self.data + ) # *** DO NOT REMOVE: EZPICKLE WON'T WORK *** # + + self._did_see_sim_exception = False self.init_left_pad = self.get_body_com("leftpad") self.init_right_pad = self.get_body_com("rightpad") @@ -129,14 +156,11 @@ def __init__( dtype=np.float64, ) - self.isV2 = "V2" in type(self).__name__ # Technically these observation lengths are different between v1 and v2, # but we handle that elsewhere and just stick with v2 numbers here - self._obs_obj_max_len = 14 if self.isV2 else 6 - self._obs_obj_possible_lens = (6, 14) + self._obs_obj_max_len = 14 self._set_task_called = False - 
self._partially_observable = True self.hand_init_pos = None # OVERRIDE ME self._target_pos = None # OVERRIDE ME @@ -147,9 +171,31 @@ def __init__( # in this initiation of _prev_obs are correct. That being said, it # doesn't seem to matter (it will only effect frame-stacking for the # very first observation) + self._prev_obs = self._get_curr_obs_combined_no_goal() - def _set_task_inner(self): + EzPickle.__init__( + self, + model_name, + frame_skip, + hand_low, + hand_high, + mocap_low, + mocap_high, + action_scale, + action_rot_scale, + ) + + def seed(self, seed): + assert seed is not None + self.np_random, seed = seeding.np_random(seed) + self.action_space.seed(seed) + self.observation_space.seed(seed) + self.goal_space.seed(seed) + return [seed] + + @staticmethod + def _set_task_inner(): # Doesn't absorb "extra" kwargs, to ensure nothing's missed. pass @@ -165,20 +211,18 @@ def set_task(self, task): self._partially_observable = data["partially_observable"] del data["partially_observable"] self._set_task_inner(**data) - self.reset() def set_xyz_action(self, action): action = np.clip(action, -1, 1) pos_delta = action * self.action_scale new_mocap_pos = self.data.mocap_pos + pos_delta[None] - new_mocap_pos[0, :] = np.clip( new_mocap_pos[0, :], self.mocap_low, self.mocap_high, ) - self.data.set_mocap_pos("mocap", new_mocap_pos) - self.data.set_mocap_quat("mocap", np.array([1, 0, 1, 0])) + self.data.mocap_pos = new_mocap_pos + self.data.mocap_quat = np.array([1, 0, 1, 0]) def discretize_goal_space(self, goals): assert False @@ -195,7 +239,7 @@ def _set_obj_xyz(self, pos): self.set_state(qpos, qvel) def _get_site_pos(self, siteName): - _id = self.model.site_names.index(siteName) + _id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, siteName) return self.data.site_xpos[_id].copy() def _set_pos_site(self, name, pos): @@ -208,7 +252,8 @@ def _set_pos_site(self, name, pos): assert isinstance(pos, np.ndarray) assert pos.ndim == 1 - 
self.data.site_xpos[self.model.site_name2id(name)] = pos[:3] + _id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, name) + self.data.site_xpos[_id] = pos[:3] @property def _target_site_config(self): @@ -238,8 +283,9 @@ def touching_object(self, object_geom_id): (bool): whether the gripper is touching the object """ - leftpad_geom_id = self.unwrapped.model.geom_name2id("leftpad_geom") - rightpad_geom_id = self.unwrapped.model.geom_name2id("rightpad_geom") + + leftpad_geom_id = self.data.geom("leftpad_geom").id + rightpad_geom_id = self.data.geom("rightpad_geom").id leftpad_object_contacts = [ x @@ -273,7 +319,9 @@ def touching_object(self, object_geom_id): @property def _get_id_main_object(self): - return self.unwrapped.model.geom_name2id("objGeom") + return self.data.geom( + "objGeom" + ).id # [mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_GEOM, 'objGeom')] def _get_pos_objects(self): """Retrieves object position(s) from mujoco properties or instance vars. @@ -296,10 +344,7 @@ def _get_quat_objects(self): """ # Throw error rather than making this an @abc.abstractmethod so that # V1 environments don't have to implement it - if self.isV2: - raise NotImplementedError - else: - return None + raise NotImplementedError def _get_pos_goal(self): """Retrieves goal position from mujoco properties or instance vars. @@ -320,46 +365,35 @@ def _get_curr_obs_combined_no_goal(self): np.ndarray: The flat observation array (18 elements) """ + pos_hand = self.get_endeff_pos() finger_right, finger_left = ( - self._get_site_pos("rightEndEffector"), - self._get_site_pos("leftEndEffector"), + self.data.body("rightclaw"), + self.data.body("leftclaw"), ) - # the gripper can be at maximum about ~0.1 m apart. # dividing by 0.1 normalized the gripper distance between # 0 and 1. 
Further, we clip because sometimes the grippers # are slightly more than 0.1m apart (~0.00045 m) # clipping removes the effects of this random extra distance # that is produced by mujoco - gripper_distance_apart = np.linalg.norm(finger_right - finger_left) + + gripper_distance_apart = np.linalg.norm(finger_right.xpos - finger_left.xpos) gripper_distance_apart = np.clip(gripper_distance_apart / 0.1, 0.0, 1.0) obs_obj_padded = np.zeros(self._obs_obj_max_len) - obj_pos = self._get_pos_objects() assert len(obj_pos) % 3 == 0 - obj_pos_split = np.split(obj_pos, len(obj_pos) // 3) - if self.isV2: - obj_quat = self._get_quat_objects() - assert len(obj_quat) % 4 == 0 - obj_quat_split = np.split(obj_quat, len(obj_quat) // 4) - obs_obj_padded[: len(obj_pos) + len(obj_quat)] = np.hstack( - [ - np.hstack((pos, quat)) - for pos, quat in zip(obj_pos_split, obj_quat_split) - ] - ) - assert len(obs_obj_padded) in self._obs_obj_possible_lens - return np.hstack((pos_hand, gripper_distance_apart, obs_obj_padded)) - else: - # is a v1 environment - obs_obj_padded[: len(obj_pos)] = obj_pos - assert len(obs_obj_padded) in self._obs_obj_possible_lens - return np.hstack((pos_hand, obs_obj_padded)) + obj_quat = self._get_quat_objects() + assert len(obj_quat) % 4 == 0 + obj_quat_split = np.split(obj_quat, len(obj_quat) // 4) + obs_obj_padded[: len(obj_pos) + len(obj_quat)] = np.hstack( + [np.hstack((pos, quat)) for pos, quat in zip(obj_pos_split, obj_quat_split)] + ) + return np.hstack((pos_hand, gripper_distance_apart, obs_obj_padded)) def _get_obs(self): """Frame stacks `_get_curr_obs_combined_no_goal()` and concatenates the goal position to form a single flat observation. 
@@ -373,10 +407,7 @@ def _get_obs(self): pos_goal = np.zeros_like(pos_goal) curr_obs = self._get_curr_obs_combined_no_goal() # do frame stacking - if self.isV2: - obs = np.hstack((curr_obs, self._prev_obs, pos_goal)) - else: - obs = np.hstack((curr_obs, pos_goal)) + obs = np.hstack((curr_obs, self._prev_obs, pos_goal)) self._prev_obs = curr_obs return obs @@ -389,54 +420,44 @@ def _get_obs_dict(self): ) @property - def observation_space(self): - obs_obj_max_len = self._obs_obj_max_len if self.isV2 else 6 - - obj_low = np.full(obs_obj_max_len, -np.inf) - obj_high = np.full(obs_obj_max_len, +np.inf) + def sawyer_observation_space(self): + obs_obj_max_len = 14 + obj_low = np.full(obs_obj_max_len, -np.inf, dtype=np.float64) + obj_high = np.full(obs_obj_max_len, +np.inf, dtype=np.float64) goal_low = np.zeros(3) if self._partially_observable else self.goal_space.low goal_high = np.zeros(3) if self._partially_observable else self.goal_space.high gripper_low = -1.0 gripper_high = +1.0 - - return ( - Box( - np.hstack( - ( - self._HAND_SPACE.low, - gripper_low, - obj_low, - self._HAND_SPACE.low, - gripper_low, - obj_low, - goal_low, - ) - ), - np.hstack( - ( - self._HAND_SPACE.high, - gripper_high, - obj_high, - self._HAND_SPACE.high, - gripper_high, - obj_high, - goal_high, - ) - ), - dtype=np.float64, - ) - if self.isV2 - else Box( - np.hstack((self._HAND_SPACE.low, obj_low, goal_low)), - np.hstack((self._HAND_SPACE.high, obj_high, goal_high)), - dtype=np.float64, - ) + return Box( + np.hstack( + ( + self._HAND_SPACE.low, + gripper_low, + obj_low, + self._HAND_SPACE.low, + gripper_low, + obj_low, + goal_low, + ) + ), + np.hstack( + ( + self._HAND_SPACE.high, + gripper_high, + obj_high, + self._HAND_SPACE.high, + gripper_high, + obj_high, + goal_high, + ) + ), + dtype=np.float64, ) @_assert_task_is_set def step(self, action): self.set_xyz_action(action[:3]) - self.do_simulation([action[-1], -action[-1]]) + self.do_simulation([action[-1], -action[-1]], 
n_frames=self.frame_skip) self.curr_path_length += 1 # Running the simulator can sometimes mess up site positions, so @@ -464,19 +485,19 @@ def step(self, action): self._last_stable_obs = np.clip( self._last_stable_obs, - a_max=self.observation_space.high, - a_min=self.observation_space.low, + a_max=self.sawyer_observation_space.high, + a_min=self.sawyer_observation_space.low, dtype=np.float64, ) - if not self.isV2: - # v1 environments expect this superclass step() to return only the - # most recent observation. they override the rest of the - # functionality and end up returning the same sort of tuple that - # this does - return self._last_stable_obs - reward, info = self.evaluate_state(self._last_stable_obs, action) - return self._last_stable_obs, reward, False, info + done = True if int(info["success"]) == 1 else False + return ( + np.array(self._last_stable_obs, dtype=np.float64), + reward, + done, + False, + info, + ) def evaluate_state(self, obs, action): """Does the heavy-lifting for `step()` -- namely, calculating reward and populating the `info` dict with training metrics. 
@@ -492,35 +513,28 @@ def evaluate_state(self, obs, action): # V1 environments don't have to implement it raise NotImplementedError - def reset(self): + def reset(self, seed=None, options=None): self.curr_path_length = 0 - if not self.isV2: - return super().reset() - else: - obs = np.float64(super().reset()) - self._prev_obs = obs[:18].copy() - obs[18:36] = self._prev_obs - return obs + obs, info = super().reset() + self._prev_obs = obs[:18].copy() + obs[18:36] = self._prev_obs + obs = np.float64(obs) + return obs, info def _reset_hand(self, steps=50): + mocap_id = self.model.body_mocapid[self.data.body("mocap").id] for _ in range(steps): - self.data.set_mocap_pos("mocap", self.hand_init_pos) - self.data.set_mocap_quat("mocap", np.array([1, 0, 1, 0])) + self.data.mocap_pos[mocap_id][:] = self.hand_init_pos + self.data.mocap_quat[mocap_id][:] = np.array([1, 0, 1, 0]) self.do_simulation([-1, 1], self.frame_skip) self.init_tcp = self.tcp_center + self.init_tcp = self.tcp_center + def _get_state_rand_vec(self): if self._freeze_rand_vec: assert self._last_rand_vec is not None return self._last_rand_vec - elif self.seeded_rand_vec: - rand_vec = self.np_random.uniform( - self._random_reset_space.low, - self._random_reset_space.high, - size=self._random_reset_space.low.size, - ).astype(np.float64) - self._last_rand_vec = rand_vec - return rand_vec else: rand_vec = np.random.uniform( self._random_reset_space.low, diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_assembly_peg.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_assembly_peg.py index 61376ed42..070045073 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_assembly_peg.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_assembly_peg.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git 
a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_basketball.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_basketball.py index 56d8799eb..c472aebd0 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_basketball.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_basketball.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_bin_picking.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_bin_picking.py index 401bc4722..e3f06a347 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_bin_picking.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_bin_picking.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_box_close.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_box_close.py index 05c22e9fb..4c47c40b6 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_box_close.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_box_close.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press.py index 2d4a4a17e..5c1561894 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from 
metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown.py index 54c5fe414..bab9f7820 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown_wall.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown_wall.py index 8cf241d34..c6465db14 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown_wall.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_topdown_wall.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_wall.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_wall.py index ebc0d0c67..04a26d55e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_wall.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_button_press_wall.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_button.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_button.py index bc0dcbfc6..fe555f817 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_button.py +++ 
b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_button.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_pull.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_pull.py index 3bc79dc38..b7223aa97 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_pull.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_pull.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_push.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_push.py index 5377ec088..30e130441 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_push.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_coffee_push.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_dial_turn.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_dial_turn.py index 8d5ad8408..40efe8897 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_dial_turn.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_dial_turn.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_disassemble_peg.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_disassemble_peg.py index 9654a3beb..f98dddc3d 100644 --- 
a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_disassemble_peg.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_disassemble_peg.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door.py index f15d803a3..73f146539 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( @@ -32,7 +32,9 @@ def __init__(self): } self.goal = np.array([-0.2, 0.7, 0.15]) - self.obj_init_pos = self.init_config["ob_init_pos"] + + self.obj_init_pos = self.init_config["obj_init_pos"] + self.obj_init_angle = self.init_config["obj_init_angle"] self.hand_init_pos = self.init_config["hand_init_pos"] diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_lock.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_lock.py index c1b8bc478..d019dc601 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_lock.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_lock.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_unlock.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_unlock.py index c889eadc4..568aeaea8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_unlock.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_door_unlock.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import 
Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_close.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_close.py index 795bc8ff2..7095b8a02 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_close.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_close.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_open.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_open.py index c3570ff79..b9142b5b6 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_open.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_drawer_open.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_close.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_close.py index 6efd86aa5..d736057e8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_close.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_close.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_open.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_open.py index 6e2d3a716..e5cd2926a 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_open.py +++ 
b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_faucet_open.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hammer.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hammer.py index 491de636e..cfd1df68b 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hammer.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hammer.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hand_insert.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hand_insert.py index 3f2ff2795..fbeadb798 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hand_insert.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_hand_insert.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press.py index 0cdb88811..b8fe329ae 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press_side.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press_side.py index 6f6966b31..126ce4850 100644 --- 
a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press_side.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_press_side.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull.py index 9b56d32c4..6ccf11311 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull_side.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull_side.py index 504afe35c..b4d0f068d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull_side.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_handle_pull_side.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_lever_pull.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_lever_pull.py index d684b1440..520cd6535 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_lever_pull.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_lever_pull.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_insertion_side.py 
b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_insertion_side.py index 48bb44386..0e01770a1 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_insertion_side.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_insertion_side.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_unplug_side.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_unplug_side.py index e36ad70a4..bbf3ce824 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_unplug_side.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_peg_unplug_side.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_pick_out_of_hole.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_pick_out_of_hole.py index f648c18dd..a9f822e21 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_pick_out_of_hole.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_pick_out_of_hole.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide.py index 84879c06b..b612471ce 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from 
metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back.py index 40180988c..b474ad4ab 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py index ae1c431d9..f72fa61b0 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py index f83d8b48a..a25a9d881 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_push_back.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_push_back.py index 6a012ec84..ec018dc53 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_push_back.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_push_back.py @@ -1,5 +1,5 @@ import numpy as np 
-from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place.py index 6921dc655..4d6eca798 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place_wall.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place_wall.py index 120396130..88bbf802f 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place_wall.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_reach_push_pick_place_wall.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_shelf_place.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_shelf_place.py index aac81622d..0d17087f5 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_shelf_place.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_shelf_place.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_soccer.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_soccer.py index 9e23746f3..f5d879071 100644 --- 
a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_soccer.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_soccer.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_pull.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_pull.py index db6e81151..cdbe37df1 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_pull.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_pull.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_push.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_push.py index 0d45c0a5f..309cc7a92 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_push.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_stick_push.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep.py index 13d0c7f74..a54f5dc49 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep_into_goal.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep_into_goal.py index 
b61e90957..cd1da5af4 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep_into_goal.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_sweep_into_goal.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_close.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_close.py index a8a49c0b6..fce7bed9d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_close.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_close.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_open.py b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_open.py index c3df83bb3..484d5fe89 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_open.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_window_open.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs.asset_path_utils import full_v1_path_for from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/__init__.py b/metaworld/envs/mujoco/sawyer_xyz/v2/__init__.py index bfe684cb0..95effb11d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/__init__.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/__init__.py @@ -1,130 +1,252 @@ from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_assembly_peg_v2 import ( SawyerNutAssemblyEnvV2, + TestAssemblyv2, + TrainAssemblyv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_basketball_v2 import ( SawyerBasketballEnvV2, + TestBasketballv2, + TrainBasketballv2, ) from 
metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_bin_picking_v2 import ( SawyerBinPickingEnvV2, + TestBinPickingv2, + TrainBinPickingv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_box_close_v2 import ( + SawyerBoxCloseEnvV2, + TestBoxClosev2, + TrainBoxClosev2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_box_close_v2 import SawyerBoxCloseEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_button_press_topdown_v2 import ( SawyerButtonPressTopdownEnvV2, + TestButtonPressTopdownv2, + TrainButtonPressTopdownv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_button_press_topdown_wall_v2 import ( SawyerButtonPressTopdownWallEnvV2, + TestButtonPressTopdownWallv2, + TrainButtonPressTopdownWallv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_button_press_v2 import ( SawyerButtonPressEnvV2, + TestButtonPressv2, + TrainButtonPressv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_button_press_wall_v2 import ( SawyerButtonPressWallEnvV2, + TestButtonPressWallv2, + TrainButtonPressWallv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_coffee_button_v2 import ( SawyerCoffeeButtonEnvV2, + TestCoffeeButtonv2, + TrainCoffeeButtonv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_coffee_pull_v2 import ( SawyerCoffeePullEnvV2, + TestCoffeePullv2, + TrainCoffeePullv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_coffee_push_v2 import ( SawyerCoffeePushEnvV2, + TestCoffeePushv2, + TrainCoffeePushv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_dial_turn_v2 import ( + SawyerDialTurnEnvV2, + TestDialTurnv2, + TrainDialTurnv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_dial_turn_v2 import SawyerDialTurnEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_disassemble_peg_v2 import ( SawyerNutDisassembleEnvV2, + TestDisassemblev2, + TrainDisassemblev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_close_v2 import ( SawyerDoorCloseEnvV2, + TestDoorClosev2, + TrainDoorClosev2, +) +from 
metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_lock_v2 import ( + SawyerDoorLockEnvV2, + TestDoorLockv2, + TrainDoorLockv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_lock_v2 import SawyerDoorLockEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_unlock_v2 import ( SawyerDoorUnlockEnvV2, + TestDoorUnlockv2, + TrainDoorUnlockv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_v2 import ( + SawyerDoorEnvV2, + TestDoorOpenv2, + TrainDoorOpenv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_v2 import SawyerDoorEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_drawer_close_v2 import ( SawyerDrawerCloseEnvV2, + TestDrawerClosev2, + TrainDrawerClosev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_drawer_open_v2 import ( SawyerDrawerOpenEnvV2, + TestDrawerOpenv2, + TrainDrawerOpenv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_faucet_close_v2 import ( SawyerFaucetCloseEnvV2, + TestFaucetClosev2, + TrainFaucetClosev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_faucet_open_v2 import ( SawyerFaucetOpenEnvV2, + TestFaucetOpenv2, + TrainFaucetOpenv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_hammer_v2 import ( + SawyerHammerEnvV2, + TestHammerv2, + TrainHammerv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_hammer_v2 import SawyerHammerEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_hand_insert_v2 import ( SawyerHandInsertEnvV2, + TestHandInsertv2, + TrainHandInsertv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_handle_press_side_v2 import ( SawyerHandlePressSideEnvV2, + TestHandlePressSidev2, + TrainHandlePressSidev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_handle_press_v2 import ( SawyerHandlePressEnvV2, + TestHandlePressv2, + TrainHandlePressv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_handle_pull_side_v2 import ( SawyerHandlePullSideEnvV2, + TestHandlePullSidev2, + TrainHandlePullSidev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_handle_pull_v2 import ( 
SawyerHandlePullEnvV2, + TestHandlePullv2, + TrainHandlePullv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_lever_pull_v2 import ( SawyerLeverPullEnvV2, + TestLeverPullv2, + TrainLeverPullv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_peg_insertion_side_v2 import ( SawyerPegInsertionSideEnvV2, + TestPegInsertionSidev2, + TrainPegInsertionSidev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_peg_unplug_side_v2 import ( SawyerPegUnplugSideEnvV2, + TestPegUnplugSidev2, + TrainPegUnplugSidev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_pick_out_of_hole_v2 import ( SawyerPickOutOfHoleEnvV2, + TestPickOutOfHolev2, + TrainPickOutOfHolev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_pick_place_v2 import ( SawyerPickPlaceEnvV2, + TestPickPlacev2, + TrainPickPlacev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_pick_place_wall_v2 import ( SawyerPickPlaceWallEnvV2, + TestPickPlaceWallv2, + TrainPickPlaceWallv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_plate_slide_back_side_v2 import ( SawyerPlateSlideBackSideEnvV2, + TestPlateSlideBackSidev2, + TrainPlateSlideBackSidev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_plate_slide_back_v2 import ( SawyerPlateSlideBackEnvV2, + TestPlateSlideBackv2, + TrainPlateSlideBackv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_plate_slide_side_v2 import ( SawyerPlateSlideSideEnvV2, + TestPlateSlideSidev2, + TrainPlateSlideSidev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_plate_slide_v2 import ( SawyerPlateSlideEnvV2, + TestPlateSlidev2, + TrainPlateSlidev2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_push_back_v2 import ( + SawyerPushBackEnvV2, + TestPushBackv2, + TrainPushBackv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_push_v2 import ( + SawyerPushEnvV2, + TestPushv2, + TrainPushv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_push_wall_v2 import ( + SawyerPushWallEnvV2, + TestPushWallv2, + TrainPushWallv2, +) +from 
metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_reach_v2 import ( + SawyerReachEnvV2, + TestReachv2, + TrainReachv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_push_back_v2 import SawyerPushBackEnvV2 -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_push_v2 import SawyerPushEnvV2 -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_push_wall_v2 import SawyerPushWallEnvV2 -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_reach_v2 import SawyerReachEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_reach_wall_v2 import ( SawyerReachWallEnvV2, + TestReachWallv2, + TrainReachWallv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_shelf_place_v2 import ( SawyerShelfPlaceEnvV2, + TestShelfPlacev2, + TrainShelfPlacev2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_soccer_v2 import ( + SawyerSoccerEnvV2, + TestSoccerv2, + TrainSoccerv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_soccer_v2 import SawyerSoccerEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_stick_pull_v2 import ( SawyerStickPullEnvV2, + TestStickPullv2, + TrainStickPullv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_stick_push_v2 import ( SawyerStickPushEnvV2, + TestStickPushv2, + TrainStickPushv2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_sweep_into_goal_v2 import ( SawyerSweepIntoGoalEnvV2, + TestSweepIntoGoalv2, + TrainSweepIntoGoalv2, +) +from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_sweep_v2 import ( + SawyerSweepEnvV2, + TestSweepv2, + TrainSweepv2, ) -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_sweep_v2 import SawyerSweepEnvV2 from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_window_close_v2 import ( SawyerWindowCloseEnvV2, + TestWindowClosev2, + TrainWindowClosev2, ) from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_window_open_v2 import ( SawyerWindowOpenEnvV2, + TestWindowOpenv2, + TrainWindowOpenv2, ) __all__ = [ diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_assembly_peg_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_assembly_peg_v2.py index 
db2c8862b..4f074eb2d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_assembly_peg_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_assembly_peg_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +13,7 @@ class SawyerNutAssemblyEnvV2(SawyerXYZEnv): WRENCH_HANDLE_LENGTH = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (0, 0.6, 0.02) @@ -24,13 +25,17 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.6, 0.02], dtype=np.float32), "hand_init_pos": np.array((0, 0.6, 0.2), dtype=np.float32), } + self.goal = np.array([0.1, 0.8, 0.1], dtype=np.float32) self.obj_init_pos = self.init_config["obj_init_pos"] self.obj_init_angle = self.init_config["obj_init_angle"] @@ -73,13 +78,16 @@ def _target_site_config(self): return [("pegTop", self._target_pos)] def _get_id_main_object(self): + """TODO: Reggie""" return self.unwrapped.model.geom_name2id("WrenchHandle") def _get_pos_objects(self): - return self.data.site_xpos[self.model.site_name2id("RoundNut-8")] + return self.data.site_xpos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "RoundNut-8") + ] def _get_quat_objects(self): - return self.sim.data.get_body_xquat("RoundNut") + return self.data.body("RoundNut").xquat def _get_obs_dict(self): obs_dict = super()._get_obs_dict() @@ -88,19 +96,19 @@ def _get_obs_dict(self): def reset_model(self): self._reset_hand() - self._target_pos = self.goal.copy() - goal_pos = self._get_state_rand_vec() while np.linalg.norm(goal_pos[:2] - goal_pos[-3:-1]) < 0.1: goal_pos = self._get_state_rand_vec() self.obj_init_pos = 
goal_pos[:3] self._target_pos = goal_pos[-3:] - peg_pos = self._target_pos - np.array([0.0, 0.0, 0.05]) self._set_obj_xyz(self.obj_init_pos) - self.sim.model.body_pos[self.model.body_name2id("peg")] = peg_pos - self.sim.model.site_pos[self.model.site_name2id("pegTop")] = self._target_pos - + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "peg") + ] = peg_pos + self.model.site_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "pegTop") + ] = self._target_pos return self._get_obs() @staticmethod @@ -186,3 +194,23 @@ def compute_reward(self, actions, obs): reward_in_place, success, ) + + +class TrainAssemblyv2(SawyerNutAssemblyEnvV2): + tasks = None + + def __init__(self): + SawyerNutAssemblyEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestAssemblyv2(SawyerNutAssemblyEnvV2): + tasks = None + + def __init__(self): + SawyerNutAssemblyEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py index a50521b5d..f11ed9e7f 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_basketball_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -13,7 +14,7 @@ class SawyerBasketballEnvV2(SawyerXYZEnv): PAD_SUCCESS_MARGIN = 0.06 TARGET_RADIUS = 0.08 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.6, 0.0299) @@ -25,8 +26,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + 
render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.6, 0.03], dtype=np.float32), @@ -53,7 +58,6 @@ def model_name(self): @_assert_task_is_set def evaluate_state(self, obs, action): obj = obs[4:7] - ( reward, tcp_to_obj, @@ -84,25 +88,23 @@ def _get_pos_objects(self): return self.get_body_com("bsktball") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("bsktball") + return self.data.body("bsktball").xquat def reset_model(self): self._reset_hand() self.prev_obs = self._get_curr_obs_combined_no_goal() - - basket_pos = self.goal.copy() - self.sim.model.body_pos[self.model.body_name2id("basket_goal")] = basket_pos - self._target_pos = self.data.site_xpos[self.model.site_name2id("goal")] - goal_pos = self._get_state_rand_vec() basket_pos = goal_pos[3:] while np.linalg.norm(goal_pos[:2] - basket_pos[:2]) < 0.15: goal_pos = self._get_state_rand_vec() basket_pos = goal_pos[3:] self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) - self.sim.model.body_pos[self.model.body_name2id("basket_goal")] = basket_pos - self._target_pos = self.data.site_xpos[self.model.site_name2id("goal")] - + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "basket_goal") + ] = basket_pos + self._target_pos = self.data.site_xpos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "goal") + ] self._set_obj_xyz(self.obj_init_pos) return self._get_obs() @@ -154,3 +156,23 @@ def compute_reward(self, action, obs): if target_to_obj < self.TARGET_RADIUS: reward = 10.0 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainBasketballv2(SawyerBasketballEnvV2): + tasks = None + + def __init__(self): + SawyerBasketballEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestBasketballv2(SawyerBasketballEnvV2): + 
tasks = None + + def __init__(self): + SawyerBasketballEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_bin_picking_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_bin_picking_v2.py index e4ef8eb16..20028bc6d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_bin_picking_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_bin_picking_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -23,7 +23,7 @@ class SawyerBinPickingEnvV2(SawyerXYZEnv): - (11/23/20) Updated reward function to new pick-place style """ - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.07) hand_high = (0.5, 1, 0.5) obj_low = (-0.21, 0.65, 0.02) @@ -36,8 +36,10 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) - + if tasks is not None: + self.tasks = tasks self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([-0.12, 0.7, 0.02]), @@ -104,7 +106,7 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("obj") + return self.data.body("obj").xquat def reset_model(self): self._reset_hand() @@ -194,3 +196,23 @@ def compute_reward(self, action, obs): object_grasped, in_place, ) + + +class TrainBinPickingv2(SawyerBinPickingEnvV2): + tasks = None + + def __init__(self): + SawyerBinPickingEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestBinPickingv2(SawyerBinPickingEnvV2): + tasks = None + + def __init__(self): + SawyerBinPickingEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return 
super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py index 66688ca3a..9a764d0c8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerBoxCloseEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.05, 0.5, 0.02) @@ -22,7 +23,10 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks self.init_config = { "obj_init_angle": 0.3, @@ -42,6 +46,8 @@ def __init__(self): np.hstack((obj_high, goal_high)), ) + self.init_obj_quat = None + @property def model_name(self): return full_v2_path_for("sawyer_xyz/sawyer_box.xml") @@ -79,11 +85,10 @@ def _get_pos_objects(self): return self.get_body_com("top_link") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("top_link") + return self.data.body("top_link").xquat def reset_model(self): self._reset_hand() - self._target_pos = self.goal.copy() self.obj_init_pos = self.init_config["obj_init_pos"] self.obj_init_angle = self.init_config["obj_init_angle"] box_height = self.get_body_com("boxbody")[2] @@ -94,9 +99,13 @@ def reset_model(self): self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) self._target_pos = goal_pos[-3:] - self.sim.model.body_pos[self.model.body_name2id("boxbody")] = np.concatenate( - (self._target_pos[:2], [box_height]) - ) + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "boxbody") + ] = 
np.concatenate((self._target_pos[:2], [box_height])) + + for _ in range(self.frame_skip): + mujoco.mj_step(self.model, self.data) + self._set_obj_xyz(self.obj_init_pos) return self._get_obs() @@ -187,3 +196,23 @@ def compute_reward(self, actions, obs): *reward_steps, success, ) + + +class TrainBoxClosev2(SawyerBoxCloseEnvV2): + tasks = None + + def __init__(self): + SawyerBoxCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestBoxClosev2(SawyerBoxCloseEnvV2): + tasks = None + + def __init__(self): + SawyerBoxCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_v2.py index 43a8e6f86..8175c148e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerButtonPressTopdownEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.8, 0.115) @@ -20,8 +21,10 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) - + if tasks is not None: + self.tasks = tasks self.init_config = { "obj_init_pos": np.array([0, 0.8, 0.115], dtype=np.float32), "hand_init_pos": np.array([0, 0.4, 0.2], dtype=np.float32), @@ -53,9 +56,8 @@ def evaluate_state(self, obs, action): near_button, button_pressed, ) = self.compute_reward(action, obs) - info = { - "success": 
float(obj_to_target <= 0.02), + "success": float(obj_to_target <= 0.024), "near_object": float(tcp_to_obj <= 0.05), "grasp_success": float(tcp_open > 0), "grasp_reward": near_button, @@ -77,7 +79,7 @@ def _get_pos_objects(self): return self.get_body_com("button") + np.array([0.0, 0.0, 0.193]) def _get_quat_objects(self): - return self.sim.data.get_body_xquat("button") + return self.data.body("button").xquat def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -88,18 +90,17 @@ def _set_obj_xyz(self, pos): def reset_model(self): self._reset_hand() - self._target_pos = self.goal.copy() - goal_pos = self._get_state_rand_vec() self.obj_init_pos = goal_pos - - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos + mujoco.mj_forward(self.model, self.data) self._target_pos = self._get_site_pos("hole") self._obj_to_target_init = abs( self._target_pos[2] - self._get_site_pos("buttonStart")[2] ) - return self._get_obs() def compute_reward(self, action, obs): @@ -130,3 +131,23 @@ def compute_reward(self, action, obs): reward += 5 * button_pressed return (reward, tcp_to_obj, obs[3], obj_to_target, near_button, button_pressed) + + +class TrainButtonPressTopdownv2(SawyerButtonPressTopdownEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressTopdownEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestButtonPressTopdownv2(SawyerButtonPressTopdownEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressTopdownEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_wall_v2.py index 06e641be3..a10cebb80 
100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_topdown_wall_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerButtonPressTopdownWallEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.8, 0.115) @@ -20,8 +21,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.8, 0.115], dtype=np.float32), "hand_init_pos": np.array([0, 0.4, 0.2], dtype=np.float32), @@ -55,7 +60,7 @@ def evaluate_state(self, obs, action): ) = self.compute_reward(action, obs) info = { - "success": float(obj_to_target <= 0.02), + "success": float(obj_to_target <= 0.024), "near_object": float(tcp_to_obj <= 0.05), "grasp_success": float(tcp_open > 0), "grasp_reward": near_button, @@ -77,7 +82,7 @@ def _get_pos_objects(self): return self.get_body_com("button") + np.array([0.0, 0.0, 0.193]) def _get_quat_objects(self): - return self.sim.data.get_body_xquat("button") + return self.data.body("button").xquat def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -92,10 +97,12 @@ def reset_model(self): goal_pos = self._get_state_rand_vec() self.obj_init_pos = goal_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos + mujoco.mj_forward(self.model, self.data) - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos self._target_pos = self._get_site_pos("hole") - self._obj_to_target_init = abs( self._target_pos[2] - self._get_site_pos("buttonStart")[2] 
) @@ -130,3 +137,23 @@ def compute_reward(self, action, obs): reward += 5 * button_pressed return (reward, tcp_to_obj, obs[3], obj_to_target, near_button, button_pressed) + + +class TrainButtonPressTopdownWallv2(SawyerButtonPressTopdownWallEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressTopdownWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestButtonPressTopdownWallv2(SawyerButtonPressTopdownWallEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressTopdownWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_v2.py index 2169c27f7..a62083862 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerButtonPressEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.85, 0.115) @@ -20,8 +21,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0.0, 0.9, 0.115], dtype=np.float32), "hand_init_pos": np.array([0, 0.4, 0.2], dtype=np.float32), @@ -76,7 +81,7 @@ def _get_pos_objects(self): return self.get_body_com("button") + np.array([0.0, -0.193, 0.0]) def _get_quat_objects(self): - return self.sim.data.get_body_xquat("button") + return 
self.data.body("button").xquat def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -92,8 +97,9 @@ def reset_model(self): goal_pos = self._get_state_rand_vec() self.obj_init_pos = goal_pos - - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos self._set_obj_xyz(0) self._target_pos = self._get_site_pos("hole") @@ -131,3 +137,23 @@ def compute_reward(self, action, obs): reward += 8 * button_pressed return (reward, tcp_to_obj, obs[3], obj_to_target, near_button, button_pressed) + + +class TrainButtonPressv2(SawyerButtonPressEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestButtonPressv2(SawyerButtonPressEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_wall_v2.py index 063ae9977..7548dffde 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_button_press_wall_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerButtonPressWallEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.05, 0.85, 0.1149) @@ -20,8 +21,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + 
render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0.0, 0.9, 0.115], dtype=np.float32), "hand_init_pos": np.array([0, 0.4, 0.2], dtype=np.float32), @@ -78,7 +83,7 @@ def _get_pos_objects(self): return self.get_body_com("button") + np.array([0.0, -0.193, 0.0]) def _get_quat_objects(self): - return self.sim.data.get_body_xquat("button") + return self.data.body("button").xquat def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -95,7 +100,10 @@ def reset_model(self): goal_pos = self._get_state_rand_vec() self.obj_init_pos = goal_pos - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos + self._set_obj_xyz(0) self._target_pos = self._get_site_pos("hole") @@ -135,5 +143,24 @@ def compute_reward(self, action, obs): reward = 2 reward += 2 * (1 + obs[3]) reward += 4 * button_pressed**2 - return (reward, tcp_to_obj, obs[3], obj_to_target, near_button, button_pressed) + + +class TrainButtonPressWallv2(SawyerButtonPressWallEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestButtonPressWallv2(SawyerButtonPressWallEnvV2): + tasks = None + + def __init__(self): + SawyerButtonPressWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_button_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_button_v2.py index 707c6c385..ab3f36812 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_button_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_button_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from 
gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerCoffeeButtonEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): self.max_dist = 0.03 hand_low = (-0.5, 0.4, 0.05) @@ -26,8 +27,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.9, 0.28]), "obj_init_angle": 0.3, @@ -95,8 +100,8 @@ def reset_model(self): self._reset_hand() self.obj_init_pos = self._get_state_rand_vec() - self.sim.model.body_pos[ - self.model.body_name2id("coffee_machine") + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "coffee_machine") ] = self.obj_init_pos pos_mug = self.obj_init_pos + np.array([0.0, -0.22, 0.0]) @@ -135,3 +140,23 @@ def compute_reward(self, action, obs): reward += 8 * button_pressed return (reward, tcp_to_obj, obs[3], obj_to_target, near_button, button_pressed) + + +class TrainCoffeeButtonv2(SawyerCoffeeButtonEnvV2): + tasks = None + + def __init__(self): + SawyerCoffeeButtonEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestCoffeeButtonv2(SawyerCoffeeButtonEnvV2): + tasks = None + + def __init__(self): + SawyerCoffeeButtonEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py index bab6f9887..8c3a88c91 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_pull_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces 
import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +12,7 @@ class SawyerCoffeePullEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.05, 0.7, -0.001) @@ -23,8 +24,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.75, 0.0]), "obj_init_angle": 0.3, @@ -79,7 +84,8 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("mug")).as_quat() + geom_xmat = self.data.geom("mug").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _set_obj_xyz(self, pos): qpos = self.data.qpos.flatten() @@ -99,7 +105,9 @@ def reset_model(self): self.obj_init_pos = pos_mug_init pos_machine = pos_mug_init + np.array([0.0, 0.22, 0.0]) - self.sim.model.body_pos[self.model.body_name2id("coffee_machine")] = pos_machine + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "coffee_machine") + ] = pos_machine self._target_pos = pos_mug_goal return self._get_obs() @@ -149,3 +157,23 @@ def compute_reward(self, action, obs): object_grasped, in_place, ) + + +class TrainCoffeePullv2(SawyerCoffeePullEnvV2): + tasks = None + + def __init__(self): + SawyerCoffeePullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestCoffeePullv2(SawyerCoffeePullEnvV2): + tasks = None + + def __init__(self): + SawyerCoffeePullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py 
b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py index ae88dc2f6..271126320 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_coffee_push_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +12,7 @@ class SawyerCoffeePushEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.55, -0.001) @@ -23,8 +24,11 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.0, 0.6, 0.0]), @@ -79,7 +83,8 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("mug")).as_quat() + geom_xmat = self.data.geom("mug").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _set_obj_xyz(self, pos): qpos = self.data.qpos.flatten() @@ -99,7 +104,10 @@ def reset_model(self): self.obj_init_pos = pos_mug_init pos_machine = pos_mug_goal + np.array([0.0, 0.22, 0.0]) - self.sim.model.body_pos[self.model.body_name2id("coffee_machine")] = pos_machine + + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "coffee_machine") + ] = pos_machine self._target_pos = pos_mug_goal return self._get_obs() @@ -149,3 +157,23 @@ def compute_reward(self, action, obs): object_grasped, in_place, ) + + +class TrainCoffeePushv2(SawyerCoffeePushEnvV2): + tasks = None + + def __init__(self): + SawyerCoffeePushEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class 
TestCoffeePushv2(SawyerCoffeePushEnvV2): + tasks = None + + def __init__(self): + SawyerCoffeePushEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py index 0bc6505cf..86e6071d8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +13,7 @@ class SawyerDialTurnEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.07 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.7, 0.0) @@ -24,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.7, 0.0]), "hand_init_pos": np.array([0, 0.6, 0.2], dtype=np.float32), @@ -69,9 +74,11 @@ def evaluate_state(self, obs, action): def _get_pos_objects(self): dial_center = self.get_body_com("dial").copy() - dial_angle_rad = self.data.get_joint_qpos("knob_Joint_1") + dial_angle_rad = self.data.joint("knob_Joint_1").qpos - offset = np.array([np.sin(dial_angle_rad), -np.cos(dial_angle_rad), 0]) + offset = np.array( + [np.sin(dial_angle_rad), -np.cos(dial_angle_rad), 0], dtype=object + ) dial_radius = 0.05 offset *= dial_radius @@ -79,7 +86,7 @@ def _get_pos_objects(self): return dial_center + offset def _get_quat_objects(self): - return self.sim.data.get_body_xquat("dial") + return self.data.body("dial").xquat def reset_model(self): self._reset_hand() @@ -91,10 +98,11 @@ def reset_model(self): 
self.obj_init_pos = goal_pos[:3] final_pos = goal_pos.copy() + np.array([0, 0.03, 0.03]) self._target_pos = final_pos - - self.sim.model.body_pos[self.model.body_name2id("dial")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "dial") + ] = self.obj_init_pos self.dial_push_position = self._get_pos_objects() + np.array([0.05, 0.02, 0.09]) - + mujoco.mj_forward(self.model, self.data) return self._get_obs() def compute_reward(self, action, obs): @@ -131,5 +139,31 @@ def compute_reward(self, action, obs): object_grasped = reach reward = 10 * reward_utils.hamacher_product(reach, in_place) + return ( + reward[0], + tcp_to_obj, + tcp_opened, + target_to_obj, + object_grasped, + in_place, + ) + + +class TrainDialTurnv2(SawyerDialTurnEnvV2): + tasks = None + + def __init__(self): + SawyerDialTurnEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDialTurnv2(SawyerDialTurnEnvV2): + tasks = None + + def __init__(self): + SawyerDialTurnEnvV2.__init__(self, self.tasks) - return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_disassemble_peg_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_disassemble_peg_v2.py index e5fa9d68b..5b7907e47 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_disassemble_peg_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_disassemble_peg_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +13,7 @@ class SawyerNutDisassembleEnvV2(SawyerXYZEnv): WRENCH_HANDLE_LENGTH = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): 
hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (0.0, 0.6, 0.025) @@ -24,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.7, 0.025]), @@ -82,7 +87,7 @@ def _get_pos_objects(self): return self._get_site_pos("RoundNut-8") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("RoundNut") + return self.data.body("RoundNut").xquat def _get_obs_dict(self): obs_dict = super()._get_obs_dict() @@ -103,10 +108,14 @@ def reset_model(self): peg_pos = self.obj_init_pos + np.array([0.0, 0.0, 0.03]) peg_top_pos = self.obj_init_pos + np.array([0.0, 0.0, 0.08]) - self.sim.model.body_pos[self.model.body_name2id("peg")] = peg_pos - self.sim.model.site_pos[self.model.site_name2id("pegTop")] = peg_top_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "peg") + ] = peg_pos + self.model.site_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "pegTop") + ] = peg_top_pos + mujoco.mj_forward(self.model, self.data) self._set_obj_xyz(self.obj_init_pos) - return self._get_obs() @staticmethod @@ -175,3 +184,23 @@ def compute_reward(self, actions, obs): reward_in_place, success, ) + + +class TrainDisassemblev2(SawyerNutDisassembleEnvV2): + tasks = None + + def __init__(self): + SawyerNutDisassembleEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDisassemblev2(SawyerNutDisassembleEnvV2): + tasks = None + + def __init__(self): + SawyerNutDisassembleEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py index 762e71b19..e554b1ff6 
100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_close_v2.py @@ -1,17 +1,33 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box +from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils -from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import _assert_task_is_set -from metaworld.envs.mujoco.sawyer_xyz.v2.sawyer_door_v2 import SawyerDoorEnvV2 +from metaworld.envs.asset_path_utils import full_v2_path_for +from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import ( + SawyerXYZEnv, + _assert_task_is_set, +) -class SawyerDoorCloseEnvV2(SawyerDoorEnvV2): - def __init__(self): +class SawyerDoorCloseEnvV2(SawyerXYZEnv): + def __init__(self, tasks=None, render_mode=None): goal_low = (0.2, 0.65, 0.1499) goal_high = (0.3, 0.75, 0.1501) + hand_low = (-0.5, 0.40, 0.05) + hand_high = (0.5, 1, 0.5) + obj_low = (0.0, 0.85, 0.15) + obj_high = (0.1, 0.95, 0.15) + + super().__init__( + self.model_name, + hand_low=hand_low, + hand_high=hand_high, + render_mode=render_mode, + ) - super().__init__() + if tasks is not None: + self.tasks = tasks self.init_config = { "obj_init_angle": 0.3, @@ -23,20 +39,45 @@ def __init__(self): self.obj_init_angle = self.init_config["obj_init_angle"] self.hand_init_pos = self.init_config["hand_init_pos"] + self.door_qpos_adr = self.model.joint("doorjoint").qposadr.item() + self.door_qvel_adr = self.model.joint("doorjoint").dofadr.item() + self.goal_space = Box(np.array(goal_low), np.array(goal_high)) + self._random_reset_space = Box( + np.array(obj_low), + np.array(obj_high), + ) + + @property + def model_name(self): + return full_v2_path_for("sawyer_xyz/sawyer_door_pull.xml") + + def _get_pos_objects(self): + return self.data.geom("handle").xpos.copy() + + def _get_quat_objects(self): + return Rotation.from_matrix( + self.data.geom("handle").xmat.reshape(3, 3) + ).as_quat() + + def _set_obj_xyz(self, pos): + qpos 
= self.data.qpos.copy() + qvel = self.data.qvel.copy() + qpos[self.door_qpos_adr] = pos + qvel[self.door_qvel_adr] = 0 + self.set_state(qpos.flatten(), qvel.flatten()) + def reset_model(self): self._reset_hand() - self._target_pos = self.goal.copy() - self.objHeight = self.data.get_geom_xpos("handle")[2] - + self.objHeight = self.data.geom("handle").xpos[2] obj_pos = self._get_state_rand_vec() self.obj_init_pos = obj_pos goal_pos = obj_pos.copy() + np.array([0.2, -0.2, 0.0]) self._target_pos = goal_pos - self.sim.model.body_pos[self.model.body_name2id("door")] = self.obj_init_pos - self.sim.model.site_pos[self.model.site_name2id("goal")] = self._target_pos + self.model.body("door").pos = self.obj_init_pos + self.model.site("goal").pos = self._target_pos # keep the door open after resetting initial positions self._set_obj_xyz(-1.5708) @@ -89,3 +130,23 @@ def compute_reward(self, actions, obs): reward = 10 return [reward, obj_to_target, hand_in_place] + + +class TrainDoorClosev2(SawyerDoorCloseEnvV2): + tasks = None + + def __init__(self): + SawyerDoorCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDoorClosev2(SawyerDoorCloseEnvV2): + tasks = None + + def __init__(self): + SawyerDoorCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_lock_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_lock_v2.py index a56c5749c..f4b465280 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_lock_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_lock_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class 
SawyerDoorLockEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, -0.15) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.8, 0.15) @@ -20,10 +21,14 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { - "obj_init_pos": np.array([0, 0.85, 0.15]), + "obj_init_pos": np.array([0, 0.85, 0.15], dtype=np.float32), "hand_init_pos": np.array([0, 0.6, 0.2], dtype=np.float32), } self.goal = np.array([0, 0.85, 0.1]) @@ -82,19 +87,17 @@ def _get_pos_objects(self): return self._get_site_pos("lockStartLock") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("door_link") + return self.data.body("door_link").xquat def reset_model(self): self._reset_hand() door_pos = self._get_state_rand_vec() + self.model.body("door").pos = door_pos - self.sim.model.body_pos[self.model.body_name2id("door")] = door_pos for _ in range(self.frame_skip): - self.sim.step() - - self.obj_init_pos = self.get_body_com("lock_link") + mujoco.mj_step(self.model, self.data) + self.obj_init_pos = self.data.body("lock_link").xpos self._target_pos = self.obj_init_pos + np.array([0.0, -0.04, -0.1]) - return self._get_obs() def compute_reward(self, action, obs): @@ -126,3 +129,23 @@ def compute_reward(self, action, obs): reward += 8 * lock_pressed return (reward, tcp_to_obj, obs[3], obj_to_target, near_lock, lock_pressed) + + +class TrainDoorLockv2(SawyerDoorLockEnvV2): + tasks = None + + def __init__(self): + SawyerDoorLockEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDoorLockv2(SawyerDoorLockEnvV2): + tasks = None + + def __init__(self): + SawyerDoorLockEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git 
a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_unlock_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_unlock_v2.py index 3406799ce..e8eca0095 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_unlock_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_unlock_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +10,7 @@ class SawyerDoorUnlockEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, -0.15) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.8, 0.15) @@ -22,8 +22,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.85, 0.15]), "hand_init_pos": np.array([0, 0.6, 0.2], dtype=np.float32), @@ -81,7 +85,7 @@ def _get_pos_objects(self): return self._get_site_pos("lockStartUnlock") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("door_link") + return self.data.body("door_link").xquat def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -92,12 +96,10 @@ def _set_obj_xyz(self, pos): def reset_model(self): self._reset_hand() - door_pos = self._get_state_rand_vec() - - self.sim.model.body_pos[self.model.body_name2id("door")] = door_pos + self.model.body("door").pos = self._get_state_rand_vec() self._set_obj_xyz(1.5708) - self.obj_init_pos = self.get_body_com("lock_link") + self.obj_init_pos = self.data.body("lock_link").xpos self._target_pos = self.obj_init_pos + np.array([0.1, -0.04, 0.0]) return self._get_obs() @@ -142,3 +144,23 @@ def compute_reward(self, action, obs): ready_to_push, pushed, ) + + +class TrainDoorUnlockv2(SawyerDoorUnlockEnvV2): + tasks = None + + def __init__(self): + SawyerDoorUnlockEnvV2.__init__(self, self.tasks) 
+ + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDoorUnlockv2(SawyerDoorUnlockEnvV2): + tasks = None + + def __init__(self): + SawyerDoorUnlockEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py index 2da3cbf2e..a0f4aedce 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +11,7 @@ class SawyerDoorEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (0.0, 0.85, 0.15) @@ -23,14 +23,14 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { - "obj_init_angle": np.array( - [ - 0.3, - ] - ), + "obj_init_angle": np.array([0.3]), "obj_init_pos": np.array([0.1, 0.95, 0.15]), "hand_init_pos": np.array([0, 0.6, 0.2]), } @@ -40,7 +40,8 @@ def __init__(self): self.obj_init_angle = self.init_config["obj_init_angle"] self.hand_init_pos = self.init_config["hand_init_pos"] - self.door_angle_idx = self.model.get_joint_qpos_addr("doorjoint") + self.door_qpos_adr = self.model.joint("doorjoint").qposadr.item() + self.door_qvel_adr = self.model.joint("doorjoint").dofadr.item() self._random_reset_space = Box( np.array(obj_low), @@ -80,31 +81,32 @@ def _target_site_config(self): return [] def _get_pos_objects(self): - return self.data.get_geom_xpos("handle").copy() + return self.data.geom("handle").xpos.copy() def _get_quat_objects(self): - 
return Rotation.from_matrix(self.data.get_geom_xmat("handle")).as_quat() + return Rotation.from_matrix( + self.data.geom("handle").xmat.reshape(3, 3) + ).as_quat() def _set_obj_xyz(self, pos): qpos = self.data.qpos.copy() qvel = self.data.qvel.copy() - qpos[self.door_angle_idx] = pos - qvel[self.door_angle_idx] = 0 + qpos[self.door_qpos_adr] = pos + qvel[self.door_qvel_adr] = 0 self.set_state(qpos.flatten(), qvel.flatten()) def reset_model(self): self._reset_hand() - - self.objHeight = self.data.get_geom_xpos("handle")[2] + self.objHeight = self.data.geom("handle").xpos[2] self.obj_init_pos = self._get_state_rand_vec() self._target_pos = self.obj_init_pos + np.array([-0.3, -0.45, 0.0]) - self.sim.model.body_pos[self.model.body_name2id("door")] = self.obj_init_pos - self.sim.model.site_pos[self.model.site_name2id("goal")] = self._target_pos + self.model.body("door").pos = self.obj_init_pos + self.model.site("goal").pos = self._target_pos self._set_obj_xyz(0) self.maxPullDist = np.linalg.norm( - self.data.get_geom_xpos("handle")[:-1] - self._target_pos[:-1] + self.data.geom("handle").xpos[:-1] - self._target_pos[:-1] ) self.target_reward = 1000 * self.maxPullDist + 1000 * 2 @@ -161,7 +163,7 @@ def _reward_pos(obs, theta): return ready_to_open, opened def compute_reward(self, actions, obs): - theta = self.data.get_joint_qpos("doorjoint") + theta = self.data.joint("doorjoint").qpos reward_grab = SawyerDoorEnvV2._reward_grab_effort(actions) reward_steps = SawyerDoorEnvV2._reward_pos(obs, theta) @@ -174,6 +176,7 @@ def compute_reward(self, actions, obs): ) # Override reward on success flag + reward = reward[0] if abs(obs[4] - self._target_pos[0]) <= 0.08: reward = 10.0 @@ -182,3 +185,23 @@ def compute_reward(self, actions, obs): reward_grab, *reward_steps, ) + + +class TrainDoorOpenv2(SawyerDoorEnvV2): + tasks = None + + def __init__(self): + SawyerDoorEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, 
options=options) + + +class TestDoorOpenv2(SawyerDoorEnvV2): + tasks = None + + def __init__(self): + SawyerDoorEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py index 3a5c4072f..48bcb30b7 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_close_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +13,7 @@ class SawyerDrawerCloseEnvV2(SawyerXYZEnv): _TARGET_RADIUS = 0.04 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.9, 0.0) @@ -22,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": np.array( [ @@ -95,7 +100,10 @@ def reset_model(self): # Compute nightstand position self.obj_init_pos = self._get_state_rand_vec() # Set mujoco body to computed position - self.sim.model.body_pos[self.model.body_name2id("drawer")] = self.obj_init_pos + + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "drawer") + ] = self.obj_init_pos # Set _target_pos to current drawer position (closed) self._target_pos = self.obj_init_pos + np.array([0.0, -0.16, 0.09]) # Pull drawer out all the way and mark its starting position @@ -144,3 +152,23 @@ def compute_reward(self, action, obs): reward *= 10 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainDrawerClosev2(SawyerDrawerCloseEnvV2): + tasks = None + + def 
__init__(self): + SawyerDrawerCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDrawerClosev2(SawyerDrawerCloseEnvV2): + tasks = None + + def __init__(self): + SawyerDrawerCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py index 28cd3f8e8..f66b87a72 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerDrawerOpenEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.9, 0.0) @@ -20,8 +21,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": np.array( [ @@ -82,7 +87,7 @@ def _get_pos_objects(self): return self.get_body_com("drawer_link") + np.array([0.0, -0.16, 0.0]) def _get_quat_objects(self): - return self.sim.data.get_body_xquat("drawer_link") + return self.data.body("drawer_link").xquat def reset_model(self): self._reset_hand() @@ -91,11 +96,15 @@ def reset_model(self): # Compute nightstand position self.obj_init_pos = self._get_state_rand_vec() # Set mujoco body to computed position - self.sim.model.body_pos[self.model.body_name2id("drawer")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "drawer") + ] = 
self.obj_init_pos + # Set _target_pos to current drawer position (closed) minus an offset self._target_pos = self.obj_init_pos + np.array( [0.0, -0.16 - self.maxDist, 0.09] ) + mujoco.mj_forward(self.model, self.data) return self._get_obs() @@ -137,3 +146,23 @@ def compute_reward(self, action, obs): reward_for_caging, reward_for_opening, ) + + +class TrainDrawerOpenv2(SawyerDrawerOpenEnvV2): + tasks = None + + def __init__(self): + SawyerDrawerOpenEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestDrawerOpenv2(SawyerDrawerOpenEnvV2): + tasks = None + + def __init__(self): + SawyerDrawerOpenEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py index 246134e72..432b35131 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_close_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerFaucetCloseEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, -0.15) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.8, 0.0) @@ -22,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.8, 0.0]), "hand_init_pos": np.array([0.0, 0.4, 0.2]), @@ -75,7 +80,7 @@ def _target_site_config(self): ] def _get_quat_objects(self): - return self.sim.data.get_body_xquat("faucetBase") + return 
self.data.body("faucetBase").xquat def _get_pos_objects(self): return self._get_site_pos("handleStartClose") + np.array([0.0, 0.0, -0.01]) @@ -86,14 +91,14 @@ def reset_model(self): # Compute faucet position self.obj_init_pos = self._get_state_rand_vec() # Set mujoco body to computed position - self.sim.model.body_pos[ - self.model.body_name2id("faucetBase") + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "faucetBase") ] = self.obj_init_pos self._target_pos = self.obj_init_pos + np.array( [-self._handle_length, 0.0, 0.125] ) - + mujoco.mj_forward(self.model, self.data) return self._get_obs() def _reset_hand(self): @@ -135,3 +140,23 @@ def compute_reward(self, action, obs): reward = 10 if target_to_obj <= self._target_radius else reward return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainFaucetClosev2(SawyerFaucetCloseEnvV2): + tasks = None + + def __init__(self): + SawyerFaucetCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestFaucetClosev2(SawyerFaucetCloseEnvV2): + tasks = None + + def __init__(self): + SawyerFaucetCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py index b4a9004de..21ef8d449 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_faucet_open_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,7 +11,7 @@ class SawyerFaucetOpenEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): 
hand_low = (-0.5, 0.40, -0.15) hand_high = (0.5, 1, 0.5) obj_low = (-0.05, 0.8, 0.0) @@ -22,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.8, 0.0]), "hand_init_pos": np.array([0.0, 0.4, 0.2]), @@ -78,7 +83,7 @@ def _get_pos_objects(self): return self._get_site_pos("handleStartOpen") + np.array([0.0, 0.0, -0.01]) def _get_quat_objects(self): - return self.sim.data.get_body_xquat("faucetBase") + return self.data.body("faucetBase").xquat def reset_model(self): self._reset_hand() @@ -86,14 +91,14 @@ def reset_model(self): # Compute faucet position self.obj_init_pos = self._get_state_rand_vec() # Set mujoco body to computed position - self.sim.model.body_pos[ - self.model.body_name2id("faucetBase") + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "faucetBase") ] = self.obj_init_pos self._target_pos = self.obj_init_pos + np.array( [+self._handle_length, 0.0, 0.125] ) - + mujoco.mj_forward(self.model, self.data) return self._get_obs() def _reset_hand(self): @@ -138,3 +143,23 @@ def compute_reward(self, action, obs): reward = 10 if target_to_obj <= self._target_radius else reward return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainFaucetOpenv2(SawyerFaucetOpenEnvV2): + tasks = None + + def __init__(self): + SawyerFaucetOpenEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestFaucetOpenv2(SawyerFaucetOpenEnvV2): + tasks = None + + def __init__(self): + SawyerFaucetOpenEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py index 
13e5ed502..6ed0e2f55 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +13,7 @@ class SawyerHammerEnvV2(SawyerXYZEnv): HAMMER_HANDLE_LENGTH = 0.14 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.4, 0.0) @@ -24,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "hammer_init_pos": np.array([0, 0.5, 0.0]), "hand_init_pos": np.array([0, 0.4, 0.2]), @@ -75,10 +80,7 @@ def _get_pos_objects(self): def _get_quat_objects(self): return np.hstack( - ( - self.sim.data.get_body_xquat("hammer"), - self.sim.data.get_body_xquat("nail_link"), - ) + (self.data.body("hammer").xquat, self.data.body("nail_link").xquat) ) def _set_hammer_xyz(self, pos): @@ -92,9 +94,9 @@ def reset_model(self): self._reset_hand() # Set position of box & nail (these are not randomized) - self.sim.model.body_pos[self.model.body_name2id("box")] = np.array( - [0.24, 0.85, 0.0] - ) + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = np.array([0.24, 0.85, 0.0]) # Update _target_pos self._target_pos = self._get_site_pos("goal") @@ -160,7 +162,7 @@ def compute_reward(self, actions, obs): reward = (2.0 * reward_grab + 6.0 * reward_in_place) * reward_quat # Override reward on success. 
We check that reward is above a threshold # because this env's success metric could be hacked easily - success = self.data.get_joint_qpos("NailSlideJoint") > 0.09 + success = self.data.joint("NailSlideJoint").qpos > 0.09 if success and reward > 5.0: reward = 10.0 @@ -171,3 +173,23 @@ def compute_reward(self, actions, obs): reward_in_place, success, ) + + +class TrainHammerv2(SawyerHammerEnvV2): + tasks = None + + def __init__(self): + SawyerHammerEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestHammerv2(SawyerHammerEnvV2): + tasks = None + + def __init__(self): + SawyerHammerEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py index 06150c7e1..32ea219f2 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hand_insert_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +12,7 @@ class SawyerHandInsertEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.05 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, -0.15) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.6, 0.05) @@ -24,8 +24,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.6, 0.05]), "obj_init_angle": 0.3, @@ -77,18 +81,17 @@ def evaluate_state(self, obs, action): @property def _get_id_main_object(self): - return self.unwrapped.model.geom_name2id("objGeom") + return 
self.model.geom("objGeom").id def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("obj") + return self.data.body("obj").xquat def reset_model(self): self._reset_hand() self.prev_obs = self._get_curr_obs_combined_no_goal() - self._target_pos = self.goal.copy() self.obj_init_angle = self.init_config["obj_init_angle"] self.objHeight = self.get_body_com("obj")[2] @@ -133,3 +136,23 @@ def compute_reward(self, action, obs): if target_to_obj < self.TARGET_RADIUS: reward = 10.0 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainHandInsertv2(SawyerHandInsertEnvV2): + tasks = None + + def __init__(self): + SawyerHandInsertEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestHandInsertv2(SawyerHandInsertEnvV2): + tasks = None + + def __init__(self): + SawyerHandInsertEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py index 2df864364..9ab35aa61 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_side_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -25,18 +26,22 @@ class SawyerHandlePressSideEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) - hand_high = (0.5, 1, 0.5) + hand_high = (0.5, 1.0, 0.5) obj_low = (-0.35, 0.65, -0.001) - obj_high = (-0.25, 0.75, +0.001) + obj_high = (-0.25, 0.75, 
0.001) super().__init__( self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([-0.3, 0.7, 0.0]), "hand_init_pos": np.array( @@ -104,7 +109,10 @@ def reset_model(self): self._reset_hand() self.obj_init_pos = self._get_state_rand_vec() - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos self._set_obj_xyz(-0.001) self._target_pos = self._get_site_pos("goalPress") self._handle_init_pos = self._get_pos_objects() @@ -113,8 +121,6 @@ def reset_model(self): def compute_reward(self, actions, obs): del actions - - # objPos = obs[4:7] obj = self._get_pos_objects() tcp = self.tcp_center target = self._target_pos.copy() @@ -147,3 +153,23 @@ def compute_reward(self, actions, obs): reward = 1 if target_to_obj <= self.TARGET_RADIUS else reward reward *= 10 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainHandlePressSidev2(SawyerHandlePressSideEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePressSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestHandlePressSidev2(SawyerHandlePressSideEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePressSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_v2.py index 63fa0ada7..ac77fb84c 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_press_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from 
gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,11 +13,11 @@ class SawyerHandlePressEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) - hand_high = (0.5, 1, 0.5) + hand_high = (0.5, 1.0, 0.5) obj_low = (-0.1, 0.8, -0.001) - obj_high = (0.1, 0.9, +0.001) + obj_high = (0.1, 0.9, 0.001) goal_low = (-0.1, 0.55, 0.04) goal_high = (0.1, 0.70, 0.08) @@ -24,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.9, 0.0]), "hand_init_pos": np.array( @@ -90,12 +95,15 @@ def reset_model(self): self._reset_hand() self.obj_init_pos = self._get_state_rand_vec() - - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos self._set_obj_xyz(-0.001) self._target_pos = self._get_site_pos("goalPress") self.maxDist = np.abs( - self.data.site_xpos[self.model.site_name2id("handleStart")][-1] + self.data.site_xpos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "handleStart") + ][-1] - self._target_pos[-1] ) self.target_reward = 1000 * self.maxDist + 1000 * 2 @@ -105,8 +113,6 @@ def reset_model(self): def compute_reward(self, actions, obs): del actions - - # objPos = obs[4:7] obj = self._get_pos_objects() tcp = self.tcp_center target = self._target_pos.copy() @@ -139,3 +145,23 @@ def compute_reward(self, actions, obs): reward = 1 if target_to_obj <= self.TARGET_RADIUS else reward reward *= 10 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainHandlePressv2(SawyerHandlePressEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePressEnvV2.__init__(self, 
self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestHandlePressv2(SawyerHandlePressEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePressEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_side_v2.py index 224b475a4..fea80b929 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_side_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,9 +11,9 @@ class SawyerHandlePullSideEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) - hand_high = (0.5, 1, 0.5) + hand_high = (0.5, 1.0, 0.5) obj_low = (-0.35, 0.65, 0.0) obj_high = (-0.25, 0.75, 0.0) @@ -20,8 +21,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([-0.3, 0.7, 0.0]), "hand_init_pos": np.array( @@ -48,7 +53,6 @@ def model_name(self): @_assert_task_is_set def evaluate_state(self, obs, action): obj = obs[4:7] - ( reward, tcp_to_obj, @@ -93,12 +97,15 @@ def reset_model(self): self._reset_hand() self.obj_init_pos = self._get_state_rand_vec() - - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos self._set_obj_xyz(-0.1) self._target_pos = self._get_site_pos("goalPull") self.maxDist = np.abs( - 
self.data.site_xpos[self.model.site_name2id("handleStart")][-1] + self.data.site_xpos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "handleStart") + ][-1] - self._target_pos[-1] ) self.target_reward = 1000 * self.maxDist + 1000 * 2 @@ -149,3 +156,23 @@ def compute_reward(self, action, obs): if target_to_obj < self.TARGET_RADIUS: reward = 10.0 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainHandlePullSidev2(SawyerHandlePullSideEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePullSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestHandlePullSidev2(SawyerHandlePullSideEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePullSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_v2.py index 2710cf3cc..f29df9ca2 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_handle_pull_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -10,11 +11,11 @@ class SawyerHandlePullEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) - hand_high = (0.5, 1, 0.5) + hand_high = (0.5, 1.0, 0.5) obj_low = (-0.1, 0.8, -0.001) - obj_high = (0.1, 0.9, +0.001) + obj_high = (0.1, 0.9, 0.001) goal_low = (-0.1, 0.55, 0.04) goal_high = (0.1, 0.70, 0.18) @@ -22,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks 
= tasks + self.init_config = { "obj_init_pos": np.array([0, 0.9, 0.0]), "hand_init_pos": np.array( @@ -47,7 +52,6 @@ def model_name(self): @_assert_task_is_set def evaluate_state(self, obs, action): obj = obs[4:7] - ( reward, tcp_to_obj, @@ -92,7 +96,9 @@ def reset_model(self): self._reset_hand() self.obj_init_pos = self._get_state_rand_vec() - self.sim.model.body_pos[self.model.body_name2id("box")] = self.obj_init_pos + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = self.obj_init_pos self._set_obj_xyz(-0.1) self._target_pos = self._get_site_pos("goalPull") @@ -126,13 +132,32 @@ def compute_reward(self, action, obs): tcp_opened = obs[3] tcp_to_obj = np.linalg.norm(obj - self.tcp_center) - if ( tcp_to_obj < 0.035 and tcp_opened > 0 - and obj[2] - 0.01 > self.obj_init_pos[2] + and obj[1] - 0.01 > self.obj_init_pos[2] ): reward += 1.0 + 5.0 * in_place if target_to_obj < self.TARGET_RADIUS: reward = 10.0 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainHandlePullv2(SawyerHandlePullEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestHandlePullv2(SawyerHandlePullEnvV2): + tasks = None + + def __init__(self): + SawyerHandlePullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py index 176470c78..af67a7ad8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_lever_pull_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs 
import reward_utils @@ -26,7 +27,7 @@ class SawyerLeverPullEnvV2(SawyerXYZEnv): LEVER_RADIUS = 0.2 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, -0.15) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.7, 0.0) @@ -36,8 +37,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.7, 0.0]), "hand_init_pos": np.array([0, 0.4, 0.2], dtype=np.float32), @@ -89,19 +94,22 @@ def _get_pos_objects(self): return self._get_site_pos("leverStart") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def reset_model(self): self._reset_hand() self.obj_init_pos = self._get_state_rand_vec() - self.sim.model.body_pos[self.model.body_name2id("lever")] = self.obj_init_pos - + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "lever") + ] = self.obj_init_pos self._lever_pos_init = self.obj_init_pos + np.array( [0.12, -self.LEVER_RADIUS, 0.25] ) self._target_pos = self.obj_init_pos + np.array( [0.12, 0.0, 0.25 + self.LEVER_RADIUS] ) + mujoco.mj_forward(self.model, self.data) return self._get_obs() def compute_reward(self, action, obs): @@ -131,7 +139,7 @@ def compute_reward(self, action, obs): # The skill of the agent should be measured by its ability to get the # lever to point straight upward. This means we'll be measuring the # current angle of the lever's joint, and comparing with 90deg. 
- lever_angle = -self.data.get_joint_qpos("LeverAxis") + lever_angle = -self.data.joint("LeverAxis").qpos lever_angle_desired = np.pi / 2.0 lever_error = abs(lever_angle - lever_angle_desired) @@ -166,3 +174,23 @@ def compute_reward(self, action, obs): lever_error, lever_engagement, ) + + +class TrainLeverPullv2(SawyerLeverPullEnvV2): + tasks = None + + def __init__(self): + SawyerLeverPullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestLeverPullv2(SawyerLeverPullEnvV2): + tasks = None + + def __init__(self): + SawyerLeverPullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py index c50c74f2e..568457f6f 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_insertion_side_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -29,7 +30,7 @@ class SawyerPegInsertionSideEnvV2(SawyerXYZEnv): the hole's position, as opposed to hand_low and hand_high """ - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_init_pos = (0, 0.6, 0.2) hand_low = (-0.5, 0.40, 0.05) @@ -43,8 +44,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.6, 0.02]), "hand_init_pos": np.array([0, 0.6, 0.2]), @@ -108,24 +113,21 @@ def _get_pos_objects(self): return self._get_site_pos("pegGrasp") def _get_quat_objects(self): - return 
Rotation.from_matrix(self.data.get_site_xmat("pegGrasp")).as_quat() + geom_xmat = self.data.site("pegGrasp").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def reset_model(self): self._reset_hand() - - pos_peg = self.obj_init_pos - pos_box = self.goal pos_peg, pos_box = np.split(self._get_state_rand_vec(), 2) while np.linalg.norm(pos_peg[:2] - pos_box[:2]) < 0.1: pos_peg, pos_box = np.split(self._get_state_rand_vec(), 2) - self.obj_init_pos = pos_peg self.peg_head_pos_init = self._get_site_pos("pegHead") self._set_obj_xyz(self.obj_init_pos) - - self.sim.model.body_pos[self.model.body_name2id("box")] = pos_box + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = pos_box self._target_pos = pos_box + np.array([0.03, 0.0, 0.13]) - return self._get_obs() def compute_reward(self, action, obs): @@ -208,3 +210,23 @@ def compute_reward(self, action, obs): collision_boxes, ip_orig, ] + + +class TrainPegInsertionSidev2(SawyerPegInsertionSideEnvV2): + tasks = None + + def __init__(self): + SawyerPegInsertionSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPegInsertionSidev2(SawyerPegInsertionSideEnvV2): + tasks = None + + def __init__(self): + SawyerPegInsertionSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py index 70257fd22..1b2d776ec 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_peg_unplug_side_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for 
@@ -10,7 +11,7 @@ class SawyerPegUnplugSideEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.25, 0.6, -0.001) @@ -22,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([-0.225, 0.6, 0.05]), "hand_init_pos": np.array((0, 0.6, 0.2)), @@ -74,7 +79,7 @@ def _get_pos_objects(self): return self._get_site_pos("pegEnd") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("plug1") + return self.data.body("plug1").xquat def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -88,8 +93,9 @@ def reset_model(self): self._reset_hand() pos_box = self._get_state_rand_vec() - self.sim.model.body_pos[self.model.body_name2id("box")] = pos_box - + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box") + ] = pos_box pos_plug = pos_box + np.array([0.044, 0.0, 0.131]) self._set_obj_xyz(pos_plug) self.obj_init_pos = self._get_site_pos("pegEnd") @@ -147,3 +153,23 @@ def compute_reward(self, action, obs): in_place, float(grasp_success), ) + + +class TrainPegUnplugSidev2(SawyerPegUnplugSideEnvV2): + tasks = None + + def __init__(self): + SawyerPegUnplugSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPegUnplugSidev2(SawyerPegUnplugSideEnvV2): + tasks = None + + def __init__(self): + SawyerPegUnplugSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py index 8a9341dd0..b3ffeb807 100644 --- 
a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_out_of_hole_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +12,7 @@ class SawyerPickOutOfHoleEnvV2(SawyerXYZEnv): _TARGET_RADIUS = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, -0.05) hand_high = (0.5, 1, 0.5) obj_low = (0, 0.75, 0.02) @@ -24,8 +24,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.6, 0.0]), "obj_init_angle": 0.3, @@ -88,7 +92,7 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return self.sim.data.get_body_xquat("obj") + return self.data.body("obj").xquat def reset_model(self): self._reset_hand() @@ -163,3 +167,23 @@ def compute_reward(self, action, obs): object_grasped, in_place, ) + + +class TrainPickOutOfHolev2(SawyerPickOutOfHoleEnvV2): + tasks = None + + def __init__(self): + SawyerPickOutOfHoleEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPickOutOfHolev2(SawyerPickOutOfHoleEnvV2): + tasks = None + + def __init__(self): + SawyerPickOutOfHoleEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py index 5ac84f1fd..5cbbdac9e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_v2.py @@ -1,5 +1,5 @@ import numpy as np -from 
gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -25,7 +25,7 @@ class SawyerPickPlaceEnvV2(SawyerXYZEnv): - (6/15/20) Separated reach-push-pick-place into 3 separate envs. """ - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.8, 0.05) goal_high = (0.1, 0.9, 0.3) hand_low = (-0.5, 0.40, 0.05) @@ -37,8 +37,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.6, 0.02]), @@ -97,13 +101,15 @@ def evaluate_state(self, obs, action): @property def _get_id_main_object(self): - return self.unwrapped.model.geom_name2id("objGeom") + return self.data.geom("objGeom").id def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + return Rotation.from_matrix( + self.data.geom("objGeom").xmat.reshape(3, 3) + ).as_quat() def fix_extreme_obj_pos(self, orig_init_pos): # This is to account for meshes for the geom and object are not @@ -133,7 +139,6 @@ def reset_model(self): self.init_right_pad = self.get_body_com("rightpad") self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 return self._get_obs() @@ -230,3 +235,23 @@ def compute_reward(self, action, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainPickPlacev2(SawyerPickPlaceEnvV2): + tasks = None + + def __init__(self): + SawyerPickPlaceEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPickPlacev2(SawyerPickPlaceEnvV2): + tasks = None + + def __init__(self): + SawyerPickPlaceEnvV2.__init__(self, self.tasks) + + 
def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py index 7c7785bb1..b16401988 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_pick_place_wall_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -26,7 +26,7 @@ class SawyerPickPlaceWallEnvV2(SawyerXYZEnv): reach-push-pick-place-wall. """ - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.05, 0.85, 0.05) goal_high = (0.05, 0.9, 0.3) hand_low = (-0.5, 0.40, 0.05) @@ -38,8 +38,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.6, 0.02]), @@ -96,19 +100,21 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("objGeom") + return self.data.geom("objGeom").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + return Rotation.from_matrix( + self.data.geom("objGeom").xmat.reshape(3, 3) + ).as_quat() def adjust_initObjPos(self, orig_init_pos): # This is to account for meshes for the geom and object are not aligned # If this is not done, the object could be initialized in an extreme position - diff = self.get_body_com("obj")[:2] - self.data.get_geom_xpos("objGeom")[:2] + diff = self.get_body_com("obj")[:2] - self.data.geom("objGeom").xpos[:2] adjustedPos = orig_init_pos[:2] + diff # The convention we follow is that body_com[2] is always 0, and geom_pos[2] is the object height - return 
[adjustedPos[0], adjustedPos[1], self.data.get_geom_xpos("objGeom")[-1]] + return [adjustedPos[0], adjustedPos[1], self.data.geom("objGeom").xpos[-1]] def reset_model(self): self._reset_hand() @@ -125,7 +131,6 @@ def reset_model(self): self.obj_init_pos = goal_pos[:3] self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 return self._get_obs() @@ -197,3 +202,23 @@ def compute_reward(self, action, obs): object_grasped, in_place_part2, ] + + +class TrainPickPlaceWallv2(SawyerPickPlaceWallEnvV2): + tasks = None + + def __init__(self): + SawyerPickPlaceWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPickPlaceWallv2(SawyerPickPlaceWallEnvV2): + tasks = None + + def __init__(self): + SawyerPickPlaceWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_side_v2.py index b2a49f3ce..9ae3cef37 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_side_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -26,7 +27,7 @@ class SawyerPlateSlideBackSideEnvV2(SawyerXYZEnv): - (6/22/20) Cabinet now sits on ground, instead of .02 units above it """ - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.05, 0.6, 0.015) goal_high = (0.15, 0.6, 0.015) hand_low = (-0.5, 0.40, 0.05) @@ -38,8 +39,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { 
"obj_init_angle": 0.3, "obj_init_pos": np.array([-0.25, 0.6, 0.02], dtype=np.float32), @@ -86,10 +91,11 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("puck") + return self.data.geom("puck").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("puck")).as_quat() + geom_xmat = self.data.geom("puck").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _get_obs_dict(self): return dict( @@ -113,9 +119,8 @@ def reset_model(self): rand_vec = self._get_state_rand_vec() self.obj_init_pos = rand_vec[:3] self._target_pos = rand_vec[3:] - - self.sim.model.body_pos[ - self.model.body_name2id("puck_goal") + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "puck_goal") ] = self.obj_init_pos self._set_obj_xyz(np.array([-0.15, 0.0])) @@ -154,3 +159,23 @@ def compute_reward(self, actions, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainPlateSlideBackSidev2(SawyerPlateSlideBackSideEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideBackSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPlateSlideBackSidev2(SawyerPlateSlideBackSideEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideBackSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_v2.py index fec7fffbc..546169115 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_back_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box 
+from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +11,7 @@ class SawyerPlateSlideBackEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.6, 0.015) goal_high = (0.1, 0.6, 0.015) hand_low = (-0.5, 0.40, 0.05) @@ -23,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.0, 0.85, 0.0], dtype=np.float32), @@ -71,10 +75,11 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("puck") + return self.data.geom("puck").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("puck")).as_quat() + geom_xmat = self.data.geom("puck").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -91,10 +96,7 @@ def reset_model(self): rand_vec = self._get_state_rand_vec() self.obj_init_pos = rand_vec[:3] self._target_pos = rand_vec[3:] - - self.sim.model.body_pos[ - self.model.body_name2id("puck_goal") - ] = self.obj_init_pos + self.data.body("puck_goal").xpos = self._target_pos self._set_obj_xyz(np.array([0, 0.15])) return self._get_obs() @@ -132,3 +134,23 @@ def compute_reward(self, actions, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainPlateSlideBackv2(SawyerPlateSlideBackEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideBackEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPlateSlideBackv2(SawyerPlateSlideBackEnvV2): + tasks = None + + def __init__(self): + 
SawyerPlateSlideBackEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_side_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_side_v2.py index 48e3c62f0..652ced72d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_side_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_side_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +11,7 @@ class SawyerPlateSlideSideEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.3, 0.54, 0.0) goal_high = (-0.25, 0.66, 0.0) hand_low = (-0.5, 0.40, 0.05) @@ -23,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.0, 0.6, 0.0], dtype=np.float32), @@ -71,10 +75,11 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("puck") + return self.data.geom("puck").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("puck")).as_quat() + geom_xmat = self.data.geom("puck").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -91,8 +96,7 @@ def reset_model(self): rand_vec = self._get_state_rand_vec() self.obj_init_pos = rand_vec[:3] self._target_pos = rand_vec[3:] - - self.sim.model.body_pos[self.model.body_name2id("puck_goal")] = self._target_pos + self.data.body("puck_goal").xpos = self._target_pos self._set_obj_xyz(np.zeros(2)) return self._get_obs() @@ -133,3 +137,23 @@ def 
compute_reward(self, actions, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainPlateSlideSidev2(SawyerPlateSlideSideEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPlateSlideSidev2(SawyerPlateSlideSideEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideSideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_v2.py index 7af51a402..18c0751fa 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_plate_slide_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -13,7 +13,7 @@ class SawyerPlateSlideEnvV2(SawyerXYZEnv): OBJ_RADIUS = 0.04 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.85, 0.0) goal_high = (0.1, 0.9, 0.0) hand_low = (-0.5, 0.40, 0.05) @@ -25,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.0, 0.6, 0.0], dtype=np.float32), @@ -73,10 +77,11 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("puck") + return self.data.geom("puck").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("puck")).as_quat() + geom_xmat = 
self.data.geom("puck").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _set_obj_xyz(self, pos): qpos = self.data.qpos.flat.copy() @@ -95,7 +100,7 @@ def reset_model(self): self.obj_init_pos = rand_vec[:3] self._target_pos = rand_vec[3:] - self.sim.model.body_pos[self.model.body_name2id("puck_goal")] = self._target_pos + self.model.body("puck_goal").pos = self._target_pos self._set_obj_xyz(np.zeros(2)) return self._get_obs() @@ -135,3 +140,23 @@ def compute_reward(self, action, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainPlateSlidev2(SawyerPlateSlideEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPlateSlidev2(SawyerPlateSlideEnvV2): + tasks = None + + def __init__(self): + SawyerPlateSlideEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py index 06b09520c..1f8373aa8 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -14,7 +14,7 @@ class SawyerPushBackEnvV2(SawyerXYZEnv): OBJ_RADIUS = 0.007 TARGET_RADIUS = 0.05 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.6, 0.0199) goal_high = (0.1, 0.7, 0.0201) hand_low = (-0.5, 0.40, 0.05) @@ -26,8 +26,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if 
tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.8, 0.02]), "obj_init_angle": 0.3, @@ -79,19 +83,21 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("objGeom") + return self.data.geom("objGeom").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + return Rotation.from_matrix( + self.data.geom("objGeom").xmat.reshape(3, 3) + ).as_quat() def adjust_initObjPos(self, orig_init_pos): # This is to account for meshes for the geom and object are not aligned # If this is not done, the object could be initialized in an extreme position - diff = self.get_body_com("obj")[:2] - self.data.get_geom_xpos("objGeom")[:2] + diff = self.get_body_com("obj")[:2] - self.data.geom("objGeom").xpos[:2] adjustedPos = orig_init_pos[:2] + diff # The convention we follow is that body_com[2] is always 0, and geom_pos[2] is the object height - return [adjustedPos[0], adjustedPos[1], self.data.get_geom_xpos("objGeom")[-1]] + return [adjustedPos[0], adjustedPos[1], self.data.geom("objGeom").xpos[-1]] def reset_model(self): self._reset_hand() @@ -224,3 +230,23 @@ def compute_reward(self, action, obs): if target_to_obj < self.TARGET_RADIUS: reward = 10.0 return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainPushBackv2(SawyerPushBackEnvV2): + tasks = None + + def __init__(self): + SawyerPushBackEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPushBackv2(SawyerPushBackEnvV2): + tasks = None + + def __init__(self): + SawyerPushBackEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py index 
00848b508..52122c050 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -27,7 +27,7 @@ class SawyerPushEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.05 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.6, 0.02) @@ -39,8 +39,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.0, 0.6, 0.02]), @@ -99,7 +103,8 @@ def evaluate_state(self, obs, action): return reward, info def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _get_pos_objects(self): return self.get_body_com("obj") @@ -131,7 +136,6 @@ def reset_model(self): self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 return self._get_obs() @@ -164,5 +168,24 @@ def compute_reward(self, action, obs): reward += 1.0 + reward + 5.0 * in_place if target_to_obj < self.TARGET_RADIUS: reward = 10.0 - return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainPushv2(SawyerPushEnvV2): + tasks = None + + def __init__(self): + SawyerPushEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPushv2(SawyerPushEnvV2): + tasks = None + + def __init__(self): + SawyerPushEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, 
options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py index 9841bf49c..d4ee92936 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py @@ -1,7 +1,7 @@ """Version 2 of SawyerPushWallEnv.""" import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -30,7 +30,7 @@ class SawyerPushWallEnvV2(SawyerXYZEnv): OBJ_RADIUS = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.05, 0.6, 0.015) @@ -42,8 +42,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.6, 0.02]), @@ -99,15 +103,16 @@ def evaluate_state(self, obs, action): return reward, info def _get_pos_objects(self): - return self.data.get_geom_xpos("objGeom") + return self.data.geom("objGeom").xpos def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def adjust_initObjPos(self, orig_init_pos): - diff = self.get_body_com("obj")[:2] - self.data.get_geom_xpos("objGeom")[:2] + diff = self.get_body_com("obj")[:2] - self.data.geom("objGeom").xpos[:2] adjustedPos = orig_init_pos[:2] + diff - return [adjustedPos[0], adjustedPos[1], self.data.get_geom_xpos("objGeom")[-1]] + return [adjustedPos[0], adjustedPos[1], self.data.geom("objGeom").xpos[-1]] def reset_model(self): self._reset_hand() @@ -124,7 +129,6 @@ def reset_model(self): self.obj_init_pos = 
np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 return self._get_obs() def compute_reward(self, action, obs): @@ -187,3 +191,23 @@ def compute_reward(self, action, obs): object_grasped, in_place_part2, ] + + +class TrainPushWallv2(SawyerPushWallEnvV2): + tasks = None + + def __init__(self): + SawyerPushWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestPushWallv2(SawyerPushWallEnvV2): + tasks = None + + def __init__(self): + SawyerPushWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py index 4073b4d94..7b3f9cd6a 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -25,7 +26,7 @@ class SawyerReachEnvV2(SawyerXYZEnv): - (6/15/20) Separated reach-push-pick-place into 3 separate envs. 
""" - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.8, 0.05) goal_high = (0.1, 0.9, 0.3) hand_low = (-0.5, 0.40, 0.05) @@ -37,8 +38,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.0, 0.6, 0.02]), @@ -57,8 +62,6 @@ def __init__(self): ) self.goal_space = Box(np.array(goal_low), np.array(goal_high)) - self.num_resets = 0 - @property def model_name(self): return full_v2_path_for("sawyer_xyz/sawyer_reach_v2.xml") @@ -84,7 +87,8 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def fix_extreme_obj_pos(self, orig_init_pos): # This is to account for meshes for the geom and object are not @@ -109,10 +113,8 @@ def reset_model(self): self._target_pos = goal_pos[3:] self._target_pos = goal_pos[-3:] self.obj_init_pos = goal_pos[:3] - self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 - + mujoco.mj_forward(self.model, self.data) return self._get_obs() def compute_reward(self, actions, obs): @@ -134,3 +136,23 @@ def compute_reward(self, actions, obs): ) return [10 * in_place, tcp_to_target, in_place] + + +class TrainReachv2(SawyerReachEnvV2): + tasks = None + + def __init__(self): + SawyerReachEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestReachv2(SawyerReachEnvV2): + tasks = None + + def __init__(self): + SawyerReachEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py 
b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py index 7c1095a58..cf9049049 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_wall_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -25,7 +25,7 @@ class SawyerReachWallEnvV2(SawyerXYZEnv): i.e. (self._target_pos - pos_hand) """ - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.05, 0.85, 0.05) goal_high = (0.05, 0.9, 0.3) hand_low = (-0.5, 0.40, 0.05) @@ -37,8 +37,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0, 0.6, 0.02]), @@ -84,7 +88,8 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def reset_model(self): self._reset_hand() @@ -100,7 +105,6 @@ def reset_model(self): self.obj_init_pos = goal_pos[:3] self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 return self._get_obs() @@ -123,3 +127,23 @@ def compute_reward(self, actions, obs): ) return [10 * in_place, tcp_to_target, in_place] + + +class TrainReachWallv2(SawyerReachWallEnvV2): + tasks = None + + def __init__(self): + SawyerReachWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestReachWallv2(SawyerReachWallEnvV2): + tasks = None + + def __init__(self): + SawyerReachWallEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, 
options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py index 421463a6f..9ac7f33de 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_shelf_place_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +12,7 @@ class SawyerShelfPlaceEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.8, 0.299) goal_high = (0.1, 0.9, 0.301) hand_low = (-0.5, 0.40, 0.05) @@ -23,8 +24,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.6, 0.02]), "obj_init_angle": 0.3, @@ -82,12 +87,13 @@ def _get_pos_objects(self): return self.get_body_com("obj") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def adjust_initObjPos(self, orig_init_pos): # This is to account for meshes for the geom and object are not aligned # If this is not done, the object could be initialized in an extreme position - diff = self.get_body_com("obj")[:2] - self.data.get_geom_xpos("objGeom")[:2] + diff = self.get_body_com("obj")[:2] - self.data.geom("objGeom").xpos[:2] adjustedPos = orig_init_pos[:2] + diff # The convention we follow is that body_com[2] is always 0, and geom_pos[2] is the object height @@ -95,13 +101,6 @@ def adjust_initObjPos(self, orig_init_pos): def reset_model(self): self._reset_hand() - self.sim.model.body_pos[ - self.model.body_name2id("shelf") - ] = self.goal.copy() - np.array([0, 0, 0.3]) - 
self._target_pos = ( - self.sim.model.site_pos[self.model.site_name2id("goal")] - + self.sim.model.body_pos[self.model.body_name2id("shelf")] - ) self.obj_init_pos = self.adjust_initObjPos(self.init_config["obj_init_pos"]) self.obj_init_angle = self.init_config["obj_init_angle"] @@ -112,14 +111,21 @@ def reset_model(self): self.obj_init_pos = np.concatenate( (base_shelf_pos[:2], [self.obj_init_pos[-1]]) ) - self.sim.model.body_pos[self.model.body_name2id("shelf")] = base_shelf_pos[-3:] + + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "shelf") + ] = base_shelf_pos[-3:] + mujoco.mj_forward(self.model, self.data) self._target_pos = ( - self.sim.model.site_pos[self.model.site_name2id("goal")] - + self.sim.model.body_pos[self.model.body_name2id("shelf")] + self.model.site_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "goal") + ] + + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "shelf") + ] ) self._set_obj_xyz(self.obj_init_pos) - self.num_resets += 1 return self._get_obs() @@ -180,5 +186,24 @@ def compute_reward(self, action, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 - return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainShelfPlacev2(SawyerShelfPlaceEnvV2): + tasks = None + + def __init__(self): + SawyerShelfPlaceEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestShelfPlacev2(SawyerShelfPlaceEnvV2): + tasks = None + + def __init__(self): + SawyerShelfPlaceEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py index 06bec0b99..d09c05014 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py +++ 
b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -14,7 +15,7 @@ class SawyerSoccerEnvV2(SawyerXYZEnv): OBJ_RADIUS = 0.013 TARGET_RADIUS = 0.07 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): goal_low = (-0.1, 0.8, 0.0) goal_high = (0.1, 0.9, 0.0) hand_low = (-0.5, 0.40, 0.05) @@ -26,8 +27,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0, 0.6, 0.03]), "obj_init_angle": 0.3, @@ -83,7 +88,8 @@ def _get_pos_objects(self): return self.get_body_com("soccer_ball") def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_body_xmat("soccer_ball")).as_quat() + geom_xmat = self.data.body("soccer_ball").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def reset_model(self): self._reset_hand() @@ -96,10 +102,9 @@ def reset_model(self): goal_pos = self._get_state_rand_vec() self._target_pos = goal_pos[3:] self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]])) - self.sim.model.body_pos[ - self.model.body_name2id("goal_whole") + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "goal_whole") ] = self._target_pos - self._set_obj_xyz(self.obj_init_pos) self.maxPushDist = np.linalg.norm( self.obj_init_pos[:2] - np.array(self._target_pos)[:2] @@ -228,3 +233,23 @@ def compute_reward(self, action, obs): object_grasped, in_place, ) + + +class TrainSoccerv2(SawyerSoccerEnvV2): + tasks = None + + def __init__(self): + SawyerSoccerEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestSoccerv2(SawyerSoccerEnvV2): + tasks = None + 
+ def __init__(self): + SawyerSoccerEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py index 41d8bf83d..8acd36c2d 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_pull_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +11,7 @@ class SawyerStickPullEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.35, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.55, 0.000) @@ -23,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "stick_init_pos": np.array([0, 0.6, 0.02]), "hand_init_pos": np.array([0, 0.6, 0.2]), @@ -93,10 +97,18 @@ def _get_pos_objects(self): ) def _get_quat_objects(self): + geom_xmat = self.data.body("stick").xmat.reshape(3, 3) return np.hstack( ( - Rotation.from_matrix(self.data.get_body_xmat("stick")).as_quat(), - np.array([0.0, 0.0, 0.0, 0.0]), + Rotation.from_matrix(geom_xmat).as_quat(), + np.array( + [ + 0.0, + 0.0, + 0.0, + 0.0, + ] + ), ) ) @@ -233,3 +245,23 @@ def compute_reward(self, action, obs): object_grasped, stick_in_place, ] + + +class TrainStickPullv2(SawyerStickPullEnvV2): + tasks = None + + def __init__(self): + SawyerStickPullEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestStickPullv2(SawyerStickPullEnvV2): + tasks = None + + def __init__(self): + SawyerStickPullEnvV2.__init__(self, self.tasks) + + def reset(self, 
seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py index 81ca6bad0..e83c69509 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_stick_push_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -11,7 +11,7 @@ class SawyerStickPushEnvV2(SawyerXYZEnv): - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.08, 0.58, 0.000) @@ -23,8 +23,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "stick_init_pos": np.array([-0.1, 0.6, 0.02]), "hand_init_pos": np.array([0, 0.6, 0.2]), @@ -88,10 +92,18 @@ def _get_pos_objects(self): ) def _get_quat_objects(self): + geom_xmat = self.data.body("stick").xmat.reshape(3, 3) return np.hstack( ( - Rotation.from_matrix(self.data.get_body_xmat("stick")).as_quat(), - np.array([0.0, 0.0, 0.0, 0.0]), + Rotation.from_matrix(geom_xmat).as_quat(), + np.array( + [ + 0.0, + 0.0, + 0.0, + 0.0, + ] + ), ) ) @@ -283,7 +295,6 @@ def compute_reward(self, action, obs): if container_to_target <= _TARGET_RADIUS: reward = 10.0 - return [ reward, tcp_to_stick, @@ -292,3 +303,23 @@ def compute_reward(self, action, obs): object_grasped, stick_in_place, ] + + +class TrainStickPushv2(SawyerStickPushEnvV2): + tasks = None + + def __init__(self): + SawyerStickPushEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestStickPushv2(SawyerStickPushEnvV2): + tasks = None + + def __init__(self): + 
SawyerStickPushEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py index 0e5ce0f2f..a2075a21a 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_into_goal_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from scipy.spatial.transform import Rotation from metaworld.envs import reward_utils @@ -13,7 +13,7 @@ class SawyerSweepIntoGoalEnvV2(SawyerXYZEnv): OBJ_RADIUS = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.6, 0.02) @@ -25,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0.0, 0.6, 0.02]), "obj_init_angle": 0.3, @@ -73,7 +77,8 @@ def evaluate_state(self, obs, action): return reward, info def _get_quat_objects(self): - return Rotation.from_matrix(self.data.get_geom_xmat("objGeom")).as_quat() + geom_xmat = self.data.geom("objGeom").xmat.reshape(3, 3) + return Rotation.from_matrix(geom_xmat).as_quat() def _get_pos_objects(self): return self.get_body_com("obj") @@ -211,3 +216,23 @@ def compute_reward(self, action, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class TrainSweepIntoGoalv2(SawyerSweepIntoGoalEnvV2): + tasks = None + + def __init__(self): + SawyerSweepIntoGoalEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestSweepIntoGoalv2(SawyerSweepIntoGoalEnvV2): + 
tasks = None + + def __init__(self): + SawyerSweepIntoGoalEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py index 98af3aedf..64e8a0f1e 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py @@ -1,5 +1,5 @@ import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -12,7 +12,7 @@ class SawyerSweepEnvV2(SawyerXYZEnv): OBJ_RADIUS = 0.02 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): init_puck_z = 0.1 hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1.0, 0.5) @@ -25,8 +25,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_pos": np.array([0.0, 0.6, 0.02]), "obj_init_angle": 0.3, @@ -74,16 +78,16 @@ def evaluate_state(self, obs, action): return reward, info def _get_quat_objects(self): - return self.data.get_body_xquat("obj") + return self.data.body("obj").xquat def _get_pos_objects(self): - return self.get_body_com("obj") + return self.data.body("obj").xpos def reset_model(self): self._reset_hand() self._target_pos = self.goal.copy() self.obj_init_pos = self.init_config["obj_init_pos"] - self.objHeight = self.get_body_com("obj")[2] + self.objHeight = self._get_pos_objects()[2] obj_pos = self._get_state_rand_vec() self.obj_init_pos = np.concatenate((obj_pos[:2], [self.obj_init_pos[-1]])) @@ -211,3 +215,23 @@ def compute_reward(self, action, obs): if obj_to_target < _TARGET_RADIUS: reward = 10.0 return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place] + + +class 
TrainSweepv2(SawyerSweepEnvV2): + tasks = None + + def __init__(self): + SawyerSweepEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestSweepv2(SawyerSweepEnvV2): + tasks = None + + def __init__(self): + SawyerSweepEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py index c12bcf0ab..c64a046a2 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_close_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -24,7 +25,7 @@ class SawyerWindowCloseEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.05 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): liftThresh = 0.02 hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) @@ -35,8 +36,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": 0.3, "obj_init_pos": np.array([0.1, 0.785, 0.16], dtype=np.float32), @@ -100,13 +105,15 @@ def reset_model(self): self.obj_init_pos = self._get_state_rand_vec() self._target_pos = self.obj_init_pos.copy() + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "window") + ] = self.obj_init_pos - self.sim.model.body_pos[self.model.body_name2id("window")] = self.obj_init_pos self.window_handle_pos_init = self._get_pos_objects() + np.array( [0.2, 0.0, 0.0] ) - self.data.set_joint_qpos("window_slide", 0.2) - + self.data.joint("window_slide").qpos = 0.2 + 
mujoco.mj_forward(self.model, self.data) return self._get_obs() def _reset_hand(self): @@ -147,3 +154,23 @@ def compute_reward(self, actions, obs): reward = 10 * reward_utils.hamacher_product(reach, in_place) return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainWindowClosev2(SawyerWindowCloseEnvV2): + tasks = None + + def __init__(self): + SawyerWindowCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestWindowClosev2(SawyerWindowCloseEnvV2): + tasks = None + + def __init__(self): + SawyerWindowCloseEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py index a846119cb..7ad337a64 100644 --- a/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py +++ b/metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_window_open_v2.py @@ -1,5 +1,6 @@ +import mujoco import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from metaworld.envs import reward_utils from metaworld.envs.asset_path_utils import full_v2_path_for @@ -23,7 +24,7 @@ class SawyerWindowOpenEnvV2(SawyerXYZEnv): TARGET_RADIUS = 0.05 - def __init__(self): + def __init__(self, tasks=None, render_mode=None): hand_low = (-0.5, 0.40, 0.05) hand_high = (0.5, 1, 0.5) obj_low = (-0.1, 0.7, 0.16) @@ -33,8 +34,12 @@ def __init__(self): self.model_name, hand_low=hand_low, hand_high=hand_high, + render_mode=render_mode, ) + if tasks is not None: + self.tasks = tasks + self.init_config = { "obj_init_angle": np.array( [ @@ -101,11 +106,13 @@ def reset_model(self): self.obj_init_pos = self._get_state_rand_vec() self._target_pos = self.obj_init_pos + np.array([0.2, 0.0, 0.0]) + self.model.body_pos[ + mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, 
"window") + ] = self.obj_init_pos - self.sim.model.body_pos[self.model.body_name2id("window")] = self.obj_init_pos self.window_handle_pos_init = self._get_pos_objects() - self.data.set_joint_qpos("window_slide", 0.0) - + self.data.joint("window_slide").qpos = 0.0 + mujoco.mj_forward(self.model, self.data) return self._get_obs() def compute_reward(self, actions, obs): @@ -140,3 +147,23 @@ def compute_reward(self, actions, obs): reward = 10 * reward_utils.hamacher_product(reach, in_place) return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped, in_place) + + +class TrainWindowOpenv2(SawyerWindowOpenEnvV2): + tasks = None + + def __init__(self): + SawyerWindowOpenEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) + + +class TestWindowOpenv2(SawyerWindowOpenEnvV2): + tasks = None + + def __init__(self): + SawyerWindowOpenEnvV2.__init__(self, self.tasks) + + def reset(self, seed=None, options=None): + return super().reset(seed=seed, options=options) diff --git a/metaworld/policies/sawyer_assembly_v2_policy.py b/metaworld/policies/sawyer_assembly_v2_policy.py index 891fb9770..492f84686 100644 --- a/metaworld/policies/sawyer_assembly_v2_policy.py +++ b/metaworld/policies/sawyer_assembly_v2_policy.py @@ -18,14 +18,12 @@ def _parse_obs(obs): def get_action(self, obs): o_d = self._parse_obs(obs) - action = Action({"delta_pos": np.arange(3), "grab_effort": 3}) action["delta_pos"] = move( o_d["hand_pos"], to_xyz=self._desired_pos(o_d), p=10.0 ) action["grab_effort"] = self._grab_effort(o_d) - return action.array @staticmethod diff --git a/metaworld/policies/sawyer_basketball_v2_policy.py b/metaworld/policies/sawyer_basketball_v2_policy.py index 25c132033..cd0cb9bb7 100644 --- a/metaworld/policies/sawyer_basketball_v2_policy.py +++ b/metaworld/policies/sawyer_basketball_v2_policy.py @@ -19,14 +19,11 @@ def _parse_obs(obs): def get_action(self, obs): o_d = self._parse_obs(obs) - action = 
Action({"delta_pos": np.arange(3), "grab_effort": 3}) - action["delta_pos"] = move( o_d["hand_pos"], to_xyz=self._desired_pos(o_d), p=25.0 ) action["grab_effort"] = self._grab_effort(o_d) - return action.array @staticmethod @@ -51,7 +48,6 @@ def _desired_pos(o_d): def _grab_effort(o_d): pos_curr = o_d["hand_pos"] pos_ball = o_d["ball_pos"] - if ( np.linalg.norm(pos_curr[:2] - pos_ball[:2]) > 0.04 or abs(pos_curr[2] - pos_ball[2]) > 0.15 diff --git a/metaworld/policies/sawyer_box_close_v2_policy.py b/metaworld/policies/sawyer_box_close_v2_policy.py index 9472a86e2..45605068e 100644 --- a/metaworld/policies/sawyer_box_close_v2_policy.py +++ b/metaworld/policies/sawyer_box_close_v2_policy.py @@ -21,7 +21,6 @@ def get_action(self, obs): o_d = self._parse_obs(obs) action = Action({"delta_pos": np.arange(3), "grab_effort": 3}) - action["delta_pos"] = move( o_d["hand_pos"], to_xyz=self._desired_pos(o_d), p=25.0 ) diff --git a/metaworld/policies/sawyer_door_close_v2_policy.py b/metaworld/policies/sawyer_door_close_v2_policy.py index 192910cae..619a17c52 100644 --- a/metaworld/policies/sawyer_door_close_v2_policy.py +++ b/metaworld/policies/sawyer_door_close_v2_policy.py @@ -30,23 +30,23 @@ def get_action(self, obs): @staticmethod def _desired_pos(o_d): - # pos_curr = o_d["hand_pos"] + pos_curr = o_d["hand_pos"] pos_door = o_d["door_pos"] pos_door += np.array([0.05, 0.12, 0.1]) pos_goal = o_d["goal_pos"] # # if to the right of door handle/// - # if pos_curr[0] > pos_door[0]: - # # if below door handle by more than 0.2 - # if pos_curr[2] < pos_door[2] + 0.2: - # # rise above door handle by ~0.2 - # return np.array([pos_curr[0], pos_curr[1], pos_door[2] + 0.25]) - # else: - # # move toward door handle in XY plane - # return np.array([pos_door[0] - 0.02, pos_door[1], pos_curr[2]]) - # # put end effector on the outer edge of door handle (still above it) - # elif abs(pos_curr[2] - pos_door[2]) > 0.04: - # return pos_door + np.array([-0.02, 0., 0.]) - # # push from outer edge 
toward door handle's centroid + if pos_curr[0] > pos_door[0]: + # if below door handle by more than 0.2 + if pos_curr[2] < pos_door[2] + 0.2: + # rise above door handle by ~0.2 + return np.array([pos_curr[0], pos_curr[1], pos_door[2] + 0.25]) + else: + # move toward door handle in XY plane + return np.array([pos_door[0] - 0.02, pos_door[1], pos_curr[2]]) + # put end effector on the outer edge of door handle (still above it) + elif abs(pos_curr[2] - pos_door[2]) > 0.04: + return pos_door + np.array([-0.02, 0.0, 0.0]) + # push from outer edge toward door handle's centroid # else: return pos_goal diff --git a/metaworld/policies/sawyer_hammer_v2_policy.py b/metaworld/policies/sawyer_hammer_v2_policy.py index ab5fe6db4..707c95e52 100644 --- a/metaworld/policies/sawyer_hammer_v2_policy.py +++ b/metaworld/policies/sawyer_hammer_v2_policy.py @@ -17,7 +17,6 @@ def _parse_obs(obs): def get_action(self, obs): o_d = self._parse_obs(obs) - action = Action({"delta_pos": np.arange(3), "grab_effort": 3}) action["delta_pos"] = move( diff --git a/metaworld/policies/sawyer_hand_insert_v2_policy.py b/metaworld/policies/sawyer_hand_insert_v2_policy.py index af807d2b4..44e03b528 100644 --- a/metaworld/policies/sawyer_hand_insert_v2_policy.py +++ b/metaworld/policies/sawyer_hand_insert_v2_policy.py @@ -50,7 +50,6 @@ def _desired_pos(o_d): def _grab_effort(o_d): hand_pos = o_d["hand_pos"] obj_pos = o_d["obj_pos"] - if ( np.linalg.norm(hand_pos[:2] - obj_pos[:2]) > 0.02 or abs(hand_pos[2] - obj_pos[2]) > 0.1 diff --git a/metaworld/policies/sawyer_pick_place_wall_v2_policy.py b/metaworld/policies/sawyer_pick_place_wall_v2_policy.py index e694cb36c..0d5f74e41 100644 --- a/metaworld/policies/sawyer_pick_place_wall_v2_policy.py +++ b/metaworld/policies/sawyer_pick_place_wall_v2_policy.py @@ -57,7 +57,7 @@ def desired_pos(o_d): ): return np.array([pos_goal[0], pos_goal[1], pos_curr[2]]) # If not at the same Z height as the goal, move up to that plane - elif abs(pos_curr[2] - pos_goal[2]) > 
0.04: + elif abs(pos_curr[2] - pos_goal[2]) > 0.01: return np.array([pos_curr[0], pos_curr[1], pos_goal[2]]) return pos_goal @@ -65,12 +65,12 @@ def desired_pos(o_d): def grab_effort(o_d): pos_curr = o_d["hand_pos"] pos_puck = o_d["puck_pos"] - if ( np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.015 or abs(pos_curr[2] - pos_puck[2]) > 0.1 ): return 0.0 + # While end effector is moving down toward the puck, begin closing the grabber else: return 0.9 diff --git a/metaworld/policies/sawyer_push_back_v2_policy.py b/metaworld/policies/sawyer_push_back_v2_policy.py index 8d9c3dc69..db080be9b 100644 --- a/metaworld/policies/sawyer_push_back_v2_policy.py +++ b/metaworld/policies/sawyer_push_back_v2_policy.py @@ -50,7 +50,7 @@ def _grab_effort(o_d): if ( np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04 - or abs(pos_curr[2] - pos_puck[2]) > 0.085 + or abs(pos_curr[2] - pos_puck[2]) > 0.05 ): return 0.0 # While end effector is moving down toward the puck, begin closing the grabber diff --git a/metaworld/policies/sawyer_push_wall_v2_policy.py b/metaworld/policies/sawyer_push_wall_v2_policy.py index 98270fdd3..0b237246d 100644 --- a/metaworld/policies/sawyer_push_wall_v2_policy.py +++ b/metaworld/policies/sawyer_push_wall_v2_policy.py @@ -54,7 +54,6 @@ def desired_pos(o_d): def grab_effort(o_d): pos_curr = o_d["hand_pos"] pos_obj = o_d["obj_pos"] - if ( np.linalg.norm(pos_curr[:2] - pos_obj[:2]) > 0.02 or abs(pos_curr[2] - pos_obj[2]) > 0.1 diff --git a/pyproject.toml b/pyproject.toml index e64056bdb..2ec754f8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,10 +25,11 @@ classifiers = [ 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] dependencies = [ - "gym>=0.15.4", - "mujoco-py<2.2,>=2.0", + "gymnasium>=0.28.0", + "mujoco", "numpy>=1.18", "scipy>=1.4.1", + "imageio" ] [project.optional-dependencies] diff --git a/scripts/policy_testing.py b/scripts/policy_testing.py new file mode 100644 index 000000000..333bf40b3 --- /dev/null +++ 
b/scripts/policy_testing.py @@ -0,0 +1,49 @@ +import random +import time + +import numpy as np + +import metaworld +from metaworld.policies.sawyer_door_lock_v2_policy import ( + SawyerDoorLockV2Policy as policy, +) + +np.set_printoptions(suppress=True) + +seed = 42 +env_name = "door-lock-v2" + +random.seed(seed) +ml1 = metaworld.MT50(seed=seed) +env = ml1.train_classes[env_name]() +task = [t for t in ml1.train_tasks if t.env_name == env_name][0] +env.set_task(task) +env.seed(seed) +env.action_space.seed(seed) +env.observation_space.seed(seed) +obs = env.reset() + +p = policy() +count = 0 +done = False + +states = [] +actions = [] +next_states = [] +rewards = [] + +dones = [] +info = {} + +while count < 500 and not done: + action = p.get_action(obs) + next_obs, _, _, _, info = env.step(action) + # env.render() + print(count, next_obs) + if int(info["success"]) == 1: + done = True + obs = next_obs + time.sleep(0.02) + count += 1 + +print(info) diff --git a/tests/helpers.py b/tests/helpers.py index b3c90094b..06ad4063d 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -4,9 +4,11 @@ def step_env(env, max_path_length=100, iterations=1, render=True): """Step env helper.""" for _ in range(iterations): - obs = env.reset() + obs, info = env.reset() for _ in range(max_path_length): - next_obs, _, done, info = env.step(env.action_space.sample()) + next_obs, _, terminated, truncated, info = env.step( + env.action_space.sample() + ) if env._partially_observable: assert (next_obs[-3:] == np.zeros(3)).all() else: @@ -27,5 +29,5 @@ def step_env(env, max_path_length=100, iterations=1, render=True): obs = next_obs if render: env.render() - if done: + if truncated or terminated: break diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py new file mode 100644 index 000000000..5d0f1ff84 --- /dev/null +++ b/tests/integration/helpers.py @@ -0,0 +1,33 @@ +import numpy as np + + +def step_env(env, max_path_length=100, iterations=1, render=True): + """Step env 
helper.""" + for _ in range(iterations): + obs = env.reset()[0] + for _ in range(max_path_length): + next_obs, _, terminated, truncated, info = env.step( + env.action_space.sample() + ) + if env._partially_observable: + assert (next_obs[-3:] == np.zeros(3)).all() + else: + assert (next_obs[-3:] == env._get_pos_goal()).all() + assert (next_obs[:3] == env.get_endeff_pos()).all() + internal_obs = env._get_pos_objects() + internal_quat = env._get_quat_objects() + assert (next_obs[4:7] == internal_obs[:3]).all() + assert (next_obs[7:11] == internal_quat[:4]).all() + if internal_obs.shape == (6,): + assert internal_quat.shape == (8,) + assert (next_obs[11:14] == internal_obs[3:]).all() + assert (next_obs[14:18] == internal_quat[4:]).all() + else: + assert (next_obs[11:14] == np.zeros(3)).all() + assert (next_obs[14:18] == np.zeros(4)).all() + assert (obs[:18] == next_obs[18:-3]).all() + obs = next_obs + if render: + env.render() + if terminated or truncated: + break diff --git a/tests/integration/test_memory_usage.py b/tests/integration/test_memory_usage.py index ea803ac0c..ef30bb43d 100644 --- a/tests/integration/test_memory_usage.py +++ b/tests/integration/test_memory_usage.py @@ -1,7 +1,7 @@ import memory_profiler import pytest -from metaworld.envs.mujoco.env_dict import ALL_V1_ENVIRONMENTS +from metaworld.envs.mujoco.env_dict import ALL_V2_ENVIRONMENTS from tests.helpers import step_env @@ -21,7 +21,7 @@ def build_and_step_all(classes): @pytest.fixture(scope="module") def mt50_usage(): profile = {} - for env_cls in ALL_V1_ENVIRONMENTS.values(): + for env_cls in ALL_V2_ENVIRONMENTS.values(): target = (build_and_step, [env_cls], {}) memory_usage = memory_profiler.memory_usage(target) profile[env_cls] = max(memory_usage) @@ -30,7 +30,7 @@ def mt50_usage(): @pytest.mark.skip -@pytest.mark.parametrize("env_cls", ALL_V1_ENVIRONMENTS.values()) +@pytest.mark.parametrize("env_cls", ALL_V2_ENVIRONMENTS.values()) def test_max_memory_usage(env_cls, mt50_usage): # No env should 
use more than 250MB # @@ -43,14 +43,14 @@ def test_max_memory_usage(env_cls, mt50_usage): @pytest.mark.skip def test_avg_memory_usage(): # average usage no greater than 60MB/env - target = (build_and_step_all, [ALL_V1_ENVIRONMENTS.values()], {}) + target = (build_and_step_all, [ALL_V2_ENVIRONMENTS.values()], {}) usage = memory_profiler.memory_usage(target) - average = max(usage) / len(ALL_V1_ENVIRONMENTS) + average = max(usage) / len(ALL_V2_ENVIRONMENTS) assert average < 60 @pytest.mark.skip def test_from_task_memory_usage(): - target = (ALL_V1_ENVIRONMENTS["reach-v1"], (), {}) + target = (ALL_V2_ENVIRONMENTS["reach-v2"], (), {}) usage = memory_profiler.memory_usage(target) assert max(usage) < 250 diff --git a/tests/integration/test_new_api.py b/tests/integration/test_new_api.py index 7468fecdf..d8e836dba 100644 --- a/tests/integration/test_new_api.py +++ b/tests/integration/test_new_api.py @@ -124,7 +124,7 @@ def test_all_ml45(): for task in ml45.train_tasks: env = train_env_instances[task.env_name] env.set_task(task) - obs = env.reset() + obs, info = env.reset() old_obj_init = env.obj_init_pos old_target_pos = env._target_pos step_env(env, max_path_length=STEPS, render=False) @@ -142,7 +142,7 @@ def test_all_ml45(): for task in ml45.test_tasks: env = test_env_instances[task.env_name] env.set_task(task) - obs = env.reset() + obs, info = env.reset() assert np.all(obs[-3:] == np.array([0, 0, 0])) assert env.observation_space.shape == (39,) old_obj_init = env.obj_init_pos @@ -208,7 +208,7 @@ def test_all_mt50(): for task in mt50.train_tasks: env = train_env_instances[task.env_name] env.set_task(task) - obs = env.reset() + obs, info = env.reset() assert np.any(obs[-3:] != np.array([0, 0, 0])) assert env.observation_space.shape == (39,) old_obj_init = env.obj_init_pos diff --git a/tests/integration/test_single_goal_envs.py b/tests/integration/test_single_goal_envs.py index 7efdf814a..ebba8bdea 100644 --- 
b/tests/integration/test_single_goal_envs.py @@ -73,10 +73,10 @@ def test_seeding_observable(): env2.reset() a1 = env1.action_space.sample() # Sample an action a2 = env2.action_space.sample() - next_obs1, _, _, _ = env1.step( + next_obs1, _, _, _, _ = env1.step( a1 ) # Step the environoment with the sampled random action - next_obs2, _, _, _ = env2.step(a2) + next_obs2, _, _, _, _ = env2.step(a2) assert ( next_obs1[-3:] == next_obs2[-3:] ).all() # 2 envs initialized with the same seed will have the same goal @@ -91,10 +91,10 @@ def test_seeding_observable(): env3.reset() a1 = env1.action_space.sample() # Sample an action a3 = env3.action_space.sample() - next_obs1, _, _, _ = env1.step( + next_obs1, _, _, _, _ = env1.step( a1 ) # Step the environoment with the sampled random action - next_obs3, _, _, _ = env3.step(a3) + next_obs3, _, _, _, _ = env3.step(a3) assert not ( next_obs1[-3:] == next_obs3[-3:] @@ -116,10 +116,10 @@ def test_seeding_hidden(): env2.reset() a1 = env1.action_space.sample() # Sample an action a2 = env2.action_space.sample() - next_obs1, _, _, _ = env1.step( + next_obs1, _, _, _, _ = env1.step( a1 ) # Step the environoment with the sampled random action - next_obs2, _, _, _ = env2.step(a2) + next_obs2, _, _, _, _ = env2.step(a2) assert ( env1._target_pos == env2._target_pos ).all() # 2 envs initialized with the same seed will have the same goal @@ -134,10 +134,10 @@ def test_seeding_hidden(): env3.reset() a1 = env1.action_space.sample() # Sample an action a3 = env3.action_space.sample() - next_obs1, _, _, _ = env1.step( + next_obs1, _, _, _, _ = env1.step( a1 ) # Step the environoment with the sampled random action - next_obs3, _, _, _ = env3.step(a3) + next_obs3, _, _, _, _ = env3.step(a3) assert not ( env1._target_pos[-3:] == env3._target_pos[-3:] diff --git a/tests/metaworld/envs/mujoco/sawyer_xyz/helpers.py b/tests/metaworld/envs/mujoco/sawyer_xyz/helpers.py new file mode 100644 index 000000000..5d0f1ff84 --- /dev/null +++ 
b/tests/metaworld/envs/mujoco/sawyer_xyz/helpers.py @@ -0,0 +1,33 @@ +import numpy as np + + +def step_env(env, max_path_length=100, iterations=1, render=True): + """Step env helper.""" + for _ in range(iterations): + obs = env.reset()[0] + for _ in range(max_path_length): + next_obs, _, terminated, truncated, info = env.step( + env.action_space.sample() + ) + if env._partially_observable: + assert (next_obs[-3:] == np.zeros(3)).all() + else: + assert (next_obs[-3:] == env._get_pos_goal()).all() + assert (next_obs[:3] == env.get_endeff_pos()).all() + internal_obs = env._get_pos_objects() + internal_quat = env._get_quat_objects() + assert (next_obs[4:7] == internal_obs[:3]).all() + assert (next_obs[7:11] == internal_quat[:4]).all() + if internal_obs.shape == (6,): + assert internal_quat.shape == (8,) + assert (next_obs[11:14] == internal_obs[3:]).all() + assert (next_obs[14:18] == internal_quat[4:]).all() + else: + assert (next_obs[11:14] == np.zeros(3)).all() + assert (next_obs[14:18] == np.zeros(4)).all() + assert (obs[:18] == next_obs[18:-3]).all() + obs = next_obs + if render: + env.render() + if terminated or truncated: + break diff --git a/tests/metaworld/envs/mujoco/sawyer_xyz/test_obs_space_hand.py b/tests/metaworld/envs/mujoco/sawyer_xyz/test_obs_space_hand.py index 638e92bf4..f015d143e 100644 --- a/tests/metaworld/envs/mujoco/sawyer_xyz/test_obs_space_hand.py +++ b/tests/metaworld/envs/mujoco/sawyer_xyz/test_obs_space_hand.py @@ -52,7 +52,7 @@ def test_reaching_limit(target): env.reset() env.reset_model() - o_prev = env.reset() + o_prev, info = env.reset() for _ in range(env.max_path_length): a = policy.get_action(o_prev) @@ -61,4 +61,4 @@ def test_reaching_limit(target): break o_prev = o - assert SawyerXYZEnv._HAND_SPACE.contains(o[:3]), SawyerXYZEnv._HAND_SPACE + assert SawyerXYZEnv._HAND_SPACE.contains(o[:3]) diff --git a/tests/metaworld/envs/mujoco/sawyer_xyz/test_sawyer_xyz_env.py b/tests/metaworld/envs/mujoco/sawyer_xyz/test_sawyer_xyz_env.py index 
6dd48fb31..f671ae87a 100644 --- a/tests/metaworld/envs/mujoco/sawyer_xyz/test_sawyer_xyz_env.py +++ b/tests/metaworld/envs/mujoco/sawyer_xyz/test_sawyer_xyz_env.py @@ -22,7 +22,7 @@ def test_reset_returns_same_obj_and_goal(): # Step through environment for a fixed number of episodes. for _ in range(2): # Reset environment and extract initial object position. - obs = env.reset() + obs, info = env.reset() goal = obs[-3:] goal_poses[env_name].append(goal) initial_obj_pos = obs[3:9] @@ -31,11 +31,15 @@ def test_reset_returns_same_obj_and_goal(): # Display initial object positions and find environments with non-unique positions. violating_envs_obs = [] for env_name, task_initial_pos in initial_obj_poses.items(): - if len(np.unique(np.array(task_initial_pos), axis=0)) > 1: + if len(np.unique(np.array(task_initial_pos), axis=0)) > 1 and not np.allclose( + task_initial_pos[0], task_initial_pos[1], rtol=1e-2, atol=1e-2 + ): violating_envs_obs.append(env_name) violating_envs_goals = [] for env_name, target_pos in goal_poses.items(): - if len(np.unique(np.array(target_pos), axis=0)) > 1: + if len(np.unique(np.array(target_pos), axis=0)) > 1 and not np.allclose( + target_pos[0], target_pos[1], rtol=1e-2, atol=1e-3 + ): violating_envs_goals.append(env_name) assert not violating_envs_obs assert not violating_envs_goals diff --git a/tests/metaworld/envs/mujoco/sawyer_xyz/test_scripted_policies.py b/tests/metaworld/envs/mujoco/sawyer_xyz/test_scripted_policies.py index c26360c0b..76b2c4056 100644 --- a/tests/metaworld/envs/mujoco/sawyer_xyz/test_scripted_policies.py +++ b/tests/metaworld/envs/mujoco/sawyer_xyz/test_scripted_policies.py @@ -1,6 +1,6 @@ import pytest -from metaworld.envs.mujoco.env_dict import ALL_V2_ENVIRONMENTS +from metaworld import MT1 from metaworld.policies import ( SawyerAssemblyV2Policy, SawyerBasketballV2Policy, @@ -53,158 +53,82 @@ SawyerWindowCloseV2Policy, SawyerWindowOpenV2Policy, ) -from tests.metaworld.envs.mujoco.sawyer_xyz.utils import 
trajectory_summary -test_cases_latest_nonoise = [ - # name, policy, action noise pct, success rate - ["assembly-v2", SawyerAssemblyV2Policy(), 0.0, 1.0], - ["basketball-v2", SawyerBasketballV2Policy(), 0.0, 0.98], - ["bin-picking-v2", SawyerBinPickingV2Policy(), 0.0, 0.98], - ["box-close-v2", SawyerBoxCloseV2Policy(), 0.0, 0.90], - ["button-press-topdown-v2", SawyerButtonPressTopdownV2Policy(), 0.0, 0.95], - ["button-press-topdown-wall-v2", SawyerButtonPressTopdownWallV2Policy(), 0.0, 0.95], - ["button-press-v2", SawyerButtonPressV2Policy(), 0.0, 1.0], - ["button-press-wall-v2", SawyerButtonPressWallV2Policy(), 0.0, 0.93], - ["coffee-button-v2", SawyerCoffeeButtonV2Policy(), 0.0, 1.0], - ["coffee-pull-v2", SawyerCoffeePullV2Policy(), 0.0, 0.94], - ["coffee-push-v2", SawyerCoffeePushV2Policy(), 0.0, 0.93], - ["dial-turn-v2", SawyerDialTurnV2Policy(), 0.0, 0.96], - ["disassemble-v2", SawyerDisassembleV2Policy(), 0.0, 0.92], - ["door-close-v2", SawyerDoorCloseV2Policy(), 0.0, 0.99], - ["door-lock-v2", SawyerDoorLockV2Policy(), 0.0, 1.0], - ["door-open-v2", SawyerDoorOpenV2Policy(), 0.0, 0.94], - ["door-unlock-v2", SawyerDoorUnlockV2Policy(), 0.0, 1.0], - ["drawer-close-v2", SawyerDrawerCloseV2Policy(), 0.0, 0.99], - ["drawer-open-v2", SawyerDrawerOpenV2Policy(), 0.0, 0.99], - ["faucet-close-v2", SawyerFaucetCloseV2Policy(), 0.0, 1.0], - ["faucet-open-v2", SawyerFaucetOpenV2Policy(), 0.0, 1.0], - ["hammer-v2", SawyerHammerV2Policy(), 0.0, 1.0], - ["hand-insert-v2", SawyerHandInsertV2Policy(), 0.0, 0.96], - ["handle-press-side-v2", SawyerHandlePressSideV2Policy(), 0.0, 0.99], - ["handle-press-v2", SawyerHandlePressV2Policy(), 0.0, 1.0], - ["handle-pull-v2", SawyerHandlePullV2Policy(), 0.0, 0.93], - ["handle-pull-side-v2", SawyerHandlePullSideV2Policy(), 0.0, 1.0], - ["peg-insert-side-v2", SawyerPegInsertionSideV2Policy(), 0.0, 0.89], - ["lever-pull-v2", SawyerLeverPullV2Policy(), 0.0, 0.94], - ["peg-unplug-side-v2", SawyerPegUnplugSideV2Policy(), 0.0, 0.99], - 
["pick-out-of-hole-v2", SawyerPickOutOfHoleV2Policy(), 0.0, 1.0], - ["pick-place-v2", SawyerPickPlaceV2Policy(), 0.0, 0.95], - ["pick-place-wall-v2", SawyerPickPlaceWallV2Policy(), 0.0, 0.95], - ["plate-slide-back-side-v2", SawyerPlateSlideBackSideV2Policy(), 0.0, 1.0], - ["plate-slide-back-v2", SawyerPlateSlideBackV2Policy(), 0.0, 1.0], - ["plate-slide-side-v2", SawyerPlateSlideSideV2Policy(), 0.0, 1.0], - ["plate-slide-v2", SawyerPlateSlideV2Policy(), 0.0, 1.0], - ["reach-v2", SawyerReachV2Policy(), 0.0, 0.99], - ["reach-wall-v2", SawyerReachWallV2Policy(), 0.0, 0.98], - ["push-back-v2", SawyerPushBackV2Policy(), 0.0, 0.97], - ["push-v2", SawyerPushV2Policy(), 0.0, 0.97], - ["push-wall-v2", SawyerPushWallV2Policy(), 0.0, 0.97], - ["shelf-place-v2", SawyerShelfPlaceV2Policy(), 0.0, 0.96], - ["soccer-v2", SawyerSoccerV2Policy(), 0.0, 0.88], - ["stick-pull-v2", SawyerStickPullV2Policy(), 0.0, 0.96], - ["stick-push-v2", SawyerStickPushV2Policy(), 0.0, 0.98], - ["sweep-into-v2", SawyerSweepIntoV2Policy(), 0.0, 0.98], - ["sweep-v2", SawyerSweepV2Policy(), 0.0, 0.99], - ["window-close-v2", SawyerWindowCloseV2Policy(), 0.0, 0.98], - ["window-open-v2", SawyerWindowOpenV2Policy(), 0.0, 0.94], -] - -test_cases_latest_noisy = [ - # name, policy, action noise pct, success rate - ["assembly-v2", SawyerAssemblyV2Policy(), 0.1, 0.70], - ["basketball-v2", SawyerBasketballV2Policy(), 0.1, 0.96], - ["bin-picking-v2", SawyerBinPickingV2Policy(), 0.1, 0.96], - ["box-close-v2", SawyerBoxCloseV2Policy(), 0.1, 0.82], - ["button-press-topdown-v2", SawyerButtonPressTopdownV2Policy(), 0.1, 0.93], - ["button-press-topdown-wall-v2", SawyerButtonPressTopdownWallV2Policy(), 0.1, 0.95], - ["button-press-v2", SawyerButtonPressV2Policy(), 0.1, 0.98], - ["button-press-wall-v2", SawyerButtonPressWallV2Policy(), 0.1, 0.92], - ["coffee-button-v2", SawyerCoffeeButtonV2Policy(), 0.1, 0.99], - ["coffee-pull-v2", SawyerCoffeePullV2Policy(), 0.1, 0.82], - ["coffee-push-v2", SawyerCoffeePushV2Policy(), 
0.1, 0.88], - ["dial-turn-v2", SawyerDialTurnV2Policy(), 0.1, 0.84], - ["disassemble-v2", SawyerDisassembleV2Policy(), 0.1, 0.88], - ["door-close-v2", SawyerDoorCloseV2Policy(), 0.1, 0.97], - ["door-lock-v2", SawyerDoorLockV2Policy(), 0.1, 0.96], - ["door-open-v2", SawyerDoorOpenV2Policy(), 0.1, 0.92], - ["door-unlock-v2", SawyerDoorUnlockV2Policy(), 0.1, 0.97], - ["drawer-close-v2", SawyerDrawerCloseV2Policy(), 0.1, 0.99], - ["drawer-open-v2", SawyerDrawerOpenV2Policy(), 0.1, 0.97], - ["faucet-close-v2", SawyerFaucetCloseV2Policy(), 0.1, 1.0], - ["faucet-open-v2", SawyerFaucetOpenV2Policy(), 0.1, 0.99], - ["hammer-v2", SawyerHammerV2Policy(), 0.1, 0.96], - ["hand-insert-v2", SawyerHandInsertV2Policy(), 0.1, 0.86], - ["handle-press-side-v2", SawyerHandlePressSideV2Policy(), 0.1, 0.98], - ["handle-press-v2", SawyerHandlePressV2Policy(), 0.1, 1.0], - ["handle-pull-v2", SawyerHandlePullV2Policy(), 0.1, 0.99], - ["handle-pull-side-v2", SawyerHandlePullSideV2Policy(), 0.1, 0.71], - ["peg-insert-side-v2", SawyerPegInsertionSideV2Policy(), 0.1, 0.87], - ["lever-pull-v2", SawyerLeverPullV2Policy(), 0.1, 0.90], - ["peg-unplug-side-v2", SawyerPegUnplugSideV2Policy(), 0.1, 0.80], - ["pick-out-of-hole-v2", SawyerPickOutOfHoleV2Policy(), 0.1, 0.89], - ["pick-place-v2", SawyerPickPlaceV2Policy(), 0.1, 0.83], - ["pick-place-wall-v2", SawyerPickPlaceWallV2Policy(), 0.1, 0.83], - ["plate-slide-back-side-v2", SawyerPlateSlideBackSideV2Policy(), 0.1, 0.95], - ["plate-slide-back-v2", SawyerPlateSlideBackV2Policy(), 0.1, 0.94], - ["plate-slide-side-v2", SawyerPlateSlideSideV2Policy(), 0.1, 0.78], - ["plate-slide-v2", SawyerPlateSlideV2Policy(), 0.1, 0.97], - ["reach-v2", SawyerReachV2Policy(), 0.1, 0.98], - ["reach-wall-v2", SawyerReachWallV2Policy(), 0.1, 0.96], - ["push-back-v2", SawyerPushBackV2Policy(), 0.0, 0.91], - ["push-v2", SawyerPushV2Policy(), 0.1, 0.88], - ["push-wall-v2", SawyerPushWallV2Policy(), 0.1, 0.82], - ["shelf-place-v2", SawyerShelfPlaceV2Policy(), 0.1, 0.89], - 
["soccer-v2", SawyerSoccerV2Policy(), 0.1, 0.81], - ["stick-pull-v2", SawyerStickPullV2Policy(), 0.1, 0.81], - ["stick-push-v2", SawyerStickPushV2Policy(), 0.1, 0.95], - ["sweep-into-v2", SawyerSweepIntoV2Policy(), 0.1, 0.86], - ["sweep-v2", SawyerSweepV2Policy(), 0.0, 0.99], - ["window-close-v2", SawyerWindowCloseV2Policy(), 0.1, 0.95], - ["window-open-v2", SawyerWindowOpenV2Policy(), 0.1, 0.93], -] - -# Combine test cases into a single array to pass to parameterized test function -test_cases = [] -for row in test_cases_latest_nonoise: - test_cases.append(pytest.param(*row, marks=pytest.mark.skip)) -for row in test_cases_latest_noisy: - test_cases.append(pytest.param(*row, marks=pytest.mark.basic)) - -ALL_ENVS = {**ALL_V2_ENVIRONMENTS} - - -@pytest.fixture(scope="function") -def env(request): - e = ALL_ENVS[request.param]() - e._partially_observable = False - e._freeze_rand_vec = False - e._set_task_called = True - return e - - -@pytest.mark.parametrize( - "env,policy,act_noise_pct,expected_success_rate", test_cases, indirect=["env"] +policies = dict( + { + "assembly-v2": SawyerAssemblyV2Policy, + "basketball-v2": SawyerBasketballV2Policy, + "bin-picking-v2": SawyerBinPickingV2Policy, + "box-close-v2": SawyerBoxCloseV2Policy, + "button-press-topdown-v2": SawyerButtonPressTopdownV2Policy, + "button-press-topdown-wall-v2": SawyerButtonPressTopdownWallV2Policy, + "button-press-v2": SawyerButtonPressV2Policy, + "button-press-wall-v2": SawyerButtonPressWallV2Policy, + "coffee-button-v2": SawyerCoffeeButtonV2Policy, + "coffee-pull-v2": SawyerCoffeePullV2Policy, + "coffee-push-v2": SawyerCoffeePushV2Policy, + "dial-turn-v2": SawyerDialTurnV2Policy, + "disassemble-v2": SawyerDisassembleV2Policy, + "door-close-v2": SawyerDoorCloseV2Policy, + "door-lock-v2": SawyerDoorLockV2Policy, + "door-open-v2": SawyerDoorOpenV2Policy, + "door-unlock-v2": SawyerDoorUnlockV2Policy, + "drawer-close-v2": SawyerDrawerCloseV2Policy, + "drawer-open-v2": SawyerDrawerOpenV2Policy, + 
"faucet-close-v2": SawyerFaucetCloseV2Policy, + "faucet-open-v2": SawyerFaucetOpenV2Policy, + "hammer-v2": SawyerHammerV2Policy, + "hand-insert-v2": SawyerHandInsertV2Policy, + "handle-press-side-v2": SawyerHandlePressSideV2Policy, + "handle-press-v2": SawyerHandlePressV2Policy, + "handle-pull-v2": SawyerHandlePullV2Policy, + "handle-pull-side-v2": SawyerHandlePullSideV2Policy, + "peg-insert-side-v2": SawyerPegInsertionSideV2Policy, + "lever-pull-v2": SawyerLeverPullV2Policy, + "peg-unplug-side-v2": SawyerPegUnplugSideV2Policy, + "pick-out-of-hole-v2": SawyerPickOutOfHoleV2Policy, + "pick-place-v2": SawyerPickPlaceV2Policy, + "pick-place-wall-v2": SawyerPickPlaceWallV2Policy, + "plate-slide-back-side-v2": SawyerPlateSlideBackSideV2Policy, + "plate-slide-back-v2": SawyerPlateSlideBackV2Policy, + "plate-slide-side-v2": SawyerPlateSlideSideV2Policy, + "plate-slide-v2": SawyerPlateSlideV2Policy, + "reach-v2": SawyerReachV2Policy, + "reach-wall-v2": SawyerReachWallV2Policy, + "push-back-v2": SawyerPushBackV2Policy, + "push-v2": SawyerPushV2Policy, + "push-wall-v2": SawyerPushWallV2Policy, + "shelf-place-v2": SawyerShelfPlaceV2Policy, + "soccer-v2": SawyerSoccerV2Policy, + "stick-pull-v2": SawyerStickPullV2Policy, + "stick-push-v2": SawyerStickPushV2Policy, + "sweep-into-v2": SawyerSweepIntoV2Policy, + "sweep-v2": SawyerSweepV2Policy, + "window-close-v2": SawyerWindowCloseV2Policy, + "window-open-v2": SawyerWindowOpenV2Policy, + } ) -def test_scripted_policy(env, policy, act_noise_pct, expected_success_rate, iters=100): - """Tests whether a given policy solves an environment in a stateless manner - Args: - env (metaworld.envs.MujocoEnv): Environment to test - policy (metaworld.policies.policy.Policy): Policy that's supposed to - succeed in env - act_noise_pct (np.ndarray): Decimal value(s) indicating std deviation of - the noise as a % of action space - expected_success_rate (float): Decimal value indicating % of runs that - must be successful - iters (int): How many 
times the policy should be tested - """ - assert len(vars(policy)) == 0, "{} has state variable(s)".format( - policy.__class__.__name__ - ) - successes = 0 - for _ in range(iters): - successes += float( - trajectory_summary(env, policy, act_noise_pct, render=False)[0] - ) - print(successes) - assert successes >= expected_success_rate * iters + +@pytest.mark.parametrize("env_name", MT1.ENV_NAMES) +def test_policy(env_name): + mt1 = MT1(env_name) + env = mt1.train_classes[env_name]() + p = policies[env_name]() + completed = 0 + for task in mt1.train_tasks: + env.set_task(task) + obs, info = env.reset() + done = False + count = 0 + while count < 500 and not done: + count += 1 + a = p.get_action(obs) + next_obs, _, terminated, truncated, info = env.step(a) + done = terminated or truncated + obs = next_obs + if int(info["success"]) == 1: + completed += 1 + break + print(float(completed) / 50) + assert (float(completed) / 50) > 0.80 diff --git a/tests/metaworld/envs/mujoco/sawyer_xyz/test_seeded_rand_vec.py b/tests/metaworld/envs/mujoco/sawyer_xyz/test_seeded_rand_vec.py index a80df260d..ddfcbcb32 100644 --- a/tests/metaworld/envs/mujoco/sawyer_xyz/test_seeded_rand_vec.py +++ b/tests/metaworld/envs/mujoco/sawyer_xyz/test_seeded_rand_vec.py @@ -14,13 +14,13 @@ def test_observations_match(env_name): env2 = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_name](seed=seed) env2.seeded_rand_vec = True - obs1, obs2 = env1.reset(), env2.reset() + (obs1, _), (obs2, _) = env1.reset(), env2.reset() assert (obs1 == obs2).all() for i in range(env1.max_path_length): a = np.random.uniform(low=-1, high=-1, size=4) - obs1, r1, done1, _ = env1.step(a) - obs2, r2, done2, _ = env2.step(a) + obs1, r1, done1, _, _ = env1.step(a) + obs2, r2, done2, _, _ = env2.step(a) assert (obs1 == obs2).all() assert r1 == r2 assert not done1 diff --git a/tests/metaworld/envs/mujoco/sawyer_xyz/utils.py b/tests/metaworld/envs/mujoco/sawyer_xyz/utils.py index 343ee30e2..2aa63e8cd 100644 --- 
a/tests/metaworld/envs/mujoco/sawyer_xyz/utils.py +++ b/tests/metaworld/envs/mujoco/sawyer_xyz/utils.py @@ -1,9 +1,10 @@ import numpy as np -def trajectory_summary(env, policy, act_noise_pct, render=False, end_on_success=True): - """Tests whether a given policy solves an environment. - +def trajectory_summary( + env, policy, act_noise_pct, iters=500, render=False, end_on_success=True +): + """Tests whether a given policy solves an environment Args: env (metaworld.envs.MujocoEnv): Environment to test policy (metaworld.policies.policies.Policy): Policy that's supposed to @@ -24,7 +25,7 @@ def trajectory_summary(env, policy, act_noise_pct, render=False, end_on_success= trajectory_generator(env, policy, act_noise_pct, render) ): rewards.append(r) - assert not env.isV2 or set(info.keys()) == { + assert set(info.keys()) == { "success", "near_object", "grasp_success", @@ -46,8 +47,7 @@ def trajectory_summary(env, policy, act_noise_pct, render=False, end_on_success= def trajectory_generator(env, policy, act_noise_pct, render=False): - """Tests whether a given policy solves an environment. 
- + """Tests whether a given policy solves an environment Args: env (metaworld.envs.MujocoEnv): Environment to test policy (metaworld.policies.policies.Policy): Policy that's supposed to @@ -59,23 +59,27 @@ def trajectory_generator(env, policy, act_noise_pct, render=False): (float, bool, dict): Reward, Done flag, Info dictionary """ action_space_ptp = env.action_space.high - env.action_space.low - + env._partially_observable = True env.reset() env.reset_model() - o = env.reset() + o, info = env.reset() assert o.shape == env.observation_space.shape assert env.observation_space.contains(o), obs_space_error_text(env, o) - + last_info = None + print(act_noise_pct * action_space_ptp) for _ in range(env.max_path_length): a = policy.get_action(o) a = np.random.normal(a, act_noise_pct * action_space_ptp) - o, r, done, info = env.step(a) + o, r, terminated, truncated, info = env.step(a) + done = terminated or truncated assert env.observation_space.contains(o), obs_space_error_text(env, o) + last_info = info if render: env.render() - - yield r, done, info + if done: + break + return last_info def obs_space_error_text(env, obs):