diff --git a/.gitignore b/.gitignore index 65249c39..d53318df 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ dist/ # local test dataset that is lazily downloaded by example scripts tests/assets/test.hdf5 +tests/assets/test_v141.hdf5 # pycharm configs .idea/ @@ -119,3 +120,6 @@ venv.bak/ .mypy_cache/ *.mp4 *.pth + +# private macros +macros_private.py diff --git a/README.md b/README.md index 28a5dd27..cdd6cf4a 100644 --- a/README.md +++ b/README.md @@ -15,10 +15,19 @@ ------- ## Latest Updates +- [07/03/2023] **v0.3**: BC-Transformer and IQL :brain:, support for DeepMind MuJoCo bindings :robot:, pre-trained image reps :eye:, wandb logging :chart_with_upwards_trend:, and more - [05/23/2022] **v0.2.1**: Updated website and documentation to feature more tutorials :notebook_with_decorative_cover: -- [12/16/2021] **v0.2.0**: Modular observation modalities and encoders :wrench:, support for [MOMART](https://sites.google.com/view/il-for-mm/home) datasets :open_file_folder: +- [12/16/2021] **v0.2.0**: Modular observation modalities and encoders :wrench:, support for [MOMART](https://sites.google.com/view/il-for-mm/home) datasets :open_file_folder: [[release notes]](https://github.com/ARISE-Initiative/robomimic/releases/tag/v0.2.0) [[documentation]](https://robomimic.github.io/docs/v0.2/introduction/overview.html) - [08/09/2021] **v0.1.0**: Initial code and paper release +------- + +## Colab quickstart +Get started with a quick colab notebook demo of robomimic with installing anything locally. + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1b62r_km9pP40fKF0cBdpdTO2P_2eIbC6?usp=sharing) + + ------- **robomimic** is a framework for robot learning from demonstration. diff --git a/docs/api/robomimic.algo.rst b/docs/api/robomimic.algo.rst index 7a6dd1ed..c2b32890 100644 --- a/docs/api/robomimic.algo.rst +++ b/docs/api/robomimic.algo.rst @@ -52,6 +52,14 @@ robomimic.algo.hbc module :undoc-members: :show-inheritance: +robomimic.algo.iql module +------------------------- + +.. automodule:: robomimic.algo.iql + :members: + :undoc-members: + :show-inheritance: + robomimic.algo.iris module -------------------------- diff --git a/docs/api/robomimic.config.rst b/docs/api/robomimic.config.rst index e7a4b32b..1c53be6c 100644 --- a/docs/api/robomimic.config.rst +++ b/docs/api/robomimic.config.rst @@ -60,6 +60,14 @@ robomimic.config.hbc\_config module :undoc-members: :show-inheritance: +robomimic.config.iql\_config module +----------------------------------- + +.. automodule:: robomimic.config.iql_config + :members: + :undoc-members: + :show-inheritance: + robomimic.config.iris\_config module ------------------------------------ diff --git a/docs/api/robomimic.envs.rst b/docs/api/robomimic.envs.rst index 94bfb690..5e7ab732 100644 --- a/docs/api/robomimic.envs.rst +++ b/docs/api/robomimic.envs.rst @@ -36,6 +36,14 @@ robomimic.envs.env\_robosuite module :undoc-members: :show-inheritance: +robomimic.envs.wrappers module +------------------------------ + +.. automodule:: robomimic.envs.wrappers + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/docs/api/robomimic.models.rst b/docs/api/robomimic.models.rst index a81e10a9..a50f8760 100644 --- a/docs/api/robomimic.models.rst +++ b/docs/api/robomimic.models.rst @@ -20,6 +20,14 @@ robomimic.models.distributions module :undoc-members: :show-inheritance: +robomimic.models.obs\_core module +--------------------------------- + +.. 
automodule:: robomimic.models.obs_core + :members: + :undoc-members: + :show-inheritance: + robomimic.models.obs\_nets module --------------------------------- @@ -36,6 +44,14 @@ robomimic.models.policy\_nets module :undoc-members: :show-inheritance: +robomimic.models.transformers module +------------------------------------ + +.. automodule:: robomimic.models.transformers + :members: + :undoc-members: + :show-inheritance: + robomimic.models.vae\_nets module --------------------------------- diff --git a/docs/api/robomimic.rst b/docs/api/robomimic.rst index e40292b2..0d8ad0a7 100644 --- a/docs/api/robomimic.rst +++ b/docs/api/robomimic.rst @@ -13,6 +13,17 @@ Subpackages robomimic.models robomimic.utils +Submodules +---------- + +robomimic.macros module +----------------------- + +.. automodule:: robomimic.macros + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/docs/api/robomimic.utils.rst b/docs/api/robomimic.utils.rst index 57160ef4..51095559 100644 --- a/docs/api/robomimic.utils.rst +++ b/docs/api/robomimic.utils.rst @@ -52,14 +52,6 @@ robomimic.utils.loss\_utils module :undoc-members: :show-inheritance: -robomimic.utils.macros module ------------------------------ - -.. automodule:: robomimic.utils.macros - :members: - :undoc-members: - :show-inheritance: - robomimic.utils.obs\_utils module --------------------------------- diff --git a/docs/conf.py b/docs/conf.py index 7402a994..59eff968 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -59,17 +59,17 @@ # General information about the project. project = 'robomimic' -copyright = '2022, Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang' -author = 'Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang' +copyright = 'the robomimic core team, 2023' +author = 'the robomimic core team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = robomimic.__version__ +version = (".").join(robomimic.__version__.split(".")[:-1]) # The full version, including alpha/beta/rc tags. -release = robomimic.__version__ +release = (".").join(robomimic.__version__.split(".")[:-1]) # The language for content autogenerated by Sphinx. Refer to documentation @@ -98,6 +98,7 @@ # a list of builtin themes. # html_theme = 'sphinx_book_theme' +html_logo = "robomimic_logo.png" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/datasets/d4rl.md b/docs/datasets/d4rl.md index ba19e82a..9e7165f5 100644 --- a/docs/datasets/d4rl.md +++ b/docs/datasets/d4rl.md @@ -9,9 +9,9 @@ Use `convert_d4rl.py` in the `scripts/conversion` folder to automatically downlo ```sh # by default, download to robomimic/datasets -$ python convert_d4rl.py --env walker2d-medium-expert-v0 +$ python convert_d4rl.py --env walker2d-medium-expert-v2 # download to specific folder -$ python convert_d4rl.py --env walker2d-medium-expert-v0 --folder /path/to/output/folder/ +$ python convert_d4rl.py --env walker2d-medium-expert-v2 --folder /path/to/output/folder/ ``` - `--env` specifies the dataset to download @@ -25,19 +25,19 @@ No postprocessing is required, assuming the above script is run! ## D4RL Results -Below, we provide a table of results on common D4RL datasets using the algorithms included in the released codebase. 
We follow the convention in the TD3-BC paper, where we average results over the final 10 rollout evaluations, but we use 50 rollouts instead of 10 for each evaluation. Apart from a small handful of the halfcheetah results, the results align with those presented in the [TD3_BC paper](https://arxiv.org/abs/2106.06860). We suspect the halfcheetah results are different because we used `mujoco-py` version `2.0.2.13` in our evaluations, as opposed to `1.5` in order to be consistent with the version we were using for robosuite datasets. The results below were generated with `gym` version `0.17.3` and this `d4rl` [commit](https://github.com/rail-berkeley/d4rl/tree/9b68f31bab6a8546edfb28ff0bd9d5916c62fd1f). +Below, we provide a table of results on common D4RL datasets using the algorithms included in the released codebase. We follow the convention in the TD3-BC paper, where we average results over the final 10 rollout evaluations, but we use 50 rollouts instead of 10 for each evaluation. All results are reported on the `-v2` environment variants. Apart from a small handful of the halfcheetah results, the results align with those presented in the [TD3_BC paper](https://arxiv.org/abs/2106.06860). We suspect the halfcheetah results are different because we used `mujoco-py` version `2.1.2.14` in our evaluations, as opposed to `1.5` in order to be consistent with the version we were using for robosuite datasets. The results below were generated with `gym` version `0.24.1` and this `d4rl` [commit](https://github.com/Farama-Foundation/D4RL/tree/305676ebb2e26582d50c6518c8df39fd52dea587). -| | **BCQ** | **CQL** | **TD3-BC** | -| ----------------------------- | ------------- | ------------- | ------------- | -| **HalfCheetah-Medium** | 40.8% (4791) | 38.5% (4497) | 41.7% (4902) | -| **Hopper-Medium** | 36.9% (1181) | 30.7% (980) | 97.9% (3167) | -| **Walker2d-Medium** | 66.4% (3050) | 65.2% (2996) | 77.0% (3537) | -| **HalfCheetah-Medium-Expert** | 74.9% (9016) | 21.5% (2389) | 79.4% (9578) | -| **Hopper-Medium-Expert** | 83.8% (2708) | 111.7% (3614) | 112.2% (3631) | -| **Walker2d-Medium-Expert** | 70.2% (3224) | 77.4% (3554) | 102.0% (4683) | -| **HalfCheetah-Expert** | 94.3% (11427) | 29.2% (3342) | 95.4% (11569) | -| **Hopper-Expert** | 104.7% (3389) | 111.8% (3619) | 112.2% (3633) | -| **Walker2d-Expert** | 80.5% (3699) | 108.0% (4958) | 105.3% (4837) | +| | **BCQ** | **CQL** | **TD3-BC** | **IQL** | +| ----------------------------- | ------------- | ------------- | ------------- | ------------- | +| **HalfCheetah-Medium** | 46.8% (5535) | 46.7% (5516) | 47.9% (5664) | 45.6% (5379) | +| **Hopper-Medium** | 63.9% (2059) | 59.2% (1908) | 61.0% (1965) | 53.7% (1729) | +| **Walker2d-Medium** | 74.6% (3426) | 79.7% (3659) | 82.9% (3806) | 77.0% (3537) | +| **HalfCheetah-Medium-Expert** | 89.9% (10875) | 77.6% (9358) | 92.1% (11154) | 89.0% (10773) | +| **Hopper-Medium-Expert** | 79.5% (2566) | 62.9% (2027) | 89.7% (2900) | 110.1% (3564) | +| **Walker2d-Medium-Expert** | 98.7% (4535) | 109.0% (5007) | 111.1% (5103) | 109.7% (5037) | +| **HalfCheetah-Expert** | 92.9% (11249) | 67.7% (8126) | 94.6% (11469) | 93.3% (11304) | +| **Hopper-Expert** | 92.3% (2984) | 104.2% (3370) | 108.5% (3512) | 110.5% (3577) | +| **Walker2d-Expert** | 108.6% (4987) | 108.5% (4983) | 110.3% (5066) | 109.1% (5008) | ### Reproducing D4RL Results @@ -52,4 +52,4 @@ In order to reproduce the results above, first make sure that the `generate_pape journal={arXiv preprint arXiv:2004.07219}, year={2020} } -``` \ No newline at end of file 
+``` diff --git a/docs/datasets/overview.md b/docs/datasets/overview.md index ac9731e7..0f260e59 100644 --- a/docs/datasets/overview.md +++ b/docs/datasets/overview.md @@ -159,8 +159,6 @@ You can easily list the filter keys present in a dataset with the `get_dataset_i -Using filter keys during training is easy. To use the generated train-valid split, you can set `config.experiment.validate=True` so that the demos under `mask/train` are used for training, and the demos under `mask/valid` are used for validation. - -You can also use a custom filter key for training by setting `config.train.hdf5_filter_key=`. This ensures that only the demos under `mask/` are used during training. If you also set `config.experiment.validate=True`, this filter key's train-valid split will be used. - +Using filter keys during training is easy. To use the generated train-valid split, you can set `config.experiment.validate=True` to ensure that validation will run after each training epoch, and then set `config.train.hdf5_filter_key="train"` and `config.train.hdf5_validation_filter_key="valid"` so that the demos under `mask/train` are used for training, and the demos under `mask/valid` are used for validation. +You can also use a custom filter key for training by setting `config.train.hdf5_filter_key=`. This ensures that only the demos under `mask/` are used during training. You can also specify a custom filter key for validation by setting `config.train.hdf5_validation_filter_key`. diff --git a/docs/datasets/robomimic_v0.1.md b/docs/datasets/robomimic_v0.1.md index d8134ec9..43e2b047 100644 --- a/docs/datasets/robomimic_v0.1.md +++ b/docs/datasets/robomimic_v0.1.md @@ -12,8 +12,11 @@ robomimic v0.1 datasets is a large-scale, diverse collection of task demonstrati

Warning!

+ -When working with these datasets, please make sure that you have installed [robosuite](https://robosuite.ai/) from source and are on the `offline_study` branch. +Since the release of robomimic v0.3, we recommend that users install [robosuite](https://robosuite.ai/) from source and use the `v1.4.1` branch. The `v1.4.1` branch of robosuite uses Google DeepMind's [mujoco python bindings](https://github.com/deepmind/mujoco), which replace the now-deprecated [mujoco-py](https://github.com/openai/mujoco-py). + +Note that the CoRL 2021 datasets relied on the mujoco-py based `offline_study` branch of robosuite. In this version of robomimic, we provide users with datasets based on `v1.4.1`, which we found to produce similar results to the original `offline_study` datasets. However, if you would like to download the `offline_study` datasets, please use the `v0.2.0` branch of robomimic and consult the [v0.2 docs](https://robomimic.github.io/docs/v0.2/datasets/robomimic_v0.1.html) for instructions.
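Returning to the filter-key settings in the `docs/datasets/overview.md` change above: the same options can also be set from code instead of a config json. A minimal sketch, assuming a dataset whose `mask/train` and `mask/valid` filter keys were produced by `split_train_val.py`; the dataset path is illustrative:

```python
from robomimic.config import config_factory

# build a default BC config and point it at a dataset with train/valid filter keys
config = config_factory(algo_name="bc")
config.train.data = "datasets/lift/ph/low_dim_v141.hdf5"  # illustrative path
config.experiment.validate = True                         # run validation after each training epoch
config.train.hdf5_filter_key = "train"                    # train on demos listed under mask/train
config.train.hdf5_validation_filter_key = "valid"         # validate on demos listed under mask/valid
```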
@@ -33,8 +36,8 @@ $ python download_datasets.py $ python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim --dry_run $ python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim -# download all low-dim and image multi-human datasets for the can and square tasks -$ python download_datasets.py --tasks can square --dataset_types mh --hdf5_types low_dim image +# download all low-dim multi-human datasets for the can and square tasks +$ python download_datasets.py --tasks can square --dataset_types mh --hdf5_types low_dim # download the sparse reward machine-generated low-dim datasets $ python download_datasets.py --tasks all --dataset_types mg --hdf5_types low_dim_sparse @@ -61,9 +64,8 @@ We also provide direct download links for each hdf5 dataset (the download links | **Lift
(PH)** | **Can
(PH)** | **Square
(PH)** | **Transport
(PH)** | **Tool Hang
(PH)** | | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | | ![lift](../images/lift.png) | ![can](../images/can.png) | ![square](../images/square.png) | ![transport](../images/transport.png) | ![tool_hang](../images/tool_hang.png) | -| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/ph/demo.hdf5)
(21 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/ph/demo.hdf5)
(45 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/ph/demo.hdf5)
(49 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/ph/demo.hdf5)
(185 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/tool_hang/ph/demo.hdf5)
(127 MB) | -| [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/ph/low_dim.hdf5)
(18 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/ph/low_dim.hdf5)
(44 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/ph/low_dim.hdf5)
(48 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/ph/low_dim.hdf5)
(296 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/tool_hang/ph/low_dim.hdf5)
(193 MB) | -| [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/ph/image.hdf5)
(801 MB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/ph/image.hdf5)
(1.9 GB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/ph/image.hdf5)
(2.5 GB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/ph/image.hdf5)
(16 GB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/tool_hang/ph/image.hdf5)
(63 GB) | +| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/ph/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/ph/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/ph/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/ph/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/tool_hang/ph/demo_v141.hdf5) | +| [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/ph/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/ph/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/ph/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/ph/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/tool_hang/ph/low_dim_v141.hdf5) | | **Lift Real
(PH)** | **Can Real
(PH)** | **Tool Hang Real
(PH)** | | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | @@ -86,10 +88,8 @@ We also provide direct download links for each hdf5 dataset (the download links | **Lift
(MH)** | **Can
(MH)** | **Square
(MH)** | **Transport
(MH)** | | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | | ![lift](../images/lift.png) | ![can](../images/can.png) | ![square](../images/square.png) | ![transport](../images/transport.png) | -| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mh/demo.hdf5)
(20 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mh/demo.hdf5)
(51 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/mh/demo.hdf5)
(45 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/mh/demo.hdf5)
(212 MB) | -| [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mh/low_dim.hdf5)
(46 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mh/low_dim.hdf5)
(108 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/mh/low_dim.hdf5)
(119 MB) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/mh/low_dim.hdf5)
(609 MB) | -| [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mh/image.hdf5)
(2.6 GB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mh/image.hdf5)
(5.1 GB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/mh/image.hdf5)
(6.5 GB) | [image](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/mh/image.hdf5)
(32 GB) | - +| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mh/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mh/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/mh/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/mh/demo_v141.hdf5) | +| [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mh/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mh/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/square/mh/low_dim_v141.hdf5) | [low_dim](http://downloads.cs.stanford.edu/downloads/rt_benchmark/transport/mh/low_dim_v141.hdf5) |

@@ -107,11 +107,9 @@ We also provide direct download links for each hdf5 dataset (the download links | **Lift
(MG)** | **Can
(MG)** | | :----------------------------------------------------------: | :----------------------------------------------------------: | | ![lift](../images/lift.png) | ![can](../images/can.png) | -| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/demo.hdf5)
(96 MB) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/demo.hdf5)
(457 MB) | -| [low_dim (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/low_dim_sparse.hdf5)
(303 MB) | [low_dim (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/low_dim_sparse.hdf5)
(1.1 GB) | -| [low_dim (dense)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/low_dim_dense.hdf5)
(303 MB) | [low_dim (dense)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/low_dim_dense.hdf5)
(1.1 GB) | -| [image (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/image_sparse.hdf5)
(19 GB) | [image (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/image_sparse.hdf5)
(48 GB) | -| [image (dense)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/image_dense.hdf5)
(19 GB) | [image (dense)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/image_dense.hdf5)
(48 GB) | +| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/demo_v141.hdf5) | [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/demo_v141.hdf5) | +| [low_dim (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/low_dim_sparse_v141.hdf5) | [low_dim (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/low_dim_sparse_v141.hdf5) | +| [low_dim (dense)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/lift/mg/low_dim_dense_v141.hdf5) | [low_dim (dense)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/mg/low_dim_dense_v141.hdf5) |

@@ -128,9 +126,8 @@ We also provide direct download links for each hdf5 dataset (the download links | **Can Paired** | | :----------------------------------------------------------: | | can_paired | -| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo.hdf5)
(39 MB) | -| [low_dim (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim.hdf5)
(39 MB) | -| [image (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/image.hdf5)
(1.7 GB) | +| [raw](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo_v141.hdf5) | +| [low_dim (sparse)](http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim_v141.hdf5) |

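If you prefer to grab one of the direct download links above from code rather than through `download_datasets.py`, here is a small stdlib-only sketch (the URL is copied from the table above; the destination path is arbitrary):

```python
import os
import urllib.request

# direct link from the table above (Can Paired, low_dim variant)
url = "http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim_v141.hdf5"
dest = os.path.join("datasets", "can", "paired", "low_dim_v141.hdf5")

os.makedirs(os.path.dirname(dest), exist_ok=True)
print(f"downloading {url} -> {dest}")
urllib.request.urlretrieve(url, dest)  # blocks until the file is fully downloaded
```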
@@ -138,7 +135,7 @@ We also provide direct download links for each hdf5 dataset (the download links ## Postprocessing If a **low_dim** or **image** dataset was downloaded, the dataset works out of the box! No postprocessing is needed. -If a **raw** dataset was downloaded, the dataset must be postprocessed since there are no observations stored. You must run `dataset_states_to_obs.py`. For more information, see [this page](robosuite.html#extracting-observations-from-mujoco-states). +If a **raw** dataset was downloaded, the dataset must be postprocessed since there are no observations stored. To extract observations, please see the `extract_obs_from_raw_datasets.sh` script. ## Info @@ -191,7 +188,7 @@ python /path/to/robomimic/scripts/train.py --config /path/to/robomimic/exps/pape

Want to Run Experiments on Custom Observations?

-We provide the raw (observation-free) `demo.hdf5` datasets so that you can generate your own custom set of observations, such as additional camera viewpoints. For information, see [Extracting Observations from Datasets](robosuite.md#extracting-observations-from-mujoco-states). +We provide the raw (observation-free) `demo_v141.hdf5` datasets so that you can generate your own custom set of observations, such as additional camera viewpoints. For information, see [Extracting Observations from Datasets](robosuite.md#extracting-observations-from-mujoco-states). **NOTE**: To compare against how our paper's released datasets were generated, please see the `extract_obs_from_raw_datasets.sh` script. @@ -227,13 +224,13 @@ $ python generate_paper_configs.py --output_dir /tmp/experiment_results # the training command can be found in robomimic/exps/paper/core.sh # Training results can be viewed at /tmp/experiment_results (--output_dir when generating paper configs). -$ python train.py --config ../exps/paper/core/lift/ph/low_dim/bc.json +$ python train.py --config ../exps/paper/core/lift/ph/low_dim/bc_rnn.json ``` ## Citation -```sh -@inproceedings{mandlekar2021matters, +```bibtex +@inproceedings{robomimic2021, title={What Matters in Learning from Offline Human Demonstrations for Robot Manipulation}, author={Mandlekar, Ajay and Xu, Danfei and Wong, Josiah and Nasiriany, Soroush and Wang, Chen and Kulkarni, Rohun and Fei-Fei, Li and Savarese, Silvio and Zhu, Yuke and Mart{\'\i}n-Mart{\'\i}n, Roberto}, booktitle={5th Annual Conference on Robot Learning}, diff --git a/docs/datasets/robosuite.md b/docs/datasets/robosuite.md index 91a66f4a..774b17ee 100644 --- a/docs/datasets/robosuite.md +++ b/docs/datasets/robosuite.md @@ -67,6 +67,12 @@ $ python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name ima # Using dense rewards $ python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name image_dense.hdf5 --done_mode 2 --dense --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +# (space saving option) extract 84x84 image observations with compression and without +# extracting next obs (not needed for pure imitation learning algos) +python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name image.hdf5 \ + --done_mode 2 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 \ + --compress --exclude-next-obs + # Only writing done at the end of the trajectory $ python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name image_done_1.hdf5 --done_mode 1 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 @@ -74,11 +80,19 @@ $ python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name ima $ python dataset_states_to_obs.py --help ``` +
+

Saving storage space

+ +Image datasets can be quite large in terms of storage, so we offer two flags that can help reduce their size. First, the `--compress` flag will run lossless compression on the extracted observations, resulting in datasets that are up to 5x smaller in storage (in our testing). However, training will be marginally slower due to decompression costs when loading batches. Second, the `--exclude-next-obs` flag will exclude the `next_obs` keys per trajectory, since they are not needed for imitation learning algorithms like `BC` and `BC-RNN`. + +In our testing, enabling both flags reduced the Square (PH) Image dataset size from 2.5 GB to 307 MB at the cost of increasing BC-RNN training time from 7 hours to 8.5 hours. +
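A quick way to sanity-check a dataset extracted with these flags is to open it with `h5py` and confirm that the `next_obs` group is gone and that the observation datasets carry hdf5 compression. A minimal sketch, assuming the standard robomimic hdf5 layout; whether compression is reported as `gzip` depends on how the extraction script writes the data:

```python
import h5py

with h5py.File("/path/to/image.hdf5", "r") as f:
    demo = f["data/demo_0"]
    print("keys in demo_0:", list(demo.keys()))   # expect no "next_obs" group when --exclude-next-obs was used
    img = demo["obs/agentview_image"]
    print("image obs shape:", img.shape)          # e.g. (T, 84, 84, 3)
    print("hdf5 compression:", img.compression)   # e.g. "gzip" if --compress was used
```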
+ ## Citation ```sh @article{zhu2020robosuite, title={robosuite: A modular simulation framework and benchmark for robot learning}, - author={Zhu, Yuke and Wong, Josiah and Mandlekar, Ajay and Mart{\'\i}n-Mart{\'\i}n, Roberto}, + author={Zhu, Yuke and Wong, Josiah and Mandlekar, Ajay and Mart{\'\i}n-Mart{\'\i}n, Roberto and Joshi, Abhishek and Nasiriany, Soroush and Zhu, Yifeng}, journal={arXiv preprint arXiv:2009.12293}, year={2020} } diff --git a/docs/images/core_features.png b/docs/images/core_features.png index 3251fcf8..f2da5f18 100644 Binary files a/docs/images/core_features.png and b/docs/images/core_features.png differ diff --git a/docs/index.rst b/docs/index.rst index e8c60ab2..dc2dc316 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -43,6 +43,7 @@ Welcome to robomimic's documentation! tutorials/dataset_contents tutorials/using_pretrained_models tutorials/observations + tutorials/pretrained_representations tutorials/custom_algorithms tutorials/tensor_collections @@ -74,6 +75,12 @@ Welcome to robomimic's documentation! miscellaneous/acknowledgments miscellaneous/references +.. toctree:: + :maxdepth: 1 + :caption: Previous Versions + + versions/v0.2 + Indices and tables ================== diff --git a/docs/introduction/getting_started.md b/docs/introduction/getting_started.md index 0279cc6b..122bf9c9 100644 --- a/docs/introduction/getting_started.md +++ b/docs/introduction/getting_started.md @@ -1,5 +1,9 @@ # Getting Started +## Quickstart colab notebook + +If you prefer to test the basic functionality of robomimic without installing anything locally, try the quickstart [Colab notebook](https://colab.research.google.com/drive/1b62r_km9pP40fKF0cBdpdTO2P_2eIbC6?usp=sharing). + ## Running experiments We begin with a quick tutorial on downloading datasets and running experiments. @@ -15,14 +19,14 @@ Download the robosuite **Lift (PH)** dataset (see [this link](../datasets/robomi $ python robomimic/scripts/download_datasets.py --tasks lift --dataset_types ph ``` -The dataset can be found at `datasets/lift/ph/low_dim.hdf5` +The dataset can be found at `datasets/lift/ph/low_dim_v141.hdf5` ### Step 2: Launch experiment Now, we will run an experiment using `train.py`. In this case we would like to run behavior cloning (BC) for the lift dataset we just downloaded. ```sh -$ python robomimic/scripts/train.py --config robomimic/exps/templates/bc.json --dataset datasets/lift/ph/low_dim.hdf5 --debug +$ python robomimic/scripts/train.py --config robomimic/exps/templates/bc.json --dataset datasets/lift/ph/low_dim_v141.hdf5 --debug ```
@@ -35,7 +39,7 @@ Make sure to add the `--debug` flag to your experiments as a sanity check that y

Warning!

-This example [requires robosuite](./installation.html#robosuite) to be installed (under the `offline_study` branch), but it can be run without robosuite by disabling rollouts in `robomimic/exps/templates/bc.json`: simply change the `experiment.rollout.enabled` flag to `false`. +This example [requires robosuite](./installation.html#robosuite) to be installed (under the `v1.4.1` branch), but it can be run without robosuite by disabling rollouts in `robomimic/exps/templates/bc.json`: simply change the `experiment.rollout.enabled` flag to `false`.
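If you prefer to stay in Python for this quickstart, or want to flip `experiment.rollout.enabled` programmatically instead of editing the template json, the following is a minimal sketch; the import paths and the `train(config, device)` entry point are assumptions based on the pattern used by `examples/train_bc_rnn.py` and `robomimic/scripts/train.py`:

```python
import json

import robomimic.utils.torch_utils as TorchUtils
from robomimic.config import config_factory
from robomimic.scripts.train import train  # training entry point used by the example scripts

# load the BC template and point it at the downloaded dataset
with open("robomimic/exps/templates/bc.json", "r") as f:
    ext_cfg = json.load(f)
config = config_factory(ext_cfg["algo_name"])
with config.values_unlocked():
    config.update(ext_cfg)
    config.train.data = "datasets/lift/ph/low_dim_v141.hdf5"
    config.experiment.rollout.enabled = False  # skip rollouts if robosuite is not installed

device = TorchUtils.get_torch_device(try_to_use_cuda=True)
train(config, device=device)
```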
@@ -90,4 +94,4 @@ Please refer to the remaining documentation sections. Some helpful suggestions o - [Running Hyperparameter Scans](../tutorials/hyperparam_scan.html) - [Overview of Datasets](../datasets/overview.html) - [Dataset Contents and Visualization](../tutorials/dataset_contents.html) -- [Overview of Modules](../modules/overview.html) \ No newline at end of file +- [Overview of Modules](../modules/overview.html) diff --git a/docs/introduction/implemented_algorithms.md b/docs/introduction/implemented_algorithms.md index 60cd04d5..30a26c8f 100644 --- a/docs/introduction/implemented_algorithms.md +++ b/docs/introduction/implemented_algorithms.md @@ -11,6 +11,10 @@ - Behavioral Cloning with an RNN network. Implemented in the `BC_RNN` and `BC_RNN_GMM` (recurrent GMM policy) classes in `algo/bc.py`. +### BC-Transformer + +- Behavioral Cloning with an Transformer network. Implemented in the `BC_Transformer` and `BC_Transformer_GMM` (transformer GMM policy) classes in `algo/bc.py`. + ### HBC - Hierarchical Behavioral Cloning - the implementation is largely based off of [this paper](https://arxiv.org/abs/2003.06085). Implemented in the `HBC` class in `algo/hbc.py`. @@ -29,6 +33,10 @@ - A recent batch offline RL algorithm from [this paper](https://arxiv.org/abs/2006.04779). Implemented in the `CQL` class in `algo/cql.py`. +### IQL + +- A recent batch offline RL algorithm from [this paper](https://arxiv.org/abs/2110.06169). Implemented in the `IQL` class in `algo/iql.py`. + ### TD3-BC - A recent algorithm from [this paper](https://arxiv.org/abs/2106.06860). We implemented it as an example (see section below on building your own algorithm). Implemented in the `TD3_BC` class in `algo/td3_bc.py`. diff --git a/docs/introduction/installation.md b/docs/introduction/installation.md index 3480a900..bc7b022a 100644 --- a/docs/introduction/installation.md +++ b/docs/introduction/installation.md @@ -3,7 +3,7 @@ ## Requirements - Mac OS X or Linux machine -- Python >= 3.6 (recommended 3.7.9) +- Python >= 3.6 (recommended 3.8.0) - [conda](https://www.anaconda.com/products/individual) - [virtualenv](https://virtualenv.pypa.io/en/latest/) is also an acceptable alternative, but we assume you have conda installed in our examples below @@ -13,7 +13,7 @@

1. Create and activate conda environment

```sh -$ conda create -n robomimic_venv python=3.7.9 +$ conda create -n robomimic_venv python=3.8.0 $ conda activate robomimic_venv ``` @@ -31,7 +31,7 @@ $ conda activate robomimic_venv ```sh # Can change pytorch, torchvision versions # We don't install cudatoolkit since Mac does not have NVIDIA GPU -$ conda install pytorch==1.6.0 torchvision==0.7.0 -c pytorch +$ conda install pytorch==2.0.0 torchvision==0.15.1 -c pytorch ```

@@ -43,7 +43,7 @@ $ conda install pytorch==1.6.0 torchvision==0.7.0 -c pytorch ```sh # Can change pytorch, torchvision versions -$ conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch +$ conda install pytorch==2.0.0 torchvision==0.15.1 -c pytorch ```

@@ -115,20 +115,20 @@ OR $ pip install robosuite ``` -**(Optional)** to use our released datasets and reproduce our experiments, switch to our `offline_study` branch (requires installing robosuite from source): +**(Optional)** to use our released datasets and reproduce our experiments, switch to our `v1.4.1` branch (requires installing robosuite from source): ```sh -git checkout offline_study +git checkout v1.4.1 ``` -
+

@@ -177,6 +177,7 @@ $ cd /docs $ make clean $ make apidoc $ make html +$ cp -r images _build/html/ ``` There should be a generated `_build` folder - navigate to `_build/html/` and open `index.html` in a web browser to view the documentation. \ No newline at end of file diff --git a/docs/introduction/overview.md b/docs/introduction/overview.md index cd14129b..f71a6aa3 100644 --- a/docs/introduction/overview.md +++ b/docs/introduction/overview.md @@ -122,6 +122,7 @@ Support for learning both low-dimensional and visuomotor policies, diverse netwo 4. **Flexible Experiment Workflow** Utilities for running hyperparameter sweeps, visualizing demonstration data and trained policies, and collecting new datasets using trained policies --> + ## Reproducing benchmarks The robomimic framework also makes reproducing the results from different benchmarks and datasets easy. See the [datasets page](../datasets/overview.html) for more information on downloading datasets and reproducing experiments. @@ -148,4 +149,4 @@ Please cite [this paper](https://arxiv.org/abs/2108.03298) if you use this frame booktitle={Conference on Robot Learning (CoRL)}, year={2021} } -``` \ No newline at end of file +``` diff --git a/docs/miscellaneous/acknowledgments.md b/docs/miscellaneous/acknowledgments.md index c4982bc3..858b16e1 100644 --- a/docs/miscellaneous/acknowledgments.md +++ b/docs/miscellaneous/acknowledgments.md @@ -15,6 +15,7 @@ We would like to thank members of the [Stanford PAIR Group](http://pair.stanford - The [BCQ](https://github.com/sfujim/BCQ), [CQL](https://github.com/aviralkumar2907/CQL), and [TD3-BC](https://github.com/sfujim/TD3_BC) author-provided implementations were used as a reference for our implementations. - The `TanhWrappedDistribution` class in `models/distributions.py` was adapted from [rlkit](TanhWrappedDistribution). - Support for training distributional critics (see `BCQ_Distributional` in `algos/bcq.py`) was adapted from [Acme](https://github.com/deepmind/acme). It also served as a useful reference for implementing Gaussian Mixture Model (GMM) policies. +- Our transformer implementation was adapted from the excellent [minGPT](https://github.com/karpathy/minGPT) codebase. We wholeheartedly welcome the community to contribute to our project through issues and pull requests. New contributors will be added to the list above. diff --git a/docs/miscellaneous/contributing.md b/docs/miscellaneous/contributing.md index b412b3b2..3833c349 100644 --- a/docs/miscellaneous/contributing.md +++ b/docs/miscellaneous/contributing.md @@ -45,6 +45,7 @@ We also list additional suggested contributing guidelines that we adhered to dur - Prefer `torch.expand` over `torch.repeat` wherever possible, for memory efficiency. See [this link](https://discuss.pytorch.org/t/expand-vs-repeat-semantic-difference/59789) for more details. +- When implementing new configs that specify kwargs that will be unpacked by a downstream python class (for example, the property `self.observation.encoder.rgb.core_kwargs` in the `BaseConfig` class, which is fed to the class specified by `self.observation.encoder.rgb.core_class`), the default config class should specify an empty config object (essentially an empty dictionary) for the kwargs. This is to make sure that external config jsons will be able to completely override both the class and the kwargs without worrying about existing default kwargs that could break the initialization of the class. 
For example, while the default `VisualCore` class takes a kwarg called `feature_dimension`, another class may not take this argument. If this kwarg already existed in the base config, the external json will just add additional kwargs. We look forward to your contributions. Thanks! diff --git a/docs/miscellaneous/references.md b/docs/miscellaneous/references.md index 4b654153..5cefac18 100644 --- a/docs/miscellaneous/references.md +++ b/docs/miscellaneous/references.md @@ -2,8 +2,37 @@ A list of projects and papers that use **robomimic**. If you would like to add your work to this list, please send the paper or project information to Ajay Mandlekar (). +## 2023 + +- [Imitating Task and Motion Planning with Visuomotor Transformers](https://arxiv.org/abs/2305.16309) Murtaza Dalal, Ajay Mandlekar\*, Caelan Garrett\*, Ankur Handa, Ruslan Salakhutdinov, Dieter Fox +- [Data Quality in Imitation Learning](https://arxiv.org/abs/2306.02437) Suneel Belkhale, Yuchen Cui, Dorsa Sadigh +- [Coherent Soft Imitation Learning](https://arxiv.org/abs/2305.16498) Joe Watson, Sandy H. Huang, Nicolas Heess +- [Inverse Preference Learning: Preference-based RL without a Reward Function](https://arxiv.org/abs/2305.15363) Joey Hejna, Dorsa Sadigh +- [Sequence Modeling is a Robust Contender for Offline Reinforcement Learning] Prajjwal Bhargava, Rohan Chitnis, Alborz Geramifard, Shagun Sodhani, Amy Zhang +- [Diffusion Co-Policy for Synergistic Human-Robot Collaborative Tasks](https://arxiv.org/abs/2305.12171) Eley Ng, Ziang Liu, Monroe Kennedy III +- [Zero-shot Preference Learning for Offline RL via Optimal Transport](https://arxiv.org/abs/2306.03615) Runze Liu, Yali Du, Fengshuo Bai, Jiafei Lyu, Xiu Li +- [Seeing the Pose in the Pixels: Learning Pose-Aware Representations in Vision Transformers](https://arxiv.org/abs/2306.09331) Dominick Reilly, Aman Chadha, Srijan Das +- [Get Back Here: Robust Imitation by Return-to-Distribution Planning](https://arxiv.org/abs/2305.01400) Geoffrey Cideron, Baruch Tabanpour, Sebastian Curi, Sertan Girgin, Leonard Hussenot, Gabriel Dulac-Arnold, Matthieu Geist, Olivier Pietquin, Robert Dadashi +- [Preference Transformer: Modeling Human Preferences using Transformers for RL](https://arxiv.org/abs/2303.00957) Changyeon Kim, Jongjin Park, Jinwoo Shin, Honglak Lee, Pieter Abbeel, Kimin Lee +- [MimicPlay: Long-Horizon Imitation Learning by Watching Human Play](https://arxiv.org/abs/2302.12422) Chen Wang, Linxi Fan, Jiankai Sun, Ruohan Zhang, Li Fei-Fei, Danfei Xu, Yuke Zhu, Anima Anandkumar +- [Diffusion Policy: Visuomotor Policy Learning via Action Diffusion](https://arxiv.org/abs/2303.04137) Cheng Chi, Siyuan Feng, Yilun Du, Zhenjia Xu, Eric Cousineau, Benjamin Burchfiel, Shuran Song +- [ORBIT: A Unified Simulation Framework for Interactive Robot Learning Environments](https://arxiv.org/abs/2301.04195) Mayank Mittal, Calvin Yu, Qinxi Yu, Jingzhou Liu, Nikita Rudin, David Hoeller, Jia Lin Yuan, Pooria Poorsarvi Tehrani, Ritvik Singh, Yunrong Guo, Hammad Mazhar, Ajay Mandlekar, Buck Babich, Gavriel State, Marco Hutter, Animesh Garg +- [PLEX: Making the Most of the Available Data for Robotic Manipulation Pretraining](https://arxiv.org/abs/2303.08789) Garrett Thomas, Ching-An Cheng, Ricky Loynd, Vibhav Vineet, Mihai Jalobeanu, Andrey Kolobov +- [Behavior Retrieval: Few-Shot Imitation Learning by Querying Unlabeled Datasets](https://arxiv.org/abs/2304.08742) Maximilian Du, Suraj Nair, Dorsa Sadigh, Chelsea Finn +- [Mind the Gap: Offline Policy Optimization for Imperfect 
Rewards](https://arxiv.org/abs/2302.01667) Jianxiong Li, Xiao Hu, Haoran Xu, Jingjing Liu, Xianyuan Zhan, Qing-Shan Jia, Ya-Qin Zhang + ## 2022 +- [Learning and Retrieval from Prior Data for Skill-based Imitation Learning](https://arxiv.org/abs/2210.11435) Soroush Nasiriany, Tian Gao, Ajay Mandlekar, Yuke Zhu +- [VIOLA: Imitation Learning for Vision-Based Manipulation with Object Proposal Priors](https://arxiv.org/abs/2210.11339) Yifeng Zhu, Abhishek Joshi, Peter Stone, Yuke Zhu +- [Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment](https://arxiv.org/abs/2211.08416) Huihan Liu, Soroush Nasiriany, Lance Zhang, Zhiyao Bao, Yuke Zhu +- [Data-Efficient Pipeline for Offline Reinforcement Learning with Limited Data](https://arxiv.org/abs/2210.08642) Allen Nie, Yannis Flet-Berliac, Deon R. Jordan, William Steenbergen, Emma Brunskill +- [Eliciting Compatible Demonstrations for Multi-Human Imitation Learning](https://arxiv.org/abs/2210.08073) Kanishk Gandhi, Siddharth Karamcheti, Madeline Liao, Dorsa Sadigh +- [Masked Imitation Learning: Discovering Environment-Invariant Modalities in Multimodal Demonstrations](https://arxiv.org/abs/2209.07682) Yilun Hao, Ruinan Wang, Zhangjie Cao, Zihan Wang, Yuchen Cui, Dorsa Sadigh +- [Know Your Boundaries: The Necessity of Explicit Behavioral Cloning in Offline RL](https://arxiv.org/abs/2206.00695) Wonjoon Goo, Scott Niekum +- [HEETR: Pretraining for Robotic Manipulation on Heteromodal Data](https://openreview.net/forum?id=1_XARk3k-M) Garrett Thomas, Andrey Kolobov, Ching-An Cheng, Vibhav Vineet, Mihai Jalobeanu +- [Translating Robot Skills: Learning Unsupervised Skill Correspondences Across Robots](https://proceedings.mlr.press/v162/shankar22a.html) Tanmay Shankar, Yixin Lin, Aravind Rajeswaran, Vikash Kumar, Stuart Anderson, Jean Oh +- [Active Predicting Coding: Brain-Inspired Reinforcement Learning for Sparse Reward Robotic Control Problems](https://arxiv.org/abs/2209.09174) Alexander Ororbia, Ankur Mali - [Imitation Learning by Estimating Expertise of Demonstrators](https://arxiv.org/abs/2202.01288) Mark Beliaev, Andy Shih, Stefano Ermon, Dorsa Sadigh, Ramtin Pedarsani ## 2021 diff --git a/docs/miscellaneous/team.md b/docs/miscellaneous/team.md index 31179139..16b784e0 100644 --- a/docs/miscellaneous/team.md +++ b/docs/miscellaneous/team.md @@ -4,11 +4,12 @@ These people contributed directly to the codebase and helped build it. -- [Ajay Mandlekar](http://web.stanford.edu/~amandlek/) -- [Danfei Xu](https://cs.stanford.edu/~danfei/) +- [Ajay Mandlekar](https://ai.stanford.edu/~amandlek/) +- [Danfei Xu](https://faculty.cc.gatech.edu/~danfei/) - [Josiah Wong](https://www.jowo.me/about) - [Soroush Nasiriany](http://snasiriany.me/) - [Chen Wang](http://www.chenwangjeremy.net/) +- [Matthew Bronars](https://bronars.github.io/) ## Mentors diff --git a/docs/miscellaneous/troubleshooting.md b/docs/miscellaneous/troubleshooting.md index e5e1e8ca..10e54e00 100644 --- a/docs/miscellaneous/troubleshooting.md +++ b/docs/miscellaneous/troubleshooting.md @@ -12,4 +12,4 @@ This section contains known issues that are either minor, or that will be patche - `PrintLogger` breaks if you use `embed()` with a new-ish IPython installation. The current workaround is to use an old version. Known working version is `ipython==5.8.0` -- The `test_scripts` tests will fail if `robosuite` is not on the `offline_study` branch with the following error: `No "site" with name gripper0_ee_x exists.`. 
This is because the test hdf5 was collected on that branch -- switching to that branch should make the test pass. \ No newline at end of file +- On robomimic v0.2, the `test_scripts` tests will fail if `robosuite` is not on the `offline_study` branch with the following error: `No "site" with name gripper0_ee_x exists.`. This is because the test hdf5 was collected on that branch -- switching to that branch should make the test pass. \ No newline at end of file diff --git a/docs/model_zoo/robomimic_v0.1.md b/docs/model_zoo/robomimic_v0.1.md index 8021a094..bb58be13 100644 --- a/docs/model_zoo/robomimic_v0.1.md +++ b/docs/model_zoo/robomimic_v0.1.md @@ -12,7 +12,7 @@ See the ["Using Pretrained Models"](../tutorials/using_pretrained_models.html) t

Warning: use correct robosuite branch!

-When using these trained models, please make sure that robosuite is on the `offline_study` branch of robosuite. +When using these trained models, please make sure that robosuite is on the `offline_study` branch of robosuite. Consult the [v0.2 docs](https://robomimic.github.io/docs/v0.2/datasets/robomimic_v0.1.html) for more information.
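For reference, loading one of these model zoo checkpoints in code goes through `robomimic.utils.file_utils`; a minimal sketch (the checkpoint path is a placeholder, and rollouts still require the matching robosuite branch noted above):

```python
import robomimic.utils.file_utils as FileUtils
import robomimic.utils.torch_utils as TorchUtils

ckpt_path = "/path/to/downloaded_model_zoo_checkpoint.pth"  # placeholder path
device = TorchUtils.get_torch_device(try_to_use_cuda=True)

# returns a RolloutPolicy wrapper along with the raw checkpoint dictionary
policy, ckpt_dict = FileUtils.policy_from_checkpoint(ckpt_path=ckpt_path, device=device, verbose=True)

policy.start_episode()           # reset any recurrent / episodic state
# action = policy(ob=obs_dict)   # query an action given an observation dict from the environment
```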
diff --git a/docs/modules/dataset.md b/docs/modules/dataset.md index 8a7871ee..6c65829d 100644 --- a/docs/modules/dataset.md +++ b/docs/modules/dataset.md @@ -19,8 +19,10 @@ dataset = SequenceDataset( "rewards", "dones", ), - seq_length=10, # length-10 temporal sequences + seq_length=10, # length of sub-sequence to fetch: (s_{t}, a_{t}), (s_{t+1}, a_{t+1}), ..., (s_{t+9}, a_{t+9}) + frame_stack=1, # length of sub-sequence to prepend pad_seq_length=True, # pad last obs per trajectory to ensure all sequences are sampled + pad_frame_stack=True, # pad first obs per trajectory to ensure all sequences are sampled hdf5_cache_mode="all", # cache dataset in memory to avoid repeated file i/o hdf5_normalize_obs=False, filter_by_attribute=None, # can optionally provide a filter key here @@ -34,9 +36,13 @@ dataset = SequenceDataset( - `dataset_keys` - Keys of non-observation data to read from a demonstration. Typically include `actions`, `rewards`, `dones`. - `seq_length` - - Length of the demonstration sub-sequence to fetch. + - Length of demonstration sub-sequence to fetch. For example, if `seq_length = 10` at time `t`, the data loader will fetch ${(s_{t}, a_{t}), (s_{t+1}, a_{t+1}), ..., (s_{t+9}, a_{t+9})}$ +- `frame_stack` + - Length of sub-sequence to stack at the beginning of fetched demonstration. For example, if `frame_stack = 10` at time `t`, the data loader will fetch ${(s_{t-1}, a_{t-1}), (s_{t-2}, a_{t-2}), ..., (s_{t-9}, a_{t-9})}$. Note that the actual length of the fetched sequence is `frame_stack - 1`. This term is useful when training a model to predict `seq_length` actions from `frame_stack` observations. If training a transformer, this should be the same as context length. - `pad_seq_length` - Whether to allow fetching subsequence that ends beyond the sequence. For example, given a demo of length 10 and `seq_length=10`, setting `pad_seq_length=True` allows the dataset object to access subsequence at `__get_item(index=5)__` by repeating the last frame 5 times. +- `pad_frame_stack` + - Whether to allow fetching subsequence that starts before the first time step. For example, given a demo of length 10 and `frame_stack=10`, setting `pad_frame_stack=True` allows the dataset object to access subsequence at `__get_item(index=5)__` by repeating the first frame 5 times. - `hdf5_cache_mode` - Optionally cache the dataset in memory for faster access. The dataset supports three caching modes: `["all", "low_dim", or None]`. - `all`: Load the entire dataset into the RAM. This mode minimizes data loading time but incurs the largest memory footprint. Recommended if the dataset is small or when working with low-dimensional observation data. diff --git a/docs/modules/models.md b/docs/modules/models.md index 1afd01f8..37d24edc 100644 --- a/docs/modules/models.md +++ b/docs/modules/models.md @@ -52,7 +52,8 @@ Below, we provide descriptions of specific EncoderCore-based classes used to enc We provide a `VisualCore` module for constructing custom vision architectures. A `VisualCore` consists of a backbone network that featurizes image input --- typically a `ConvBase` module --- and a pooling module that reduces the feature tensor into a fixed-sized vector representation. Below is a `VisualCore` built from a `ResNet18Conv` backbone and a `SpatialSoftmax` ([paper](https://rll.berkeley.edu/dsae/dsae.pdf)) pooling module. 
```python -from robomimic.models.base_nets import VisualCore, ResNet18Conv, SpatialSoftmax +from robomimic.models.obs_core import VisualCore +from robomimic.models.base_nets import ResNet18Conv, SpatialSoftmax vis_net = VisualCore( input_shape=(3, 224, 224), @@ -72,9 +73,10 @@ New vision backbone and pooling classes can be added by subclassing `ConvBase`. We provide a `ScanCore` module for constructing custom range finder architectures. `ScanCore` consists of a 1D Convolution backbone network (`Conv1dBase`) that featurizes a high-dimensional 1D input, and a pooling module that reduces the feature tensor into a fixed-sized vector representation. Below is an example of a `ScanCore` network with a `SpatialSoftmax` ([paper](https://rll.berkeley.edu/dsae/dsae.pdf)) pooling module. ```python -from robomimic.models.base_nets import ScanCore, SpatialSoftmax +from robomimic.models.obs_core import ScanCore +from robomimic.models.base_nets import SpatialSoftmax -vis_net = VisualCore( +vis_net = ScanCore( input_shape=(1, 120), conv_kwargs={ "out_channels": [32, 64, 64], @@ -98,12 +100,15 @@ Randomizers are `Modules` that perturb network inputs during training, and optio `Randomizer` modules are intended to be used alongside an `ObservationEncoder` --- see the next section for more details. Additional randomizer classes can be implemented by subclassing the `Randomizer` class and implementing the necessary abstract functions. +**Visualizing randomized input:** To visualize the original and randomized image input, set `VISUALIZE_RANDOMIZER = True` in `robomimic/macros.py` ## Observation Encoder and Decoder `ObservationEncoder` and `ObservationDecoder` are basic building blocks for dealing with observation dictionary inputs and outputs. They are designed to take in multiple streams of observation modalities as input (e.g. a dictionary containing images and robot proprioception signals), and output a dictionary of predictions like actions and subgoals. Below is an example of how to manually create an `ObservationEncoder` instance by registering observation modalities with the `register_obs_key` function. ```python -from robomimic.models.obs_nets import ObservationEncoder, CropRandomizer, MLP, VisualCore, ObservationDecoder +from robomimic.models.base_nets import MLP +from robomimic.models.obs_core import VisualCore, CropRandomizer +from robomimic.models.obs_nets import ObservationEncoder, ObservationDecoder obs_encoder = ObservationEncoder(feature_activation=torch.nn.ReLU) @@ -191,13 +196,13 @@ These networks take an observation dictionary as input (and possibly additional ### ValueNetwork - A basic value network that predicts values from observations. Can optionally be goal conditioned on future observations. ### DistributionalActionValueNetwork -- Distributional Q (action-value) network that outputs a categorical distribution over a discrete grid of value atoms. See the [paper](https://arxiv.org/pdf/1707.06887.pdf for more details). +- Distributional Q (action-value) network that outputs a categorical distribution over a discrete grid of value atoms. See the [paper](https://arxiv.org/abs/1707.06887) for more details. ## Implemented VAEs The library implements a general VAE architecture and a number of prior distributions. See `robomimic/models/vae_nets.py` for complete implementations. ### VAE -A general Variational Autoencoder (VAE) implementation, as described in https://arxiv.org/abs/1312.6114. 
+A general Variational Autoencoder (VAE) implementation, as described in this [paper](https://arxiv.org/abs/1312.6114). Models a distribution p(X) or a conditional distribution p(X | Y), where each variable can consist of multiple modalities. The target variable X, whose distribution is modeled, is specified through the `input_shapes` argument, which is a map between modalities (strings) and expected shapes. In this way, a variable that consists of multiple kinds of data (e.g. image and flat-dimensional) can be modeled as well. A separate `output_shapes` argument is used to specify the expected reconstructions - this allows for asymmetric reconstruction (for example, reconstructing low-resolution images). diff --git a/docs/robomimic_logo.png b/docs/robomimic_logo.png new file mode 100644 index 00000000..ad3ed2ec Binary files /dev/null and b/docs/robomimic_logo.png differ diff --git a/docs/tutorials/configs.md b/docs/tutorials/configs.md index fa67a28a..519257de 100644 --- a/docs/tutorials/configs.md +++ b/docs/tutorials/configs.md @@ -19,10 +19,10 @@ Please see the [Config documentation](../modules/configs.html) for more informat The preferred way to specify training parameters is to pass a config json to the main training script `train.py` via the `--config` argument. The dataset can be specified by setting the `data` attribute of the `train` section of the config json, or specified via the `--dataset` argument. The example below runs a default template json for the BC algorithm. **This is the preferred way to launch training runs.** ```sh -$ python train.py --config ../exps/templates/bc.json --dataset ../../tests/assets/test.hdf5 +$ python train.py --config ../exps/templates/bc.json --dataset ../../tests/assets/test_v141.hdf5 ``` -Please see the [hyperparameter helper docs](./advanced.html#using-the-hyperparameter-helper-to-launch-runs) to see how to easily generate json configs for launching training runs. +Please see the [hyperparameter helper tutorial](../tutorials/hyperparam_scan.html) to see how to easily generate json configs for launching training runs. #### 2. Constructing a config object in code diff --git a/docs/tutorials/dataset_contents.md b/docs/tutorials/dataset_contents.md index 1519af4c..87f6456f 100644 --- a/docs/tutorials/dataset_contents.md +++ b/docs/tutorials/dataset_contents.md @@ -18,10 +18,10 @@ The repository offers a simple utility script (`get_dataset_info.py`) to view th - the [environment metadata](../modules/environments.html#initialize-an-environment-from-a-dataset) in the dataset, which is used to construct the same simulator environment that the data was collected on - the dataset structure for the first demonstration -Pass the `--verbose` argument to print the list of demonstration keys under each filter key, and the dataset structure for all demonstrations. An example, using the small hdf5 dataset packaged with the repository in `tests/assets/test.hdf5` is shown below. +Pass the `--verbose` argument to print the list of demonstration keys under each filter key, and the dataset structure for all demonstrations. An example, using the small hdf5 dataset packaged with the repository in `tests/assets/test_v141.hdf5` is shown below. ```sh -$ python get_dataset_info.py --dataset ../../tests/assets/test.hdf5 +$ python get_dataset_info.py --dataset ../../tests/assets/test_v141.hdf5 ```
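Beyond `get_dataset_info.py`, the same information can be pulled out directly with `h5py`, which is useful when writing custom code against robomimic datasets. A minimal sketch of the standard hdf5 layout (demos under `data/`, filter keys under `mask/`, environment metadata stored as a JSON string attribute):

```python
import json
import h5py

with h5py.File("../../tests/assets/test_v141.hdf5", "r") as f:
    demos = list(f["data"].keys())                      # e.g. ["demo_0", "demo_1", ...]
    print("number of demos:", len(demos))
    print("env metadata:", json.loads(f["data"].attrs["env_args"]))
    if "mask" in f:
        print("filter keys:", list(f["mask"].keys()))   # e.g. ["train", "valid"]
    print("demo_0 contents:", list(f["data/demo_0"].keys()))
```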
@@ -36,7 +36,7 @@ Any user wishing to write custom code that works with robomimic datasets should

Note: These examples are compatible with any robomimic dataset.

-The examples in this section use the small hdf5 dataset packaged with the repository in `tests/assets/test.hdf5`, but you can run these examples with any robomimic hdf5 dataset. If you are using the default dataset, please make sure that robosuite is on the `offline_study` branch of robosuite -- this is necessary for the playback scripts to function properly. +The examples in this section use the small hdf5 dataset packaged with the repository in `tests/assets/test_v141.hdf5` (which requires robosuite v1.4.1), but you can run these examples with any robomimic hdf5 dataset.
@@ -44,14 +44,14 @@ Use the `playback_dataset.py` script to easily view dataset trajectories. ```sh # For the first 5 trajectories, load environment simulator states one-by-one, and render "agentview" and "robot0_eye_in_hand" cameras to video at /tmp/playback_dataset.mp4 -$ python playback_dataset.py --dataset ../../tests/assets/test.hdf5 --render_image_names agentview robot0_eye_in_hand --video_path /tmp/playback_dataset.mp4 --n 5 +$ python playback_dataset.py --dataset ../../tests/assets/test_v141.hdf5 --render_image_names agentview robot0_eye_in_hand --video_path /tmp/playback_dataset.mp4 --n 5 # Directly visualize the image observations in the dataset. This is especially useful for real robot datasets where there is no simulator to use for rendering. -$ python playback_dataset.py --dataset ../../tests/assets/test.hdf5 --use-obs --render_image_names agentview_image --video_path /tmp/obs_trajectory.mp4 +$ python playback_dataset.py --dataset ../../tests/assets/test_v141.hdf5 --use-obs --render_image_names agentview_image --video_path /tmp/obs_trajectory.mp4 # Play the dataset actions in the environment to verify that the recorded actions are reasonable. -$ python playback_dataset.py --dataset ../../tests/assets/test.hdf5 --use-actions --render_image_names agentview --video_path /tmp/playback_dataset_with_actions.mp4 +$ python playback_dataset.py --dataset ../../tests/assets/test_v141.hdf5 --use-actions --render_image_names agentview --video_path /tmp/playback_dataset_with_actions.mp4 # Visualize only the initial demonstration frames. -$ python playback_dataset.py --dataset ../../tests/assets/test.hdf5 --first --render_image_names agentview --video_path /tmp/dataset_task_inits.mp4 +$ python playback_dataset.py --dataset ../../tests/assets/test_v141.hdf5 --first --render_image_names agentview --video_path /tmp/dataset_task_inits.mp4 ``` diff --git a/docs/tutorials/hyperparam_scan.md b/docs/tutorials/hyperparam_scan.md index 43e7cb4e..1ca4b35e 100644 --- a/docs/tutorials/hyperparam_scan.md +++ b/docs/tutorials/hyperparam_scan.md @@ -22,7 +22,7 @@ Sections of the config that are not involved in the scan and that do not differ
-We modify `/tmp/gen_configs/base.json`, adding a base experiment name (`"bc_rnn_hyper"`) and specified the dataset path (`"/tmp/test.hdf5"`). +We modify `/tmp/gen_configs/base.json`, adding a base experiment name (`"bc_rnn_hyper"`) and specifying the dataset path (`"/tmp/test_v141.hdf5"`). ```sh $ cat /tmp/gen_configs/base.json @@ -59,7 +59,7 @@ $ cat /tmp/gen_configs/base.json } }, "train": { - "data": "/tmp/test.hdf5", + "data": "/tmp/test_v141.hdf5", "output_dir": "../bc_trained_models", "num_data_workers": 0, "hdf5_cache_mode": "all", @@ -198,6 +198,21 @@ Only do this if you are sweeping over a single value!
+
+

wandb logging

+ +If you would like to log and view results on wandb, enable wandb logging in the hyperparameter generator: +```python +generator.add_param( + key="experiment.logging.log_wandb", + name="", + group=-1, + values=[True], +) +``` + +
+ ### Define hyperparameter scan values Now we define our scan - we could like to sweep the following: - policy learning rate in [1e-3, 1e-4] @@ -276,3 +291,10 @@ python train.py --config /tmp/gen_configs/bc_rnn_hyper_plr_0.0001_gmm_t_rnnd_100 python train.py --config /tmp/gen_configs/bc_rnn_hyper_plr_0.0001_gmm_f_rnnd_400_mlp_1024.json python train.py --config /tmp/gen_configs/bc_rnn_hyper_plr_0.0001_gmm_f_rnnd_1000_mlp_0.json ``` + +
+

Meta information

+ +For each generated config file you will find a `meta` section that contains hyperparameter names, values, and other metadata. This `meta` section is generated automatically, and you should NOT need to edit or modify it. + +
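If you want to check which settings a particular run in a sweep used, the `meta` section can be read back from the generated json. A minimal sketch, assuming the config serializes `config.meta` under a top-level `"meta"` key, using one of the file names generated above:

```python
# Minimal sketch: read back the auto-generated "meta" section of a swept config.
import json

with open("/tmp/gen_configs/bc_rnn_hyper_plr_0.0001_gmm_f_rnnd_400_mlp_1024.json", "r") as f:
    cfg = json.load(f)

meta = cfg["meta"]
print(meta["hp_base_config_file"])  # base config the sweep was generated from
for key, value in zip(meta["hp_keys"], meta["hp_values"]):
    print("{} = {}".format(key, value))  # swept hyperparameter keys and values
```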
diff --git a/docs/tutorials/pretrained_representations.md b/docs/tutorials/pretrained_representations.md new file mode 100644 index 00000000..72c66a51 --- /dev/null +++ b/docs/tutorials/pretrained_representations.md @@ -0,0 +1,42 @@ +# Pre-trained Visual Representations + +**Robomimic** supports multiple pre-trained visual representations and offers integration for adapting observation encoders to the desired pre-trained visual representation encoders. + +## Terminology + +First, let's clarify the semantic distinctions when using different pre-trained visual representations: + +- **Backbone Classes** refer to the various pre-trained visual encoders. For instance, `R3MConv` and `MVPConv` are the backbone classes for using [R3M](https://arxiv.org/abs/2203.12601) and [MVP](https://arxiv.org/abs/2203.06173) pre-trained representations, respectively. +- **Model Classes** pertain to the different sizes of the pretrained models within each selected backbone class. For example, `R3MConv` has three model classes - `resnet18`, `resnet34`, and `resnet50`, while `MVPConv` features five model classes - `vits-mae-hoi`, `vits-mae-in`, `vits-sup-in`, `vitb-mae-egosoup`, and `vitl-256-mae-egosoup`. + +## Examples + +Using pre-trained visual representations is simple. Each pre-trained encoder is defined by its `backbone_class`, `model_class`, and whether to `freeze` representations or finetune them. Please note that you may need to refer to the original library of the pre-trained representation for installation instructions. + +If you are specifying your config with code (as in `examples/train_bc_rnn.py`), the following are example code blocks for using pre-trained representations: + +```python +# R3M +config.observation.encoder.rgb.core_kwargs.backbone_class = 'R3MConv' # R3M backbone for image observations (unused if no image observations) +config.observation.encoder.rgb.core_kwargs.backbone_kwargs.r3m_model_class = 'resnet18' # R3M model class (resnet18, resnet34, resnet50) +config.observation.encoder.rgb.core_kwargs.backbone_kwargs.freeze = True # whether to freeze network during training or allow finetuning +config.observation.encoder.rgb.core_kwargs.pool_class = None # no pooling class for pretraining model + +# MVP +config.observation.encoder.rgb.core_kwargs.backbone_class = 'MVPConv' # MVP backbone for image observations (unused if no image observations) +config.observation.encoder.rgb.core_kwargs.backbone_kwargs.mvp_model_class = 'vitb-mae-egosoup' # MVP model class (vits-mae-hoi, vits-mae-in, vits-sup-in, vitb-mae-egosoup, vitl-256-mae-egosoup) +config.observation.encoder.rgb.core_kwargs.backbone_kwargs.freeze = True # whether to freeze network during training or allow finetuning +config.observation.encoder.rgb.core_kwargs.pool_class = None # no pooling class for pretraining model + +# Set data loader attributes for image observations +config.train.num_data_workers = 2 # 2 data workers for image datasets +config.train.hdf5_cache_mode = "low_dim" # only cache non-image data + +# Ensure that you are using image observation modalities, names may depend on your dataset naming convention +config.observation.modalities.obs.rgb = [ + "agentview_image", + "robot0_eye_in_hand_image" + ] +``` + +Alternatively, if you are using a config json, you can set the appropriate keys in your json. 
diff --git a/docs/tutorials/training_transformers.md b/docs/tutorials/training_transformers.md new file mode 100644 index 00000000..03af242a --- /dev/null +++ b/docs/tutorials/training_transformers.md @@ -0,0 +1,43 @@ +# Training Transformers + +This tutorial shows how to train a transformer policy network. + +
+

Note: Understand how to launch training runs and view results first!

+ +Before trying to train a transformer, it might be useful to read the following tutorials: +- [how to launch training runs](./configs.html) +- [how to view training results](./viewing_results.html) +- [how to launch multiple training runs efficiently](./hyperparam_scan.html) + +
+ +A template with tuned parameters for transformer-based policy networks is defined in `robomimic/config/default_templates/bc_transformer.json`. + +#### 1. Using default configurations + +The easiest way to train a transformer policy network is to pass the default template json to the main training script `train.py` via the `--config` argument. The dataset can be specified by setting the `data` attribute of the `train` section of the config json, or specified via the `--dataset` argument. You may find that your data has different rollout horizon lengths, observation modalities, or other incompatibilities with the default template. In this scenario, we suggest defining custom parameters as described in (2). + +```sh +$ python train.py --config ../config/default_templates/bc_transformer.json --dataset /path/to/dataset.hdf5 +``` + +#### 2. Defining custom parameters + +If you want to modify the default transformer parameters, do not directly modify the default config (`config/bc_config.py`) or template (`config/default_templates/bc_transformer.json`). Instead, you can create a copy of `robomimic/config/default_templates/bc_transformer.json` and store it in a new directory on your computer. Set this as the base file for `scripts/hyperparam_helper.py` and define custom settings as described [here](./hyperparam_scan.html). This is particularly useful when running a sweep over hyperparameters; **it is the preferred way to launch multiple training runs**. + +Optionally, you can modify the default template in python code or directly set the appropriate keys in your copy of the config file. The code snippet below highlights useful parameters to tune for transformers. To see all transformer policy settings, refer to `config/bc_config.py`. + +```python +# make sure transformer is enabled +config.algo.transformer.enabled = True + +# useful config attributes to modify for transformers +config.algo.transformer.embed_dim = 512 # dimension for embeddings used by transformer +config.algo.transformer.num_layers = 6 # number of transformer blocks to stack +config.algo.transformer.num_heads = 8 # number of attention heads for each transformer block (should divide embed_dim evenly) +config.algo.transformer.context_length = 10 # length of (s, a) sub-sequences to feed to transformer +config.train.frame_stack = 10 # length of sub-sequence to observe: (s_{t-1}, a_{t-1}), (s_{t-2}, a_{t-2}), ..., (s_{t-9}, a_{t-9}) +config.train.seq_length = 1 # length of sub-sequence to predict: (s_{t}, a_{t}) +``` + diff --git a/docs/tutorials/viewing_results.md index 36d972ee..682ee2d0 100644 --- a/docs/tutorials/viewing_results.md +++ b/docs/tutorials/viewing_results.md @@ -13,6 +13,9 @@ Configured under `experiment.logging`: # save tensorboard logs under `logs/tb` in experiment folder "log_tb": true + + # save wandb logs under `logs/wandb` in experiment folder + "log_wandb": true }, ``` @@ -60,6 +63,7 @@ config.json # config used for this experiment logs/ # experiment log files log.txt # terminal output tb/ # tensorboard logs + wandb/ # wandb logs videos/ # videos of robot rollouts during training models/ # saved model checkpoints ``` @@ -91,4 +95,12 @@ You may find the following logging metrics useful: - `Timing_Stats/`: time spent by the algorithm loading data, training, performing rollouts, etc.
- `Train/`: training stats - `Validation/`: validation stats -- `System/RAM Usage (MB)`: system RAM used by algorithm \ No newline at end of file +- `System/RAM Usage (MB)`: system RAM used by algorithm + + +### Viewing wandb Results +You can also view results in [wandb](https://wandb.ai), similarly to tensorboard. To do so, ensure that you have set `experiment.logging.log_wandb` to True in the experiment config. + +When first logging to wandb, you will need to specify a wandb entity name, ie. the wandb account under which results will be logged. You can do so by setting `WANDB_API_KEY` to the desired wandb account name in `robomimic/macros_private.py`. Note: if this file does not exist, run `python robomimic/scripts/setup_macros.py` to setup the private macros file. + +By default all results will be logged under a wandb project labled `default`, however you can set the project name by setting `experiment.logging.wandb_proj_name` in the configs. diff --git a/docs/versions/v0.2.md b/docs/versions/v0.2.md new file mode 100644 index 00000000..72e853db --- /dev/null +++ b/docs/versions/v0.2.md @@ -0,0 +1,18 @@ +# v0.2 + +Redirecting... + + + + + + + Page Redirection + + + + If you are not redirected automatically, follow this link. + + diff --git a/examples/simple_obs_nets.py b/examples/simple_obs_nets.py index 9ef78187..236beaa8 100644 --- a/examples/simple_obs_nets.py +++ b/examples/simple_obs_nets.py @@ -7,8 +7,9 @@ from collections import OrderedDict import torch -from robomimic.models.obs_nets import ObservationEncoder, MLP, ObservationDecoder -from robomimic.models.base_nets import CropRandomizer +from robomimic.models.base_nets import MLP +from robomimic.models.obs_nets import ObservationEncoder, ObservationDecoder +from robomimic.models.obs_core import CropRandomizer import robomimic.utils.tensor_utils as TensorUtils import robomimic.utils.obs_utils as ObsUtils diff --git a/examples/simple_train_loop.py b/examples/simple_train_loop.py index 9e65d902..3e5c3c50 100644 --- a/examples/simple_train_loop.py +++ b/examples/simple_train_loop.py @@ -155,6 +155,7 @@ def run_train_loop(model, data_loader): # process batch for training input_batch = model.process_batch_for_training(batch) + input_batch = model.postprocess_batch_for_training(input_batch, obs_normalization_stats=None) # forward and backward pass info = model.train_on_batch(batch=input_batch, epoch=epoch, validate=False) diff --git a/examples/train_bc_rnn.py b/examples/train_bc_rnn.py index 205e7e46..b3ec7c51 100644 --- a/examples/train_bc_rnn.py +++ b/examples/train_bc_rnn.py @@ -21,7 +21,7 @@ import robomimic import robomimic.utils.torch_utils as TorchUtils import robomimic.utils.test_utils as TestUtils -import robomimic.utils.macros as Macros +import robomimic.macros as Macros from robomimic.config import config_factory from robomimic.scripts.train import train @@ -62,7 +62,7 @@ def robosuite_hyperparameters(config): ## evaluation rollout config ## config.experiment.rollout.enabled = True # enable evaluation rollouts config.experiment.rollout.n = 50 # number of rollouts per evaluation - config.experiment.rollout.horizon = 400 # maximum number of env steps per rollout + config.experiment.rollout.horizon = 400 # set horizon based on length of demonstrations (can be obtained with scripts/get_dataset_info.py) config.experiment.rollout.rate = 50 # do rollouts every @rate epochs config.experiment.rollout.warmstart = 0 # number of epochs to wait before starting rollouts config.experiment.rollout.terminate_on_success = True # end rollout 
early after task success @@ -86,7 +86,8 @@ def robosuite_hyperparameters(config): config.train.hdf5_normalize_obs = False # no obs normalization # if provided, demonstrations are filtered by the list of demo keys under "mask/@hdf5_filter_key" - config.train.hdf5_filter_key = None # by default, use no filter key + config.train.hdf5_filter_key = "train" # by default, use "train" and "valid" filter keys corresponding to train-valid split + config.train.hdf5_validation_filter_key = "valid" # fetch sequences of length 10 from dataset for RNN training config.train.seq_length = 10 @@ -132,6 +133,18 @@ def robosuite_hyperparameters(config): config.observation.encoder.rgb.core_kwargs.pool_kwargs.temperature = 1.0 # Default arguments for "SpatialSoftmax" config.observation.encoder.rgb.core_kwargs.pool_kwargs.noise_std = 0.0 # Default arguments for "SpatialSoftmax" + # if you prefer to use pre-trained visual representations, uncomment the following lines + # R3M + # config.observation.encoder.rgb.core_kwargs.backbone_class = 'R3MConv' # R3M backbone for image observations (unused if no image observations) + # config.observation.encoder.rgb.core_kwargs.backbone_kwargs.r3m_model_class = 'resnet18' # R3M model class (resnet18, resnet34, resnet50) + # config.observation.encoder.rgb.core_kwargs.backbone_kwargs.freeze = True # whether to freeze network during training or allow finetuning + # config.observation.encoder.rgb.core_kwargs.pool_class = None # no pooling class for pretraining model + # MVP + # config.observation.encoder.rgb.core_kwargs.backbone_class = 'MVPConv' # MVP backbone for image observations (unused if no image observations) + # config.observation.encoder.rgb.core_kwargs.backbone_kwargs.mvp_model_class = 'vitb-mae-egosoup' # MVP model class (vits-mae-hoi, vits-mae-in, vits-sup-in, vitb-mae-egosoup, vitl-256-mae-egosoup) + # config.observation.encoder.rgb.core_kwargs.backbone_kwargs.freeze = True # whether to freeze network during training or allow finetuning + # config.observation.encoder.rgb.core_kwargs.pool_class = None # no pooling class for pretraining model + # observation randomizer class - set to None to use no randomization, or 'CropRandomizer' to use crop randomization config.observation.encoder.rgb.obs_randomizer_class = None @@ -236,7 +249,8 @@ def momart_hyperparameters(config): config.train.hdf5_normalize_obs = False # no obs normalization # if provided, demonstrations are filtered by the list of demo keys under "mask/@hdf5_filter_key" - config.train.hdf5_filter_key = None # by default, use no filter key + config.train.hdf5_filter_key = "train" # by default, use "train" and "valid" filter keys corresponding to train-valid split + config.train.hdf5_validation_filter_key = "valid" # fetch sequences of length 10 from dataset for RNN training config.train.seq_length = 50 diff --git a/requirements.txt b/requirements.txt index 0bcab6d3..6f64af97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ tensorboard tensorboardX imageio imageio-ffmpeg +matplotlib egl_probe>=1.0.1 torch torchvision diff --git a/robomimic/__init__.py b/robomimic/__init__.py index 5e305929..8b4d0fc5 100644 --- a/robomimic/__init__.py +++ b/robomimic/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.2.1" +__version__ = "0.3.0" # stores released dataset links and rollout horizons in global dictionary. 
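The `"train"` / `"valid"` filter keys that the updated defaults above rely on are normally created with the repository's `scripts/split_train_val.py` utility. The sketch below is only meant to illustrate the underlying `mask` group format; the path and split ratio are placeholders, and it assumes write access to the hdf5 file.

```python
# Illustrative sketch of the hdf5 filter-key format: "mask/train" and "mask/valid"
# are datasets of demo-name strings. Prefer scripts/split_train_val.py in practice.
import h5py
import numpy as np

with h5py.File("/tmp/test_v141.hdf5", "a") as f:
    demos = sorted(f["data"].keys())
    n_valid = max(1, int(0.1 * len(demos)))  # hold out roughly 10% for validation
    valid_demos = demos[:n_valid]
    train_demos = demos[n_valid:]

    # overwrite any existing train / valid filter keys
    if "mask" in f:
        for key in ("train", "valid"):
            if key in f["mask"]:
                del f["mask"][key]

    f.create_dataset("mask/train", data=np.array(train_demos, dtype="S"))
    f.create_dataset("mask/valid", data=np.array(valid_demos, dtype="S"))
```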
@@ -60,46 +60,49 @@ def register_all_links(): ph_horizons = [400, 400, 400, 700, 700, 1000, 1000, 1000] for task, horizon in zip(ph_tasks, ph_horizons): register_dataset_link(task=task, dataset_type="ph", hdf5_type="raw", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/demo.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/demo{}.hdf5".format( + task, "" if "real" in task else "_v141" + ) + ) # real world datasets only have demo.hdf5 files which already contain all observation modalities # while sim datasets store raw low-dim mujoco states in the demo.hdf5 if "real" not in task: register_dataset_link(task=task, dataset_type="ph", hdf5_type="low_dim", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/low_dim.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/low_dim_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="ph", hdf5_type="image", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/image.hdf5".format(task)) + link=None) # all multi human datasets mh_tasks = ["lift", "can", "square", "transport"] mh_horizons = [500, 500, 500, 1100] for task, horizon in zip(mh_tasks, mh_horizons): register_dataset_link(task=task, dataset_type="mh", hdf5_type="raw", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/demo.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/demo_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="mh", hdf5_type="low_dim", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/low_dim.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/low_dim_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="mh", hdf5_type="image", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/image.hdf5".format(task)) + link=None) # all machine generated datasets for task, horizon in zip(["lift", "can"], [400, 400]): register_dataset_link(task=task, dataset_type="mg", hdf5_type="raw", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/demo.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/demo_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_sparse", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_sparse.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_sparse_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_sparse", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_sparse.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_sparse_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_dense", horizon=horizon, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_dense.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_dense_v141.hdf5".format(task)) register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_dense", horizon=horizon, - 
link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_dense.hdf5".format(task)) + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_dense_v141.hdf5".format(task)) # can-paired dataset register_dataset_link(task="can", dataset_type="paired", hdf5_type="raw", horizon=400, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo.hdf5") + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo_v141.hdf5") register_dataset_link(task="can", dataset_type="paired", hdf5_type="low_dim", horizon=400, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim.hdf5") + link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim_v141.hdf5") register_dataset_link(task="can", dataset_type="paired", hdf5_type="image", horizon=400, - link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/image.hdf5") + link=None) def register_momart_dataset_link(task, dataset_type, link, dataset_size): diff --git a/robomimic/algo/__init__.py b/robomimic/algo/__init__.py index a64077cc..68d70a57 100644 --- a/robomimic/algo/__init__.py +++ b/robomimic/algo/__init__.py @@ -4,6 +4,7 @@ from robomimic.algo.bc import BC, BC_Gaussian, BC_GMM, BC_VAE, BC_RNN, BC_RNN_GMM from robomimic.algo.bcq import BCQ, BCQ_GMM, BCQ_Distributional from robomimic.algo.cql import CQL +from robomimic.algo.iql import IQL from robomimic.algo.gl import GL, GL_VAE, ValuePlanner from robomimic.algo.hbc import HBC from robomimic.algo.iris import IRIS diff --git a/robomimic/algo/algo.py b/robomimic/algo/algo.py index 1a4a835f..49878ab1 100644 --- a/robomimic/algo/algo.py +++ b/robomimic/algo/algo.py @@ -201,6 +201,48 @@ def process_batch_for_training(self, batch): """ return batch + def postprocess_batch_for_training(self, batch, obs_normalization_stats): + """ + Does some operations (like channel swap, uint8 to float conversion, normalization) + after @process_batch_for_training is called, in order to ensure these operations + take place on GPU. + + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader. Assumed to be on the device where + training will occur (after @process_batch_for_training + is called) + + obs_normalization_stats (dict or None): if provided, this should map observation + keys to dicts with a "mean" and "std" of shape (1, ...) where ... is the + default shape for the observation. + + Returns: + batch (dict): postproceesed batch + """ + + # we will search the nested batch dictionary for the following special batch dict keys + # and apply the processing function to their values (which correspond to observations) + obs_keys = ["obs", "next_obs", "goal_obs"] + + def recurse_helper(d): + """ + Apply process_obs_dict to values in nested dictionary d that match a key in obs_keys. + """ + for k in d: + if k in obs_keys: + # found key - stop search and process observation + if d[k] is not None: + d[k] = ObsUtils.process_obs_dict(d[k]) + if obs_normalization_stats is not None: + d[k] = ObsUtils.normalize_obs(d[k], obs_normalization_stats=obs_normalization_stats) + elif isinstance(d[k], dict): + # search down into dictionary + recurse_helper(d[k]) + + recurse_helper(batch) + return batch + def train_on_batch(self, batch, epoch, validate=False): """ Training on a single batch of data. 
diff --git a/robomimic/algo/bc.py b/robomimic/algo/bc.py index 4853f75a..c5674f05 100644 --- a/robomimic/algo/bc.py +++ b/robomimic/algo/bc.py @@ -6,6 +6,7 @@ import torch import torch.nn as nn import torch.nn.functional as F +import torch.distributions as D import robomimic.models.base_nets as BaseNets import robomimic.models.obs_nets as ObsNets @@ -38,18 +39,39 @@ def algo_config_to_class(algo_config): gmm_enabled = ("gmm" in algo_config and algo_config.gmm.enabled) vae_enabled = ("vae" in algo_config and algo_config.vae.enabled) - if algo_config.rnn.enabled: - if gmm_enabled: - return BC_RNN_GMM, {} - return BC_RNN, {} - assert sum([gaussian_enabled, gmm_enabled, vae_enabled]) <= 1 + rnn_enabled = algo_config.rnn.enabled + transformer_enabled = algo_config.transformer.enabled + if gaussian_enabled: - return BC_Gaussian, {} - if gmm_enabled: - return BC_GMM, {} - if vae_enabled: - return BC_VAE, {} - return BC, {} + if rnn_enabled: + raise NotImplementedError + elif transformer_enabled: + raise NotImplementedError + else: + algo_class, algo_kwargs = BC_Gaussian, {} + elif gmm_enabled: + if rnn_enabled: + algo_class, algo_kwargs = BC_RNN_GMM, {} + elif transformer_enabled: + algo_class, algo_kwargs = BC_Transformer_GMM, {} + else: + algo_class, algo_kwargs = BC_GMM, {} + elif vae_enabled: + if rnn_enabled: + raise NotImplementedError + elif transformer_enabled: + raise NotImplementedError + else: + algo_class, algo_kwargs = BC_VAE, {} + else: + if rnn_enabled: + algo_class, algo_kwargs = BC_RNN, {} + elif transformer_enabled: + algo_class, algo_kwargs = BC_Transformer, {} + else: + algo_class, algo_kwargs = BC, {} + + return algo_class, algo_kwargs class BC(PolicyAlgo): @@ -87,7 +109,10 @@ def process_batch_for_training(self, batch): input_batch["obs"] = {k: batch["obs"][k][:, 0, :] for k in batch["obs"]} input_batch["goal_obs"] = batch.get("goal_obs", None) # goals may not be present input_batch["actions"] = batch["actions"][:, 0, :] - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) + def train_on_batch(self, batch, epoch, validate=False): """ @@ -504,7 +529,9 @@ def process_batch_for_training(self, batch): obs_seq_start = TensorUtils.index_at_time(batch["obs"], ind=0) input_batch["obs"] = TensorUtils.unsqueeze_expand_at(obs_seq_start, size=n_steps, dim=1) - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def get_action(self, obs_dict, goal_dict=None): """ @@ -643,3 +670,205 @@ def log_info(self, info): if "policy_grad_norms" in info: log["Policy_Grad_Norms"] = info["policy_grad_norms"] return log + + +class BC_Transformer(BC): + """ + BC training with a Transformer policy. + """ + def _create_networks(self): + """ + Creates networks and places them into @self.nets. 
+ """ + assert self.algo_config.transformer.enabled + + self.nets = nn.ModuleDict() + self.nets["policy"] = PolicyNets.TransformerActorNetwork( + obs_shapes=self.obs_shapes, + goal_shapes=self.goal_shapes, + ac_dim=self.ac_dim, + encoder_kwargs=ObsUtils.obs_encoder_kwargs_from_config(self.obs_config.encoder), + **BaseNets.transformer_args_from_config(self.algo_config.transformer), + ) + self._set_params_from_config() + self.nets = self.nets.float().to(self.device) + + def _set_params_from_config(self): + """ + Read specific config variables we need for training / eval. + Called by @_create_networks method + """ + self.context_length = self.algo_config.transformer.context_length + self.supervise_all_steps = self.algo_config.transformer.supervise_all_steps + + def process_batch_for_training(self, batch): + """ + Processes input batch from a data loader to filter out + relevant information and prepare the batch for training. + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader + Returns: + input_batch (dict): processed and filtered batch that + will be used for training + """ + input_batch = dict() + h = self.context_length + input_batch["obs"] = {k: batch["obs"][k][:, :h, :] for k in batch["obs"]} + input_batch["goal_obs"] = batch.get("goal_obs", None) # goals may not be present + + if self.supervise_all_steps: + # supervision on entire sequence (instead of just current timestep) + input_batch["actions"] = batch["actions"][:, :h, :] + else: + # just use current timestep + input_batch["actions"] = batch["actions"][:, h-1, :] + + input_batch = TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + return input_batch + + def _forward_training(self, batch, epoch=None): + """ + Internal helper function for BC_Transformer algo class. Compute forward pass + and return network outputs in @predictions dict. + + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader and filtered by @process_batch_for_training + + Returns: + predictions (dict): dictionary containing network outputs + """ + # ensure that transformer context length is consistent with temporal dimension of observations + TensorUtils.assert_size_at_dim( + batch["obs"], + size=(self.context_length), + dim=1, + msg="Error: expect temporal dimension of obs batch to match transformer context length {}".format(self.context_length), + ) + + predictions = OrderedDict() + predictions["actions"] = self.nets["policy"](obs_dict=batch["obs"], actions=None, goal_dict=batch["goal_obs"]) + if not self.supervise_all_steps: + # only supervise final timestep + predictions["actions"] = predictions["actions"][:, -1, :] + return predictions + + def get_action(self, obs_dict, goal_dict=None): + """ + Get policy action outputs. + Args: + obs_dict (dict): current observation + goal_dict (dict): (optional) goal + Returns: + action (torch.Tensor): action tensor + """ + assert not self.nets.training + + return self.nets["policy"](obs_dict, actions=None, goal_dict=goal_dict)[:, -1, :] + + +class BC_Transformer_GMM(BC_Transformer): + """ + BC training with a Transformer GMM policy. + """ + def _create_networks(self): + """ + Creates networks and places them into @self.nets. 
+ """ + assert self.algo_config.gmm.enabled + assert self.algo_config.transformer.enabled + + self.nets = nn.ModuleDict() + self.nets["policy"] = PolicyNets.TransformerGMMActorNetwork( + obs_shapes=self.obs_shapes, + goal_shapes=self.goal_shapes, + ac_dim=self.ac_dim, + num_modes=self.algo_config.gmm.num_modes, + min_std=self.algo_config.gmm.min_std, + std_activation=self.algo_config.gmm.std_activation, + low_noise_eval=self.algo_config.gmm.low_noise_eval, + encoder_kwargs=ObsUtils.obs_encoder_kwargs_from_config(self.obs_config.encoder), + **BaseNets.transformer_args_from_config(self.algo_config.transformer), + ) + self._set_params_from_config() + self.nets = self.nets.float().to(self.device) + + def _forward_training(self, batch, epoch=None): + """ + Modify from super class to support GMM training. + """ + # ensure that transformer context length is consistent with temporal dimension of observations + TensorUtils.assert_size_at_dim( + batch["obs"], + size=(self.context_length), + dim=1, + msg="Error: expect temporal dimension of obs batch to match transformer context length {}".format(self.context_length), + ) + + dists = self.nets["policy"].forward_train( + obs_dict=batch["obs"], + actions=None, + goal_dict=batch["goal_obs"], + low_noise_eval=False, + ) + + # make sure that this is a batch of multivariate action distributions, so that + # the log probability computation will be correct + assert len(dists.batch_shape) == 2 # [B, T] + + if not self.supervise_all_steps: + # only use final timestep prediction by making a new distribution with only final timestep. + # This essentially does `dists = dists[:, -1]` + component_distribution = D.Normal( + loc=dists.component_distribution.base_dist.loc[:, -1], + scale=dists.component_distribution.base_dist.scale[:, -1], + ) + component_distribution = D.Independent(component_distribution, 1) + mixture_distribution = D.Categorical(logits=dists.mixture_distribution.logits[:, -1]) + dists = D.MixtureSameFamily( + mixture_distribution=mixture_distribution, + component_distribution=component_distribution, + ) + + log_probs = dists.log_prob(batch["actions"]) + + predictions = OrderedDict( + log_probs=log_probs, + ) + return predictions + + def _compute_losses(self, predictions, batch): + """ + Internal helper function for BC_Transformer_GMM algo class. Compute losses based on + network outputs in @predictions dict, using reference labels in @batch. + Args: + predictions (dict): dictionary containing network outputs, from @_forward_training + batch (dict): dictionary with torch.Tensors sampled + from a data loader and filtered by @process_batch_for_training + Returns: + losses (dict): dictionary of losses computed over the batch + """ + + # loss is just negative log-likelihood of action targets + action_loss = -predictions["log_probs"].mean() + return OrderedDict( + log_probs=-action_loss, + action_loss=action_loss, + ) + + def log_info(self, info): + """ + Process info dictionary from @train_on_batch to summarize + information to pass to tensorboard for logging. 
+ Args: + info (dict): dictionary of info + Returns: + loss_log (dict): name -> summary statistic + """ + log = PolicyAlgo.log_info(self, info) + log["Loss"] = info["losses"]["action_loss"].item() + log["Log_Likelihood"] = info["losses"]["log_probs"].item() + if "policy_grad_norms" in info: + log["Policy_Grad_Norms"] = info["policy_grad_norms"] + return log \ No newline at end of file diff --git a/robomimic/algo/bcq.py b/robomimic/algo/bcq.py index 27123b7f..5843ccb5 100644 --- a/robomimic/algo/bcq.py +++ b/robomimic/algo/bcq.py @@ -201,7 +201,9 @@ def process_batch_for_training(self, batch): if done_inds.shape[0] > 0: input_batch["rewards"][done_inds] = input_batch["rewards"][done_inds] * (1. / (1. - self.discount)) - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def _train_action_sampler_on_batch(self, batch, epoch, no_backprop=False): """ diff --git a/robomimic/algo/cql.py b/robomimic/algo/cql.py index ef41812a..0c24d50a 100644 --- a/robomimic/algo/cql.py +++ b/robomimic/algo/cql.py @@ -208,7 +208,9 @@ def process_batch_for_training(self, batch): done_seq = batch["dones"][:, :self.n_step] input_batch["dones"] = (done_seq.sum(dim=1) > 0).float().unsqueeze(1) - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def train_on_batch(self, batch, epoch, validate=False): """ diff --git a/robomimic/algo/gl.py b/robomimic/algo/gl.py index 6b243b48..24ae8008 100644 --- a/robomimic/algo/gl.py +++ b/robomimic/algo/gl.py @@ -117,7 +117,9 @@ def process_batch_for_training(self, batch): input_batch["target_subgoals"] = input_batch["subgoals"] input_batch["goal_obs"] = batch.get("goal_obs", None) # goals may not be present - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def get_actor_goal_for_training_from_processed_batch(self, processed_batch, **kwargs): """ @@ -578,7 +580,9 @@ def process_batch_for_training(self, batch): input_batch["planner"] = self.planner.process_batch_for_training(batch) input_batch["value_net"] = self.value_net.process_batch_for_training(batch) - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def train_on_batch(self, batch, epoch, validate=False): """ diff --git a/robomimic/algo/hbc.py b/robomimic/algo/hbc.py index 7c540454..543b1fbc 100644 --- a/robomimic/algo/hbc.py +++ b/robomimic/algo/hbc.py @@ -149,7 +149,7 @@ def process_batch_for_training(self, batch): policy_subgoal_indices = torch.randint( low=0, high=self.global_config.train.seq_length, size=(batch["actions"].shape[0],)) goal_obs = 
TensorUtils.gather_sequence(batch["next_obs"], policy_subgoal_indices) - goal_obs = TensorUtils.to_device(TensorUtils.to_float(goal_obs), self.device) + goal_obs = TensorUtils.to_float(TensorUtils.to_device(goal_obs, self.device)) input_batch["actor"]["goal_obs"] = \ self.planner.get_actor_goal_for_training_from_processed_batch( goal_obs, @@ -167,7 +167,9 @@ def process_batch_for_training(self, batch): num_prior_samples=self.algo_config.latent_subgoal.prior_correction.num_samples, ) - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def train_on_batch(self, batch, epoch, validate=False): """ diff --git a/robomimic/algo/iql.py b/robomimic/algo/iql.py new file mode 100644 index 00000000..bde522b2 --- /dev/null +++ b/robomimic/algo/iql.py @@ -0,0 +1,428 @@ +""" +Implementation of Implicit Q-Learning (IQL). +Based off of https://github.com/rail-berkeley/rlkit/blob/master/rlkit/torch/sac/iql_trainer.py. +(Paper - https://arxiv.org/abs/2110.06169). +""" +import numpy as np +from collections import OrderedDict + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import robomimic.models.policy_nets as PolicyNets +import robomimic.models.value_nets as ValueNets +import robomimic.utils.obs_utils as ObsUtils +import robomimic.utils.tensor_utils as TensorUtils +import robomimic.utils.torch_utils as TorchUtils +from robomimic.algo import register_algo_factory_func, ValueAlgo, PolicyAlgo + + +@register_algo_factory_func("iql") +def algo_config_to_class(algo_config): + """ + Maps algo config to the IQL algo class to instantiate, along with additional algo kwargs. + + Args: + algo_config (Config instance): algo config + + Returns: + algo_class: subclass of Algo + algo_kwargs (dict): dictionary of additional kwargs to pass to algorithm + """ + return IQL, {} + + +class IQL(PolicyAlgo, ValueAlgo): + def _create_networks(self): + """ + Creates networks and places them into @self.nets. + + Networks for this algo: critic (potentially ensemble), actor, value function + """ + + # Create nets + self.nets = nn.ModuleDict() + + # Assemble args to pass to actor + actor_args = dict(self.algo_config.actor.net.common) + + # Add network-specific args and define network class + if self.algo_config.actor.net.type == "gaussian": + actor_cls = PolicyNets.GaussianActorNetwork + actor_args.update(dict(self.algo_config.actor.net.gaussian)) + elif self.algo_config.actor.net.type == "gmm": + actor_cls = PolicyNets.GMMActorNetwork + actor_args.update(dict(self.algo_config.actor.net.gmm)) + else: + # Unsupported actor type! + raise ValueError(f"Unsupported actor requested. 
" + f"Requested: {self.algo_config.actor.net.type}, " + f"valid options are: {['gaussian', 'gmm']}") + + # Actor + self.nets["actor"] = actor_cls( + obs_shapes=self.obs_shapes, + goal_shapes=self.goal_shapes, + ac_dim=self.ac_dim, + mlp_layer_dims=self.algo_config.actor.layer_dims, + encoder_kwargs=ObsUtils.obs_encoder_kwargs_from_config(self.obs_config.encoder), + **actor_args, + ) + + # Critics + self.nets["critic"] = nn.ModuleList() + self.nets["critic_target"] = nn.ModuleList() + for _ in range(self.algo_config.critic.ensemble.n): + for net_list in (self.nets["critic"], self.nets["critic_target"]): + critic = ValueNets.ActionValueNetwork( + obs_shapes=self.obs_shapes, + ac_dim=self.ac_dim, + mlp_layer_dims=self.algo_config.critic.layer_dims, + goal_shapes=self.goal_shapes, + encoder_kwargs=ObsUtils.obs_encoder_kwargs_from_config(self.obs_config.encoder), + ) + net_list.append(critic) + + # Value function network + self.nets["vf"] = ValueNets.ValueNetwork( + obs_shapes=self.obs_shapes, + mlp_layer_dims=self.algo_config.critic.layer_dims, + goal_shapes=self.goal_shapes, + encoder_kwargs=ObsUtils.obs_encoder_kwargs_from_config(self.obs_config.encoder), + ) + + # Send networks to appropriate device + self.nets = self.nets.float().to(self.device) + + # sync target networks at beginning of training + with torch.no_grad(): + for critic, critic_target in zip(self.nets["critic"], self.nets["critic_target"]): + TorchUtils.hard_update( + source=critic, + target=critic_target, + ) + + def process_batch_for_training(self, batch): + """ + Processes input batch from a data loader to filter out relevant info and prepare the batch for training. + + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader + + Returns: + input_batch (dict): processed and filtered batch that + will be used for training + """ + + input_batch = dict() + + # remove temporal batches for all + input_batch["obs"] = {k: batch["obs"][k][:, 0, :] for k in batch["obs"]} + input_batch["next_obs"] = {k: batch["next_obs"][k][:, 0, :] for k in batch["next_obs"]} + input_batch["goal_obs"] = batch.get("goal_obs", None) # goals may not be present + input_batch["actions"] = batch["actions"][:, 0, :] + input_batch["dones"] = batch["dones"][:, 0] + input_batch["rewards"] = batch["rewards"][:, 0] + + return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + + def train_on_batch(self, batch, epoch, validate=False): + """ + Training on a single batch of data. + + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader and filtered by @process_batch_for_training + + epoch (int): epoch number - required by some Algos that need + to perform staged training and early stopping + + validate (bool): if True, don't perform any learning updates. 
+ + Returns: + info (dict): dictionary of relevant inputs, outputs, and losses + that might be relevant for logging + """ + info = OrderedDict() + + # Set the correct context for this training step + with TorchUtils.maybe_no_grad(no_grad=validate): + # Always run super call first + info = super().train_on_batch(batch, epoch, validate=validate) + + # Compute loss for critic(s) + critic_losses, vf_loss, critic_info = self._compute_critic_loss(batch) + # Compute loss for actor + actor_loss, actor_info = self._compute_actor_loss(batch, critic_info) + + if not validate: + # Critic update + self._update_critic(critic_losses, vf_loss) + + # Actor update + self._update_actor(actor_loss) + + # Update info + info.update(actor_info) + info.update(critic_info) + + # Return stats + return info + + def _compute_critic_loss(self, batch): + """ + Helper function for computing Q and V losses. Called by @train_on_batch + + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader and filtered by @process_batch_for_training + + Returns: + critic_losses (list): list of critic (Q function) losses + vf_loss (torch.Tensor): value function loss + info (dict): dictionary of Q / V predictions and losses + """ + info = OrderedDict() + + # get batch values + obs = batch["obs"] + actions = batch["actions"] + next_obs = batch["next_obs"] + goal_obs = batch["goal_obs"] + rewards = torch.unsqueeze(batch["rewards"], 1) + dones = torch.unsqueeze(batch["dones"], 1) + + # Q predictions + pred_qs = [critic(obs_dict=obs, acts=actions, goal_dict=goal_obs) + for critic in self.nets["critic"]] + + info["critic/critic1_pred"] = pred_qs[0].mean() + + # Q target values + target_vf_pred = self.nets["vf"](obs_dict=next_obs, goal_dict=goal_obs).detach() + q_target = rewards + (1. - dones) * self.algo_config.discount * target_vf_pred + q_target = q_target.detach() + + # Q losses + critic_losses = [] + td_loss_fcn = nn.SmoothL1Loss() if self.algo_config.critic.use_huber else nn.MSELoss() + for (i, q_pred) in enumerate(pred_qs): + # Calculate td error loss + td_loss = td_loss_fcn(q_pred, q_target) + info[f"critic/critic{i+1}_loss"] = td_loss + critic_losses.append(td_loss) + + # V predictions + pred_qs = [critic(obs_dict=obs, acts=actions, goal_dict=goal_obs) + for critic in self.nets["critic_target"]] + q_pred, _ = torch.cat(pred_qs, dim=1).min(dim=1, keepdim=True) + q_pred = q_pred.detach() + vf_pred = self.nets["vf"](obs) + + # V losses: expectile regression. see section 4.1 in https://arxiv.org/pdf/2110.06169.pdf + vf_err = vf_pred - q_pred + vf_sign = (vf_err > 0).float() + vf_weight = (1 - vf_sign) * self.algo_config.vf_quantile + vf_sign * (1 - self.algo_config.vf_quantile) + vf_loss = (vf_weight * (vf_err ** 2)).mean() + + # update logs for V loss + info["vf/q_pred"] = q_pred + info["vf/v_pred"] = vf_pred + info["vf/v_loss"] = vf_loss + + # Return stats + return critic_losses, vf_loss, info + + def _update_critic(self, critic_losses, vf_loss): + """ + Helper function for updating critic and vf networks. 
Called by @train_on_batch + + Args: + critic_losses (list): list of critic (Q function) losses + vf_loss (torch.Tensor): value function loss + """ + + # update ensemble of critics + for (critic_loss, critic, critic_target, optimizer) in zip( + critic_losses, self.nets["critic"], self.nets["critic_target"], self.optimizers["critic"] + ): + TorchUtils.backprop_for_loss( + net=critic, + optim=optimizer, + loss=critic_loss, + max_grad_norm=self.algo_config.critic.max_gradient_norm, + retain_graph=False, + ) + + # update target network + with torch.no_grad(): + TorchUtils.soft_update(source=critic, target=critic_target, tau=self.algo_config.target_tau) + + # update V function network + TorchUtils.backprop_for_loss( + net=self.nets["vf"], + optim=self.optimizers["vf"], + loss=vf_loss, + max_grad_norm=self.algo_config.critic.max_gradient_norm, + retain_graph=False, + ) + + def _compute_actor_loss(self, batch, critic_info): + """ + Helper function for computing actor loss. Called by @train_on_batch + + Args: + batch (dict): dictionary with torch.Tensors sampled + from a data loader and filtered by @process_batch_for_training + + critic_info (dict): dictionary containing Q and V function predictions, + to be used for computing advantage estimates + + Returns: + actor_loss (torch.Tensor): actor loss + info (dict): dictionary of actor losses, log_probs, advantages, and weights + """ + info = OrderedDict() + + # compute log probability of batch actions + dist = self.nets["actor"].forward_train(obs_dict=batch["obs"], goal_dict=batch["goal_obs"]) + log_prob = dist.log_prob(batch["actions"]) + + info["actor/log_prob"] = log_prob.mean() + + # compute advantage estimate + q_pred = critic_info["vf/q_pred"] + v_pred = critic_info["vf/v_pred"] + adv = q_pred - v_pred + + # compute weights + weights = self._get_adv_weights(adv) + + # compute advantage weighted actor loss. disable gradients through weights + actor_loss = (-log_prob * weights.detach()).mean() + + info["actor/loss"] = actor_loss + + # log adv-related values + info["adv/adv"] = adv + info["adv/adv_weight"] = weights + + # Return stats + return actor_loss, info + + def _update_actor(self, actor_loss): + """ + Helper function for updating actor network. Called by @train_on_batch + + Args: + actor_loss (torch.Tensor): actor loss + """ + + TorchUtils.backprop_for_loss( + net=self.nets["actor"], + optim=self.optimizers["actor"], + loss=actor_loss, + max_grad_norm=self.algo_config.actor.max_gradient_norm, + ) + + def _get_adv_weights(self, adv): + """ + Helper function for computing advantage weights. Called by @_compute_actor_loss + + Args: + adv (torch.Tensor): raw advantage estimates + + Returns: + weights (torch.Tensor): weights computed based on advantage estimates, + in shape (B,) where B is batch size + """ + + # clip raw advantage values + if self.algo_config.adv.clip_adv_value is not None: + adv = adv.clamp(max=self.algo_config.adv.clip_adv_value) + + # compute weights based on advantage values + beta = self.algo_config.adv.beta # temprature factor + weights = torch.exp(adv / beta) + + # clip final weights + if self.algo_config.adv.use_final_clip is True: + weights = weights.clamp(-100.0, 100.0) + + # reshape from (B, 1) to (B,) + return weights[:, 0] + + def log_info(self, info): + """ + Process info dictionary from @train_on_batch to summarize + information to pass to tensorboard for logging. 
+ + Args: + info (dict): dictionary of info + + Returns: + loss_log (dict): name -> summary statistic + """ + log = OrderedDict() + + log["actor/log_prob"] = info["actor/log_prob"].item() + log["actor/loss"] = info["actor/loss"].item() + + log["critic/critic1_pred"] = info["critic/critic1_pred"].item() + log["critic/critic1_loss"] = info["critic/critic1_loss"].item() + + log["vf/v_loss"] = info["vf/v_loss"].item() + + self._log_data_attributes(log, info, "vf/q_pred") + self._log_data_attributes(log, info, "vf/v_pred") + self._log_data_attributes(log, info, "adv/adv") + self._log_data_attributes(log, info, "adv/adv_weight") + + return log + + def _log_data_attributes(self, log, info, key): + """ + Helper function for logging statistics. Moodifies log in-place + + Args: + log (dict): existing log dictionary + log (dict): existing dictionary of tensors containing raw stats + key (str): key to log + """ + log[key + "/max"] = info[key].max().item() + log[key + "/min"] = info[key].min().item() + log[key + "/mean"] = info[key].mean().item() + log[key + "/std"] = info[key].std().item() + + def on_epoch_end(self, epoch): + """ + Called at the end of each epoch. + """ + + # LR scheduling updates + for lr_sc in self.lr_schedulers["critic"]: + if lr_sc is not None: + lr_sc.step() + + if self.lr_schedulers["vf"] is not None: + self.lr_schedulers["vf"].step() + + if self.lr_schedulers["actor"] is not None: + self.lr_schedulers["actor"].step() + + def get_action(self, obs_dict, goal_dict=None): + """ + Get policy action outputs. + + Args: + obs_dict (dict): current observation + goal_dict (dict): (optional) goal + + Returns: + action (torch.Tensor): action tensor + """ + assert not self.nets.training + + return self.nets["actor"](obs_dict=obs_dict, goal_dict=goal_dict) \ No newline at end of file diff --git a/robomimic/algo/iris.py b/robomimic/algo/iris.py index de79bd71..7b441470 100644 --- a/robomimic/algo/iris.py +++ b/robomimic/algo/iris.py @@ -145,13 +145,15 @@ def process_batch_for_training(self, batch): policy_subgoal_indices = torch.randint( low=0, high=self.global_config.train.seq_length, size=(batch["actions"].shape[0],)) goal_obs = TensorUtils.gather_sequence(batch["next_obs"], policy_subgoal_indices) - goal_obs = TensorUtils.to_device(TensorUtils.to_float(goal_obs), self.device) + goal_obs = TensorUtils.to_float(TensorUtils.to_device(goal_obs, self.device)) input_batch["actor"]["goal_obs"] = goal_obs else: # otherwise, use planner subgoal target as goal for the policy input_batch["actor"]["goal_obs"] = input_batch["planner"]["planner"]["target_subgoals"] - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def get_state_value(self, obs_dict, goal_dict=None): """ diff --git a/robomimic/algo/td3_bc.py b/robomimic/algo/td3_bc.py index fb8b21c6..e324c54a 100644 --- a/robomimic/algo/td3_bc.py +++ b/robomimic/algo/td3_bc.py @@ -189,7 +189,9 @@ def process_batch_for_training(self, batch): if done_inds.shape[0] > 0: input_batch["rewards"][done_inds] = input_batch["rewards"][done_inds] * (1. / (1. 
- self.discount)) - return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) + # we move to device first before float conversion because image observation modalities will be uint8 - + # this minimizes the amount of data transferred to GPU + return TensorUtils.to_float(TensorUtils.to_device(input_batch, self.device)) def _train_critic_on_batch(self, batch, epoch, no_backprop=False): """ diff --git a/robomimic/config/__init__.py b/robomimic/config/__init__.py index ac44664c..fa60a2f5 100644 --- a/robomimic/config/__init__.py +++ b/robomimic/config/__init__.py @@ -5,6 +5,7 @@ from robomimic.config.bc_config import BCConfig from robomimic.config.bcq_config import BCQConfig from robomimic.config.cql_config import CQLConfig +from robomimic.config.iql_config import IQLConfig from robomimic.config.gl_config import GLConfig from robomimic.config.hbc_config import HBCConfig from robomimic.config.iris_config import IRISConfig diff --git a/robomimic/config/base_config.py b/robomimic/config/base_config.py index 664736f1..62129cd0 100644 --- a/robomimic/config/base_config.py +++ b/robomimic/config/base_config.py @@ -60,6 +60,7 @@ def __init__(self, dict_to_load=None): self.train_config() self.algo_config() self.observation_config() + self.meta_config() # After Config init, new keys cannot be added to the config, except under nested # attributes that have called @do_not_lock_keys @@ -81,9 +82,11 @@ def experiment_config(self): """ self.experiment.name = "test" # name of experiment used to make log files - self.experiment.validate = True # whether to do validation or not + self.experiment.validate = False # whether to do validation or not self.experiment.logging.terminal_output_to_txt = True # whether to log stdout to txt file self.experiment.logging.log_tb = True # enable tensorboard logging + self.experiment.logging.log_wandb = False # enable wandb logging + self.experiment.logging.wandb_proj_name = "debug" # project name if using wandb ## save config - if and when to save model checkpoints ## @@ -152,6 +155,9 @@ class has a default implementation that usually doesn't need to be overriden. # used for parallel data loading self.train.hdf5_use_swmr = True + # whether to load "next_obs" group from hdf5 - only needed for batch / offline RL algorithms + self.train.hdf5_load_next_obs = True + # if true, normalize observations at train and test time, using the global mean and standard deviation # of each observation in each dimension, computed across the training set. See SequenceDataset.normalize_obs # in utils/dataset.py for more information. @@ -161,8 +167,16 @@ class has a default implementation that usually doesn't need to be overriden. # of the full dataset. This provides a convenient way to train on only a subset of the trajectories in a dataset. self.train.hdf5_filter_key = None + # if provided, use the list of demo keys under the hdf5 group "mask/@hdf5_validation_filter_key" for validation. + # Must be provided if @experiment.validate is True. + self.train.hdf5_validation_filter_key = None + # length of experience sequence to fetch from the dataset + # and whether to pad the beginning / end of the sequence at boundaries of trajectory in dataset self.train.seq_length = 1 + self.train.pad_seq_length = True + self.train.frame_stack = 1 + self.train.pad_frame_stack = True # keys from hdf5 to load into each batch, besides "obs" and "next_obs". If algorithms # require additional keys from each trajectory in the hdf5, they should be specified here. 
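To see how the new experiment and train options above fit together, here is a minimal sketch that toggles them on a freshly created BC config (the project name and values are placeholders); this mirrors how `examples/train_bc_rnn.py` sets config attributes in code.

```python
# Minimal sketch: enable the new logging / data-loading options on a fresh BC config.
from robomimic.config import config_factory

config = config_factory(algo_name="bc")

# wandb logging (disabled by default)
config.experiment.logging.log_wandb = True
config.experiment.logging.wandb_proj_name = "my_project"  # placeholder project name

# validation requires a validation filter key in the dataset
config.experiment.validate = True
config.train.hdf5_filter_key = "train"
config.train.hdf5_validation_filter_key = "valid"

# sequence fetching and padding behavior
config.train.seq_length = 10         # length of each sampled sub-sequence
config.train.pad_seq_length = True   # pad at trajectory boundaries
config.train.frame_stack = 1         # number of stacked observation frames
config.train.pad_frame_stack = True
```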
@@ -235,27 +249,13 @@ def observation_config(self): self.observation.encoder.low_dim.obs_randomizer_kwargs.do_not_lock_keys() # =============== RGB default encoder (ResNet backbone + linear layer output) =============== - self.observation.encoder.rgb.core_class = "VisualCore" - self.observation.encoder.rgb.core_kwargs.feature_dimension = 64 - self.observation.encoder.rgb.core_kwargs.flatten = True - self.observation.encoder.rgb.core_kwargs.backbone_class = "ResNet18Conv" - self.observation.encoder.rgb.core_kwargs.backbone_kwargs.pretrained = False - self.observation.encoder.rgb.core_kwargs.backbone_kwargs.input_coord_conv = False - self.observation.encoder.rgb.core_kwargs.backbone_kwargs.do_not_lock_keys() - self.observation.encoder.rgb.core_kwargs.pool_class = "SpatialSoftmax" # Alternate options are "SpatialMeanPool" or None (no pooling) - self.observation.encoder.rgb.core_kwargs.pool_kwargs.num_kp = 32 # Default arguments for "SpatialSoftmax" - self.observation.encoder.rgb.core_kwargs.pool_kwargs.learnable_temperature = False # Default arguments for "SpatialSoftmax" - self.observation.encoder.rgb.core_kwargs.pool_kwargs.temperature = 1.0 # Default arguments for "SpatialSoftmax" - self.observation.encoder.rgb.core_kwargs.pool_kwargs.noise_std = 0.0 # Default arguments for "SpatialSoftmax" - self.observation.encoder.rgb.core_kwargs.pool_kwargs.output_variance = False # Default arguments for "SpatialSoftmax" - self.observation.encoder.rgb.core_kwargs.pool_kwargs.do_not_lock_keys() + self.observation.encoder.rgb.core_class = "VisualCore" # Default VisualCore class combines backbone (like ResNet-18) with pooling operation (like spatial softmax) + self.observation.encoder.rgb.core_kwargs = Config() # See models/obs_core.py for important kwargs to set and defaults used + self.observation.encoder.rgb.core_kwargs.do_not_lock_keys() # RGB: Obs Randomizer settings - self.observation.encoder.rgb.obs_randomizer_class = None # Can set to 'CropRandomizer' to use crop randomization - self.observation.encoder.rgb.obs_randomizer_kwargs.crop_height = 76 # Default arguments for "CropRandomizer" - self.observation.encoder.rgb.obs_randomizer_kwargs.crop_width = 76 # Default arguments for "CropRandomizer" - self.observation.encoder.rgb.obs_randomizer_kwargs.num_crops = 1 # Default arguments for "CropRandomizer" - self.observation.encoder.rgb.obs_randomizer_kwargs.pos_enc = False # Default arguments for "CropRandomizer" + self.observation.encoder.rgb.obs_randomizer_class = None # Can set to 'CropRandomizer' to use crop randomization + self.observation.encoder.rgb.obs_randomizer_kwargs = Config() # See models/obs_core.py for important kwargs to set and defaults used self.observation.encoder.rgb.obs_randomizer_kwargs.do_not_lock_keys() # Allow for other custom modalities to be specified @@ -266,16 +266,25 @@ def observation_config(self): # =============== Scan default encoder (Conv1d backbone + linear layer output) =============== self.observation.encoder.scan = deepcopy(self.observation.encoder.rgb) - self.observation.encoder.scan.core_kwargs.pop("backbone_class") - self.observation.encoder.scan.core_kwargs.pop("backbone_kwargs") # Scan: Modify the core class + kwargs, otherwise, is same as rgb encoder - self.observation.encoder.scan.core_class = "ScanCore" - self.observation.encoder.scan.core_kwargs.conv_activation = "relu" - self.observation.encoder.scan.core_kwargs.conv_kwargs.out_channels = [32, 64, 64] - self.observation.encoder.scan.core_kwargs.conv_kwargs.kernel_size = [8, 4, 2] - 
self.observation.encoder.scan.core_kwargs.conv_kwargs.stride = [4, 2, 1] + self.observation.encoder.scan.core_class = "ScanCore" # Default ScanCore class uses Conv1D to process this modality + self.observation.encoder.scan.core_kwargs = Config() # See models/obs_core.py for important kwargs to set and defaults used + self.observation.encoder.scan.core_kwargs.do_not_lock_keys() + def meta_config(self): + """ + This function populates the `config.meta` attribute of the config. This portion of the config + is used to specify job information primarily for hyperparameter sweeps. + It contains hyperparameter keys and values, which are populated automatically + by the hyperparameter config generator (see `utils/hyperparam_utils.py`). + These values are read by the wandb logger (see `utils/log_utils.py`) to set job tags. + """ + + self.meta.hp_base_config_file = None # base config file in hyperparam sweep + self.meta.hp_keys = [] # relevant keys (swept) in hyperparam sweep + self.meta.hp_values = [] # values corresponding to keys in hyperparam sweep + @property def use_goals(self): # whether the agent is goal-conditioned diff --git a/robomimic/config/bc_config.py b/robomimic/config/bc_config.py index 5fb14930..1f701c68 100644 --- a/robomimic/config/bc_config.py +++ b/robomimic/config/bc_config.py @@ -8,6 +8,13 @@ class BCConfig(BaseConfig): ALGO_NAME = "bc" + def train_config(self): + """ + BC algorithms don't need "next_obs" from hdf5 - so save on storage and compute by disabling it. + """ + super(BCConfig, self).train_config() + self.train.hdf5_load_next_obs = False + def algo_config(self): """ This function populates the `config.algo` attribute of the config, and is given to the @@ -17,9 +24,11 @@ def algo_config(self): """ # optimization parameters + self.algo.optim_params.policy.optimizer_type = "adam" self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs + self.algo.optim_params.policy.learning_rate.scheduler_type = "multistep" # learning rate scheduler ("multistep", "linear", etc) self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength # loss weights @@ -73,11 +82,25 @@ def algo_config(self): self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) # RNN policy settings - self.algo.rnn.enabled = False # whether to train RNN policy - self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length - self.algo.rnn.hidden_dim = 400 # hidden dimension size - self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" - self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked - self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) - self.algo.rnn.kwargs.bidirectional = False # rnn kwargs - self.algo.rnn.kwargs.do_not_lock_keys() \ No newline at end of file + self.algo.rnn.enabled = False # whether to train RNN policy + self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length + self.algo.rnn.hidden_dim = 400 # hidden dimension size + self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" + self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked + self.algo.rnn.open_loop = False # if True, action predictions are only based 
on a single observation (not sequence) + self.algo.rnn.kwargs.bidirectional = False # rnn kwargs + self.algo.rnn.kwargs.do_not_lock_keys() + + # Transformer policy settings + self.algo.transformer.enabled = False # whether to train transformer policy + self.algo.transformer.context_length = 10 # length of (s, a) sequences to feed to transformer - should usually match train.frame_stack + self.algo.transformer.embed_dim = 512 # dimension for embeddings used by transformer + self.algo.transformer.num_layers = 6 # number of transformer blocks to stack + self.algo.transformer.num_heads = 8 # number of attention heads for each transformer block (should divide embed_dim evenly) + self.algo.transformer.emb_dropout = 0.1 # dropout probability for embedding inputs in transformer + self.algo.transformer.attn_dropout = 0.1 # dropout probability for attention outputs for each transformer block + self.algo.transformer.block_output_dropout = 0.1 # dropout probability for final outputs for each transformer block + self.algo.transformer.sinusoidal_embedding = False # if True, use standard positional encodings (sin/cos) + self.algo.transformer.activation = "gelu" # activation function for MLP in Transformer Block + self.algo.transformer.supervise_all_steps = False # if true, supervise all intermediate actions, otherwise only final one + self.algo.transformer.nn_parameter_for_timesteps = True # if true, use nn.Parameter otherwise use nn.Embedding diff --git a/robomimic/config/bcq_config.py b/robomimic/config/bcq_config.py index 7bae7f92..e28f5ba5 100644 --- a/robomimic/config/bcq_config.py +++ b/robomimic/config/bcq_config.py @@ -71,6 +71,7 @@ def algo_config(self): del self.algo.action_sampler.loss del self.algo.action_sampler.gaussian del self.algo.action_sampler.rnn + del self.algo.action_sampler.transformer # Number of epochs before freezing encoder (-1 for no freezing). Only applies to cVAE-based action samplers.
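As a quick illustration of the new BC-Transformer settings above, a sketch of switching a BC config to the transformer policy head (the `bc_transformer.json` template added later in this patch is the authoritative default; values below mirror it):

```python
from robomimic.config import config_factory

config = config_factory(algo_name="bc")

# switch the BC policy head from MLP/RNN to the new transformer
config.algo.transformer.enabled = True
config.algo.transformer.context_length = 10   # should match train.frame_stack
config.algo.transformer.embed_dim = 512
config.algo.transformer.num_layers = 6
config.algo.transformer.num_heads = 8         # should divide embed_dim evenly

# feed 10-step observation histories, predict the current action
config.train.frame_stack = 10
config.train.seq_length = 1

# the template pairs this with AdamW and a linear LR schedule
config.algo.optim_params.policy.optimizer_type = "adamw"
config.algo.optim_params.policy.learning_rate.scheduler_type = "linear"
```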
with self.algo.action_sampler.unlocked(): diff --git a/robomimic/config/default_templates/bc_transformer.json b/robomimic/config/default_templates/bc_transformer.json new file mode 100644 index 00000000..ed59f175 --- /dev/null +++ b/robomimic/config/default_templates/bc_transformer.json @@ -0,0 +1,171 @@ +{ + "algo_name": "bc", + "experiment": { + "name": "test", + "validate": false, + "logging": { + "terminal_output_to_txt": true, + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" + }, + "save": { + "enabled": true, + "every_n_seconds": null, + "every_n_epochs": 50, + "epochs": [], + "on_best_validation": false, + "on_best_rollout_return": false, + "on_best_rollout_success_rate": true + }, + "epoch_every_n_steps": 100, + "validation_epoch_every_n_steps": 10, + "env": null, + "additional_envs": null, + "render": false, + "render_video": true, + "keep_all_videos": false, + "video_skip": 5, + "rollout": { + "enabled": true, + "n": 50, + "horizon": 400, + "rate": 50, + "warmstart": 0, + "terminate_on_success": true + } + }, + "train": { + "data": null, + "output_dir": "../bc_transformer_trained_models", + "num_data_workers": 0, + "hdf5_cache_mode": "all", + "hdf5_use_swmr": true, + "hdf5_load_next_obs": false, + "hdf5_normalize_obs": false, + "hdf5_filter_key": null, + "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 10, + "pad_frame_stack": true, + "dataset_keys": [ + "actions" + ], + "goal_mode": null, + "cuda": true, + "batch_size": 100, + "num_epochs": 2000, + "seed": 1 + }, + "algo": { + "optim_params": { + "policy": { + "optimizer_type": "adamw", + "learning_rate": { + "initial": 0.0001, + "decay_factor": 0.1, + "epoch_schedule": [100], + "scheduler_type": "linear" + }, + "regularization": { + "L2": 0.01 + } + } + }, + "loss": { + "l2_weight": 1.0, + "l1_weight": 0.0, + "cos_weight": 0.0 + }, + "actor_layer_dims": [], + "gaussian": { + "enabled": false + }, + "gmm": { + "enabled": true, + "num_modes": 5, + "min_std": 0.0001, + "std_activation": "softplus", + "low_noise_eval": true + }, + "vae": { + "enabled": false + }, + "rnn": { + "enabled": false + }, + "transformer": { + "enabled": true, + "supervise_all_steps": false, + "num_layers": 6, + "embed_dim": 512, + "num_heads": 8 + } + }, + "observation": { + "modalities": { + "obs": { + "low_dim": [ + "robot0_eef_pos", + "robot0_eef_quat", + "robot0_gripper_qpos", + "object" + ], + "rgb": [], + "depth": [], + "scan": [] + }, + "goal": { + "low_dim": [], + "rgb": [], + "depth": [], + "scan": [] + } + }, + "encoder": { + "low_dim": { + "core_class": null, + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + }, + "rgb": { + "core_class": "VisualCore", + "core_kwargs": { + "feature_dimension": 64, + "backbone_class": "ResNet18Conv", + "backbone_kwargs": { + "pretrained": false, + "input_coord_conv": false + }, + "pool_class": "SpatialSoftmax", + "pool_kwargs": { + "num_kp": 32, + "learnable_temperature": false, + "temperature": 1.0, + "noise_std": 0.0 + } + }, + "obs_randomizer_class": "CropRandomizer", + "obs_randomizer_kwargs": { + "crop_height": 76, + "crop_width": 76, + "num_crops": 1, + "pos_enc": false + } + }, + "depth": { + "core_class": "VisualCore", + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + }, + "scan": { + "core_class": "ScanCore", + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + } + } + } +} diff --git a/robomimic/config/iql_config.py b/robomimic/config/iql_config.py new file mode 100644 
index 00000000..bd603d1a --- /dev/null +++ b/robomimic/config/iql_config.py @@ -0,0 +1,73 @@ +""" +Config for IQL algorithm. +""" + +from robomimic.config.base_config import BaseConfig + + +class IQLConfig(BaseConfig): + ALGO_NAME = "iql" + + def algo_config(self): + """ + This function populates the `config.algo` attribute of the config, and is given to the + `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` + argument to the constructor. Any parameter that an algorithm needs to determine its + training and test-time behavior should be populated here. + """ + super(IQLConfig, self).algo_config() + + # optimization parameters + self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate + self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) + self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs + self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength + + self.algo.optim_params.vf.learning_rate.initial = 1e-4 # vf learning rate + self.algo.optim_params.vf.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) + self.algo.optim_params.vf.learning_rate.epoch_schedule = [] # epochs where LR decay occurs + self.algo.optim_params.vf.regularization.L2 = 0.00 # L2 regularization strength + + self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate + self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) + self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs + self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength + + # target network related parameters + self.algo.discount = 0.99 # discount factor to use + self.algo.target_tau = 0.01 # update rate for target networks + + # ================== Actor Network Config =================== + # Actor network settings + self.algo.actor.net.type = "gaussian" # Options are currently ["gaussian", "gmm"] + + # Actor network settings - shared + self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net + self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage + self.algo.actor.net.common.use_tanh = False # Whether to use tanh at output of actor network + + # Actor network settings - gaussian + self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value + self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net + self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not + + self.algo.actor.net.gmm.num_modes = 5 # number of GMM modes + self.algo.actor.net.gmm.min_std = 0.0001 # minimum std output from network + + self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions + + self.algo.actor.max_gradient_norm = None # L2 gradient clipping for actor + + # ================== Critic Network Config =================== + # critic ensemble parameters + self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble + self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions + self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic + self.algo.critic.max_gradient_norm = None # L2 gradient 
clipping for actor + + # ================== Adv Config ============================== + self.algo.adv.clip_adv_value = None # whether to clip raw advantage estimates + self.algo.adv.beta = 1.0 # temperature for operator + self.algo.adv.use_final_clip = True # whether to clip final weight calculations + + self.algo.vf_quantile = 0.9 # quantile factor in quantile regression diff --git a/robomimic/envs/env_base.py b/robomimic/envs/env_base.py index df13b3ef..ee3184c2 100644 --- a/robomimic/envs/env_base.py +++ b/robomimic/envs/env_base.py @@ -164,6 +164,14 @@ def type(self): """ return + @property + def version(self): + """ + Returns version of environment (str). + This is not an abstract method, some subclasses do not implement it + """ + return None + @abc.abstractmethod def serialize(self): """ diff --git a/robomimic/envs/env_robosuite.py b/robomimic/envs/env_robosuite.py index a4cd26b4..b2bf46cf 100644 --- a/robomimic/envs/env_robosuite.py +++ b/robomimic/envs/env_robosuite.py @@ -9,7 +9,6 @@ import mujoco_py import robosuite -from robosuite.utils.mjcf_utils import postprocess_model_xml import robomimic.utils.obs_utils as ObsUtils import robomimic.envs.env_base as EB @@ -131,7 +130,13 @@ def reset_to(self, state): should_ret = False if "model" in state: self.reset() - xml = postprocess_model_xml(state["model"]) + robosuite_version_id = int(robosuite.__version__.split(".")[1]) + if robosuite_version_id <= 3: + from robosuite.utils.mjcf_utils import postprocess_model_xml + xml = postprocess_model_xml(state["model"]) + else: + # v1.4 and above use the class-based edit_model_xml function + xml = self.env.edit_model_xml(state["model"]) self.env.reset_from_xml_string(xml) self.env.sim.reset() if not self._is_v1: @@ -195,7 +200,8 @@ def get_observation(self, di=None): # ensures that we don't accidentally add robot wrist images a second time pf = robot.robot_model.naming_prefix for k in di: - if k.startswith(pf) and (k not in ret) and (not k.endswith("proprio-state")): + if k.startswith(pf) and (k not in ret) and \ + (not k.endswith("proprio-state")): ret[k] = np.array(di[k]) else: # minimal proprioception for older versions of robosuite @@ -273,13 +279,25 @@ def type(self): """ return EB.EnvType.ROBOSUITE_TYPE + @property + def version(self): + """ + Returns version of robosuite used for this environment, eg. 1.2.0 + """ + return robosuite.__version__ + def serialize(self): """ Save all information needed to re-instantiate this environment in a dictionary. This is the same as @env_meta - environment metadata stored in hdf5 datasets, and used in utils/env_utils.py. """ - return dict(env_name=self.name, type=self.type, env_kwargs=deepcopy(self._init_kwargs)) + return dict( + env_name=self.name, + env_version=self.version, + type=self.type, + env_kwargs=deepcopy(self._init_kwargs) + ) @classmethod def create_for_data_processing( diff --git a/robomimic/envs/wrappers.py b/robomimic/envs/wrappers.py new file mode 100644 index 00000000..9936f9de --- /dev/null +++ b/robomimic/envs/wrappers.py @@ -0,0 +1,220 @@ +""" +A collection of useful environment wrappers. +""" +from copy import deepcopy +import textwrap +import numpy as np +from collections import deque + +import robomimic.envs.env_base as EB + + +class EnvWrapper(object): + """ + Base class for all environment wrappers in robomimic. + """ + def __init__(self, env): + """ + Args: + env (EnvBase instance): The environment to wrap. 
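For readers unfamiliar with IQL, a rough background sketch of what the keys in `iql_config.py` above parameterize, following Kostrikov et al. (2021). This is not a restatement of `algo/iql.py`; function names and the clipping constant are illustrative, and the exact weighting convention should be checked against the implementation:

```python
import torch

def expectile_loss(q_target, v_pred, tau=0.9):
    """Value-function loss via expectile regression; tau corresponds to algo.vf_quantile."""
    u = q_target - v_pred
    return (torch.abs(tau - (u < 0).float()) * u ** 2).mean()

def advantage_weights(q, v, beta=1.0, clip_adv_value=None, use_final_clip=True):
    """Weights for advantage-weighted actor regression; beta corresponds to algo.adv.beta."""
    adv = q - v                              # advantage estimate A(s, a)
    if clip_adv_value is not None:           # algo.adv.clip_adv_value
        adv = adv.clamp(max=clip_adv_value)
    w = torch.exp(adv / beta)                # beta acts as a temperature
    if use_final_clip:                       # algo.adv.use_final_clip
        w = w.clamp(max=100.0)               # illustrative clip value
    return w.detach()
```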
+ """ + assert isinstance(env, EB.EnvBase) or isinstance(env, EnvWrapper) + self.env = env + + @classmethod + def class_name(cls): + return cls.__name__ + + def _warn_double_wrap(self): + """ + Utility function that checks if we're accidentally trying to double wrap an env + Raises: + Exception: [Double wrapping env] + """ + env = self.env + while True: + if isinstance(env, EnvWrapper): + if env.class_name() == self.class_name(): + raise Exception( + "Attempted to double wrap with Wrapper: {}".format( + self.__class__.__name__ + ) + ) + env = env.env + else: + break + + @property + def unwrapped(self): + """ + Grabs unwrapped environment + + Returns: + env (EnvBase instance): Unwrapped environment + """ + if hasattr(self.env, "unwrapped"): + return self.env.unwrapped + else: + return self.env + + def _to_string(self): + """ + Subclasses should override this method to print out info about the + wrapper (such as arguments passed to it). + """ + return '' + + def __repr__(self): + """Pretty print environment.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = '' + indent = ' ' * 4 + if self._to_string() != '': + msg += textwrap.indent("\n" + self._to_string(), indent) + msg += textwrap.indent("\nenv={}".format(self.env), indent) + msg = header + '(' + msg + '\n)' + return msg + + # this method is a fallback option on any methods the original env might support + def __getattr__(self, attr): + # using getattr ensures that both __getattribute__ and __getattr__ (fallback) get called + # (see https://stackoverflow.com/questions/3278077/difference-between-getattr-vs-getattribute) + orig_attr = getattr(self.env, attr) + if callable(orig_attr): + + def hooked(*args, **kwargs): + result = orig_attr(*args, **kwargs) + # prevent wrapped_class from becoming unwrapped + if id(result) == id(self.env): + return self + return result + + return hooked + else: + return orig_attr + + +class FrameStackWrapper(EnvWrapper): + """ + Wrapper for frame stacking observations during rollouts. The agent + receives a sequence of past observations instead of a single observation + when it calls @env.reset, @env.reset_to, or @env.step in the rollout loop. + """ + def __init__(self, env, num_frames): + """ + Args: + env (EnvBase instance): The environment to wrap. + num_frames (int): number of past observations (including current observation) + to stack together. Must be greater than 1 (otherwise this wrapper would + be a no-op). + """ + assert num_frames > 1, "error: FrameStackWrapper must have num_frames > 1 but got num_frames of {}".format(num_frames) + + super(FrameStackWrapper, self).__init__(env=env) + self.num_frames = num_frames + + # keep track of last @num_frames observations for each obs key + self.obs_history = None + + def _get_initial_obs_history(self, init_obs): + """ + Helper method to get observation history from the initial observation, by + repeating it. + + Returns: + obs_history (dict): a deque for each observation key, with an extra + leading dimension of 1 for each key (for easy concatenation later) + """ + obs_history = {} + for k in init_obs: + obs_history[k] = deque( + [init_obs[k][None] for _ in range(self.num_frames)], + maxlen=self.num_frames, + ) + return obs_history + + def _get_stacked_obs_from_history(self): + """ + Helper method to convert internal variable @self.obs_history to a + stacked observation where each key is a numpy array with leading dimension + @self.num_frames. 
+ """ + # concatenate all frames per key so we return a numpy array per key + return { k : np.concatenate(self.obs_history[k], axis=0) for k in self.obs_history } + + def cache_obs_history(self): + self.obs_history_cache = deepcopy(self.obs_history) + + def uncache_obs_history(self): + self.obs_history = self.obs_history_cache + self.obs_history_cache = None + + def reset(self): + """ + Modify to return frame stacked observation which is @self.num_frames copies of + the initial observation. + + Returns: + obs_stacked (dict): each observation key in original observation now has + leading shape @self.num_frames and consists of the previous @self.num_frames + observations + """ + obs = self.env.reset() + self.timestep = 0 # always zero regardless of timestep type + self.update_obs(obs, reset=True) + self.obs_history = self._get_initial_obs_history(init_obs=obs) + return self._get_stacked_obs_from_history() + + def reset_to(self, state): + """ + Modify to return frame stacked observation which is @self.num_frames copies of + the initial observation. + + Returns: + obs_stacked (dict): each observation key in original observation now has + leading shape @self.num_frames and consists of the previous @self.num_frames + observations + """ + obs = self.env.reset_to(state) + self.timestep = 0 # always zero regardless of timestep type + self.update_obs(obs, reset=True) + self.obs_history = self._get_initial_obs_history(init_obs=obs) + return self._get_stacked_obs_from_history() + + def step(self, action): + """ + Modify to update the internal frame history and return frame stacked observation, + which will have leading dimension @self.num_frames for each key. + + Args: + action (np.array): action to take + + Returns: + obs_stacked (dict): each observation key in original observation now has + leading shape @self.num_frames and consists of the previous @self.num_frames + observations + reward (float): reward for this step + done (bool): whether the task is done + info (dict): extra information + """ + obs, r, done, info = self.env.step(action) + self.update_obs(obs, action=action, reset=False) + # update frame history + for k in obs: + # make sure to have leading dim of 1 for easy concatenation + self.obs_history[k].append(obs[k][None]) + obs_ret = self._get_stacked_obs_from_history() + return obs_ret, r, done, info + + def update_obs(self, obs, action=None, reset=False): + obs["timesteps"] = np.array([self.timestep]) + + if reset: + obs["actions"] = np.zeros(self.env.action_dimension) + else: + self.timestep += 1 + obs["actions"] = action[: self.env.action_dimension] + + def _to_string(self): + """Info to pretty print.""" + return "num_frames={}".format(self.num_frames) \ No newline at end of file diff --git a/robomimic/exps/templates/bc.json b/robomimic/exps/templates/bc.json index 47975820..82ad783f 100644 --- a/robomimic/exps/templates/bc.json +++ b/robomimic/exps/templates/bc.json @@ -2,10 +2,12 @@ "algo_name": "bc", "experiment": { "name": "test", - "validate": true, + "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": false, "hdf5_normalize_obs": false, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -56,10 
+63,12 @@ "algo": { "optim_params": { "policy": { + "optimizer_type": "adam", "learning_rate": { "initial": 0.0001, "decay_factor": 0.1, - "epoch_schedule": [] + "epoch_schedule": [], + "scheduler_type": "multistep" }, "regularization": { "L2": 0.0 @@ -135,6 +144,20 @@ "kwargs": { "bidirectional": false } + }, + "transformer": { + "enabled": false, + "context_length": 10, + "embed_dim": 512, + "num_layers": 6, + "num_heads": 8, + "emb_dropout": 0.1, + "attn_dropout": 0.1, + "block_output_dropout": 0.1, + "sinusoidal_embedding": false, + "activation": "gelu", + "supervise_all_steps": false, + "nn_parameter_for_timesteps": true } }, "observation": { @@ -166,98 +189,27 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/exps/templates/bcq.json b/robomimic/exps/templates/bcq.json index c027d8c7..5ae9d907 100644 --- a/robomimic/exps/templates/bcq.json +++ b/robomimic/exps/templates/bcq.json @@ -2,10 +2,12 @@ "algo_name": "bcq", "experiment": { "name": "test", - "validate": true, + "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, "hdf5_normalize_obs": false, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -202,98 +209,27 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - 
"flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/exps/templates/cql.json b/robomimic/exps/templates/cql.json index 54d2ccb7..a920efd6 100644 --- a/robomimic/exps/templates/cql.json +++ b/robomimic/exps/templates/cql.json @@ -2,10 +2,12 @@ "algo_name": "cql", "experiment": { "name": "test", - "validate": true, + "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, "hdf5_normalize_obs": false, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -149,98 +156,27 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - 
"pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/exps/templates/gl.json b/robomimic/exps/templates/gl.json index 385148da..39b4c2db 100644 --- a/robomimic/exps/templates/gl.json +++ b/robomimic/exps/templates/gl.json @@ -2,10 +2,12 @@ "algo_name": "gl", "experiment": { "name": "test", - "validate": true, + "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, "hdf5_normalize_obs": false, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -149,98 +156,27 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] 
- } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/exps/templates/hbc.json b/robomimic/exps/templates/hbc.json index 7b1ed073..26eff76a 100644 --- a/robomimic/exps/templates/hbc.json +++ b/robomimic/exps/templates/hbc.json @@ -2,10 +2,12 @@ "algo_name": "hbc", "experiment": { "name": "test", - "validate": true, + "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, "hdf5_normalize_obs": false, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 10, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -123,10 +130,12 @@ "actor": { "optim_params": { "policy": { + "optimizer_type": "adam", "learning_rate": { "initial": 0.0001, "decay_factor": 0.1, - "epoch_schedule": [] + "epoch_schedule": [], + "scheduler_type": "multistep" }, "regularization": { "L2": 0.0 @@ -152,6 +161,20 @@ "kwargs": { "bidirectional": false } + }, + "transformer": { + "enabled": false, + "context_length": 10, + "embed_dim": 512, + "num_layers": 6, + "num_heads": 8, + "emb_dropout": 0.1, + "attn_dropout": 0.1, + "block_output_dropout": 0.1, + "sinusoidal_embedding": false, + "activation": "gelu", + "supervise_all_steps": false, + "nn_parameter_for_timesteps": true } } }, @@ -196,97 +219,21 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - 
"crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } }, @@ -319,99 +266,28 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/exps/templates/iql.json b/robomimic/exps/templates/iql.json new file mode 100644 index 00000000..47317884 --- /dev/null +++ b/robomimic/exps/templates/iql.json @@ -0,0 +1,192 @@ +{ + "algo_name": "iql", + "experiment": { + "name": "test", + "validate": false, + "logging": { + "terminal_output_to_txt": true, + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" + }, + "save": { + "enabled": true, + "every_n_seconds": null, + "every_n_epochs": 50, + "epochs": [], + "on_best_validation": false, + "on_best_rollout_return": false, + "on_best_rollout_success_rate": true + }, + "epoch_every_n_steps": 100, + "validation_epoch_every_n_steps": 10, + "env": null, + "additional_envs": null, + "render": false, + "render_video": true, + "keep_all_videos": false, + "video_skip": 5, + "rollout": { + "enabled": true, + "n": 50, + "horizon": 400, + "rate": 50, + "warmstart": 0, + "terminate_on_success": true + } + }, + "train": { + "data": null, + "output_dir": "../iql_trained_models", + "num_data_workers": 0, + "hdf5_cache_mode": "all", + "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, + "hdf5_normalize_obs": false, + "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, + "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, + "dataset_keys": [ + "actions", + "rewards", + "dones" + ], + "goal_mode": null, + "cuda": true, + "batch_size": 100, + "num_epochs": 2000, + "seed": 1 + }, + "algo": 
{ + "optim_params": { + "critic": { + "learning_rate": { + "initial": 0.0001, + "decay_factor": 0.0, + "epoch_schedule": [] + }, + "regularization": { + "L2": 0.0 + } + }, + "vf": { + "learning_rate": { + "initial": 0.0001, + "decay_factor": 0.0, + "epoch_schedule": [] + }, + "regularization": { + "L2": 0.0 + } + }, + "actor": { + "learning_rate": { + "initial": 0.0001, + "decay_factor": 0.0, + "epoch_schedule": [] + }, + "regularization": { + "L2": 0.0 + } + } + }, + "discount": 0.99, + "target_tau": 0.01, + "actor": { + "net": { + "type": "gaussian", + "common": { + "std_activation": "softplus", + "low_noise_eval": true, + "use_tanh": false + }, + "gaussian": { + "init_last_fc_weight": 0.001, + "init_std": 0.3, + "fixed_std": false + }, + "gmm": { + "num_modes": 5, + "min_std": 0.0001 + } + }, + "layer_dims": [ + 300, + 400 + ], + "max_gradient_norm": null + }, + "critic": { + "ensemble": { + "n": 2 + }, + "layer_dims": [ + 300, + 400 + ], + "use_huber": false, + "max_gradient_norm": null + }, + "adv": { + "clip_adv_value": null, + "beta": 1.0, + "use_final_clip": true + }, + "vf_quantile": 0.9 + }, + "observation": { + "modalities": { + "obs": { + "low_dim": [ + "robot0_eef_pos", + "robot0_eef_quat", + "robot0_gripper_qpos", + "object" + ], + "rgb": [], + "depth": [], + "scan": [] + }, + "goal": { + "low_dim": [], + "rgb": [], + "depth": [], + "scan": [] + } + }, + "encoder": { + "low_dim": { + "core_class": null, + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + }, + "rgb": { + "core_class": "VisualCore", + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + }, + "depth": { + "core_class": "VisualCore", + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + }, + "scan": { + "core_class": "ScanCore", + "core_kwargs": {}, + "obs_randomizer_class": null, + "obs_randomizer_kwargs": {} + } + } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] + } +} \ No newline at end of file diff --git a/robomimic/exps/templates/iris.json b/robomimic/exps/templates/iris.json index c47a6723..65516638 100644 --- a/robomimic/exps/templates/iris.json +++ b/robomimic/exps/templates/iris.json @@ -2,10 +2,12 @@ "algo_name": "iris", "experiment": { "name": "test", - "validate": true, + "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, "hdf5_normalize_obs": false, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 10, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -246,10 +253,12 @@ "actor": { "optim_params": { "policy": { + "optimizer_type": "adam", "learning_rate": { "initial": 0.0001, "decay_factor": 0.1, - "epoch_schedule": [] + "epoch_schedule": [], + "scheduler_type": "multistep" }, "regularization": { "L2": 0.0 @@ -275,6 +284,20 @@ "kwargs": { "bidirectional": false } + }, + "transformer": { + "enabled": false, + "context_length": 10, + "embed_dim": 512, + "num_layers": 6, + "num_heads": 8, + "emb_dropout": 0.1, + "attn_dropout": 0.1, + "block_output_dropout": 0.1, + "sinusoidal_embedding": false, + "activation": "gelu", + "supervise_all_steps": false, + "nn_parameter_for_timesteps": true } } }, @@ -320,97 +343,21 @@ }, "rgb": { 
"core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } }, @@ -443,97 +390,21 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - 
"obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } } @@ -567,99 +438,28 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/exps/templates/td3_bc.json b/robomimic/exps/templates/td3_bc.json index 04a3f865..414a8f04 100644 --- a/robomimic/exps/templates/td3_bc.json +++ b/robomimic/exps/templates/td3_bc.json @@ -5,7 +5,9 @@ "validate": false, "logging": { "terminal_output_to_txt": true, - "log_tb": true + "log_tb": true, + "log_wandb": false, + "wandb_proj_name": "debug" }, "save": { "enabled": true, @@ -39,9 +41,14 @@ "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, + "hdf5_load_next_obs": true, "hdf5_normalize_obs": true, "hdf5_filter_key": null, + "hdf5_validation_filter_key": null, "seq_length": 1, + "pad_seq_length": true, + "frame_stack": 1, + "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", @@ -134,98 +141,27 @@ }, "rgb": { "core_class": "VisualCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", - "core_kwargs": { - 
"feature_dimension": 64, - "flatten": true, - "backbone_class": "ResNet18Conv", - "backbone_kwargs": { - "pretrained": false, - "input_coord_conv": false - }, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", - "core_kwargs": { - "feature_dimension": 64, - "flatten": true, - "pool_class": "SpatialSoftmax", - "pool_kwargs": { - "num_kp": 32, - "learnable_temperature": false, - "temperature": 1.0, - "noise_std": 0.0, - "output_variance": false - }, - "conv_activation": "relu", - "conv_kwargs": { - "out_channels": [ - 32, - 64, - 64 - ], - "kernel_size": [ - 8, - 4, - 2 - ], - "stride": [ - 4, - 2, - 1 - ] - } - }, + "core_kwargs": {}, "obs_randomizer_class": null, - "obs_randomizer_kwargs": { - "crop_height": 76, - "crop_width": 76, - "num_crops": 1, - "pos_enc": false - } + "obs_randomizer_kwargs": {} } } + }, + "meta": { + "hp_base_config_file": null, + "hp_keys": [], + "hp_values": [] } } \ No newline at end of file diff --git a/robomimic/macros.py b/robomimic/macros.py new file mode 100644 index 00000000..3b6c0503 --- /dev/null +++ b/robomimic/macros.py @@ -0,0 +1,27 @@ +""" +Set of global variables shared across robomimic +""" +# Sets debugging mode. Should be set at top-level script so that internal +# debugging functionalities are made active +DEBUG = False + +# Whether to visualize the before & after of an observation randomizer +VISUALIZE_RANDOMIZER = False + +# wandb entity (eg. username or team name) +WANDB_ENTITY = None + +# wandb api key (obtain from https://wandb.ai/authorize) +# alternatively, set up wandb from terminal with `wandb login` +WANDB_API_KEY = None + +try: + from robomimic.macros_private import * +except ImportError: + from robomimic.utils.log_utils import log_warning + import robomimic + log_warning( + "No private macro file found!"\ + "\nIt is recommended to use a private macro file"\ + "\nTo setup, run: python {}/scripts/setup_macros.py".format(robomimic.__path__[0]) + ) diff --git a/robomimic/models/__init__.py b/robomimic/models/__init__.py index c3a0eb93..7460f930 100644 --- a/robomimic/models/__init__.py +++ b/robomimic/models/__init__.py @@ -1 +1 @@ -from .base_nets import EncoderCore, Randomizer +from .obs_core import EncoderCore, Randomizer diff --git a/robomimic/models/base_nets.py b/robomimic/models/base_nets.py index 6be2a23b..0a4927e0 100644 --- a/robomimic/models/base_nets.py +++ b/robomimic/models/base_nets.py @@ -3,22 +3,19 @@ MLP, RNN, and CNN backbones. 
""" -import sys import math import abc import numpy as np import textwrap -from copy import deepcopy from collections import OrderedDict import torch import torch.nn as nn import torch.nn.functional as F +from torchvision import transforms from torchvision import models as vision_models import robomimic.utils.tensor_utils as TensorUtils -import robomimic.utils.obs_utils as ObsUtils -from robomimic.utils.python_utils import extract_class_init_kwargs_from_dict CONV_ACTIVATIONS = { @@ -42,6 +39,30 @@ def rnn_args_from_config(rnn_config): ) +def transformer_args_from_config(transformer_config): + """ + Takes a Config object corresponding to Transformer settings + (for example `config.algo.transformer` in BCConfig) and extracts + transformer kwargs for instantiating transformer networks. + """ + transformer_args = dict( + transformer_context_length=transformer_config.context_length, + transformer_embed_dim=transformer_config.embed_dim, + transformer_num_heads=transformer_config.num_heads, + transformer_emb_dropout=transformer_config.emb_dropout, + transformer_attn_dropout=transformer_config.attn_dropout, + transformer_block_output_dropout=transformer_config.block_output_dropout, + transformer_sinusoidal_embedding=transformer_config.sinusoidal_embedding, + transformer_activation=transformer_config.activation, + transformer_nn_parameter_for_timesteps=transformer_config.nn_parameter_for_timesteps, + ) + + if "num_layers" in transformer_config: + transformer_args["transformer_num_layers"] = transformer_config.num_layers + + return transformer_args + + class Module(torch.nn.Module): """ Base class for networks. The only difference from torch.nn.Module is that it @@ -67,10 +88,20 @@ class Sequential(torch.nn.Sequential, Module): """ Compose multiple Modules together (defined above). """ - def __init__(self, *args): + def __init__(self, *args, has_output_shape = True): + """ + Args: + has_output_shape (bool, optional): indicates whether output_shape can be called on the Sequential module. + torch.nn modules do not have an output_shape, but Modules (defined above) do. Defaults to True. + """ for arg in args: - assert isinstance(arg, Module) + if has_output_shape: + assert isinstance(arg, Module) + else: + assert isinstance(arg, nn.Module) torch.nn.Sequential.__init__(self, *args) + self.fixed = False + self.has_output_shape = has_output_shape def output_shape(self, input_shape=None): """ @@ -84,11 +115,22 @@ def output_shape(self, input_shape=None): Returns: out_shape ([int]): list of integers corresponding to output shape """ + if not self.has_output_shape: + raise NotImplementedError("Output shape is not defined for this module") out_shape = input_shape for module in self: out_shape = module.output_shape(out_shape) return out_shape + def freeze(self): + self.fixed = True + + def train(self, mode): + if self.fixed: + super().train(False) + else: + super().train(mode) + class Parameter(Module): """ @@ -500,6 +542,166 @@ def __repr__(self): return header + '(input_channel={}, input_coord_conv={})'.format(self._input_channel, self._input_coord_conv) +class R3MConv(ConvBase): + """ + Base class for ConvNets pretrained with R3M (https://arxiv.org/abs/2203.12601) + """ + def __init__( + self, + input_channel=3, + r3m_model_class='resnet18', + freeze=True, + ): + """ + Using R3M pretrained observation encoder network proposed by https://arxiv.org/abs/2203.12601 + Args: + input_channel (int): number of input channels for input images to the network. 
+ If not equal to 3, modifies first conv layer in ResNet to handle the number + of input channels. + r3m_model_class (str): select one of the r3m pretrained model "resnet18", "resnet34" or "resnet50" + freeze (bool): if True, use a frozen R3M pretrained model. + """ + super(R3MConv, self).__init__() + + try: + from r3m import load_r3m + except ImportError: + print("WARNING: could not load r3m library! Please follow https://github.com/facebookresearch/r3m to install R3M") + + net = load_r3m(r3m_model_class) + + assert input_channel == 3 # R3M only support input image with channel size 3 + assert r3m_model_class in ["resnet18", "resnet34", "resnet50"] # make sure the selected r3m model do exist + + # cut the last fc layer + self._input_channel = input_channel + self._r3m_model_class = r3m_model_class + self._freeze = freeze + self._input_coord_conv = False + self._pretrained = True + + preprocess = nn.Sequential( + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ) + self.nets = Sequential(*([preprocess] + list(net.module.convnet.children())), has_output_shape = False) + if freeze: + self.nets.freeze() + + self.weight_sum = np.sum([param.cpu().data.numpy().sum() for param in self.nets.parameters()]) + if freeze: + for param in self.nets.parameters(): + param.requires_grad = False + + self.nets.eval() + + def output_shape(self, input_shape): + """ + Function to compute output shape from inputs to this module. + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + assert(len(input_shape) == 3) + + if self._r3m_model_class == 'resnet50': + out_dim = 2048 + else: + out_dim = 512 + + return [out_dim, 1, 1] + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + return header + '(input_channel={}, input_coord_conv={}, pretrained={}, freeze={})'.format(self._input_channel, self._input_coord_conv, self._pretrained, self._freeze) + + +class MVPConv(ConvBase): + """ + Base class for ConvNets pretrained with MVP (https://arxiv.org/abs/2203.06173) + """ + def __init__( + self, + input_channel=3, + mvp_model_class='vitb-mae-egosoup', + freeze=True, + ): + """ + Using MVP pretrained observation encoder network proposed by https://arxiv.org/abs/2203.06173 + Args: + input_channel (int): number of input channels for input images to the network. + If not equal to 3, modifies first conv layer in ResNet to handle the number + of input channels. + mvp_model_class (str): select one of the mvp pretrained model "vits-mae-hoi", "vits-mae-in", "vits-sup-in", "vitb-mae-egosoup" or "vitl-256-mae-egosoup" + freeze (bool): if True, use a frozen MVP pretrained model. + """ + super(MVPConv, self).__init__() + + try: + import mvp + except ImportError: + print("WARNING: could not load mvp library! 
Please follow https://github.com/ir413/mvp to install MVP.") + + self.nets = mvp.load(mvp_model_class) + if freeze: + self.nets.freeze() + + assert input_channel == 3 # MVP only support input image with channel size 3 + assert mvp_model_class in ["vits-mae-hoi", "vits-mae-in", "vits-sup-in", "vitb-mae-egosoup", "vitl-256-mae-egosoup"] # make sure the selected r3m model do exist + + self._input_channel = input_channel + self._freeze = freeze + self._mvp_model_class = mvp_model_class + self._input_coord_conv = False + self._pretrained = True + + if '256' in mvp_model_class: + input_img_size = 256 + else: + input_img_size = 224 + self.preprocess = nn.Sequential( + transforms.Resize(input_img_size) + ) + + def forward(self, inputs): + x = self.preprocess(inputs) + x = self.nets(x) + if list(self.output_shape(list(inputs.shape)[1:])) != list(x.shape)[1:]: + raise ValueError('Size mismatch: expect size %s, but got size %s' % ( + str(self.output_shape(list(inputs.shape)[1:])), str(list(x.shape)[1:])) + ) + return x + + def output_shape(self, input_shape): + """ + Function to compute output shape from inputs to this module. + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + assert(len(input_shape) == 3) + if 'vitb' in self._mvp_model_class: + output_shape = [768] + elif 'vitl' in self._mvp_model_class: + output_shape = [1024] + else: + output_shape = [384] + return output_shape + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + return header + '(input_channel={}, input_coord_conv={}, pretrained={}, freeze={})'.format(self._input_channel, self._input_coord_conv, self._pretrained, self._freeze) + + class CoordConv2d(nn.Conv2d, Module): """ 2D Coordinate Convolution @@ -629,19 +831,20 @@ class Conv1dBase(Module): input_channel (int): Number of channels for inputs to this network activation (None or str): Per-layer activation to use. Defaults to "relu". Valid options are currently {relu, None} for no activation - conv_kwargs (dict): Specific nn.Conv1D args to use, in list form, where the ith element corresponds to the + out_channels (list of int): Output channel size for each sequential Conv1d layer + kernel_size (list of int): Kernel sizes for each sequential Conv1d layer + stride (list of int): Stride sizes for each sequential Conv1d layer + conv_kwargs (dict): additional nn.Conv1D args to use, in list form, where the ith element corresponds to the argument to be passed to the ith Conv1D layer. See https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html for specific possible arguments. 
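A rough usage sketch for the two pretrained encoders above. It assumes the optional `r3m` and `mvp` packages (and their pretrained weights) are installed; the classes only print a warning if they are missing. The output shapes follow the `output_shape()` implementations shown in the diff:

```python
import torch
from robomimic.models.base_nets import R3MConv, MVPConv

# Frozen R3M ResNet-18 backbone; output_shape() reports [512, 1, 1]
r3m_net = R3MConv(input_channel=3, r3m_model_class="resnet18", freeze=True)
print(r3m_net.output_shape([3, 224, 224]))    # -> [512, 1, 1]

# Frozen MVP ViT-B backbone; output_shape() reports [768]
mvp_net = MVPConv(input_channel=3, mvp_model_class="vitb-mae-egosoup", freeze=True)
feats = mvp_net(torch.zeros(1, 3, 224, 224))  # (1, 768)
```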
- - e.g.: common values to use: - out_channels (list of int): Output channel size for each sequential Conv1d layer - kernel_size (list of int): Kernel sizes for each sequential Conv1d layer - stride (list of int): Stride sizes for each sequential Conv1d layer """ def __init__( self, input_channel=1, activation="relu", + out_channels=(32, 64, 64), + kernel_size=(8, 4, 2), + stride=(4, 2, 1), **conv_kwargs, ): super(Conv1dBase, self).__init__() @@ -649,12 +852,8 @@ def __init__( # Get activation requested activation = CONV_ACTIVATIONS[activation] - # Make sure out_channels and kernel_size are specified - for kwarg in ("out_channels", "kernel_size"): - assert kwarg in conv_kwargs, f"{kwarg} must be specified in Conv1dBase kwargs!" - # Generate network - self.n_layers = len(conv_kwargs["out_channels"]) + self.n_layers = len(out_channels) layers = OrderedDict() for i in range(self.n_layers): layer_kwargs = {k: v[i] for k, v in conv_kwargs.items()} @@ -712,7 +911,7 @@ class SpatialSoftmax(ConvBase): def __init__( self, input_shape, - num_kp=None, + num_kp=32, temperature=1., learnable_temperature=False, output_variance=False, @@ -721,7 +920,7 @@ def __init__( """ Args: input_shape (list): shape of the input feature (C, H, W) - num_kp (int): number of keypoints (None for not use spatialsoftmax) + num_kp (int): number of keypoints (None for not using spatialsoftmax) temperature (float): temperature term for the softmax. learnable_temperature (bool): whether to learn the temperature output_variance (bool): treat attention as a distribution, and compute second-order statistics to return @@ -910,474 +1109,3 @@ def forward(self, x): # weighted mean-pooling return torch.sum(x * self.agg_weight, dim=1) raise Exception("unexpected agg type: {}".forward(self.agg_type)) - - -""" -================================================ -Encoder Core Networks (Abstract class) -================================================ -""" -class EncoderCore(Module): - """ - Abstract class used to categorize all cores used to encode observations - """ - def __init__(self, input_shape): - self.input_shape = input_shape - super(EncoderCore, self).__init__() - - def __init_subclass__(cls, **kwargs): - """ - Hook method to automatically register all valid subclasses so we can keep track of valid observation encoders - in a global dict. - - This global dict stores mapping from observation encoder network name to class. - We keep track of these registries to enable automated class inference at runtime, allowing - users to simply extend our base encoder class and refer to that class in string form - in their config, without having to manually register their class internally. - This also future-proofs us for any additional encoder classes we would - like to add ourselves. - """ - ObsUtils.register_encoder_core(cls) - - -""" -================================================ -Visual Core Networks (Backbone + Pool) -================================================ -""" -class VisualCore(EncoderCore, ConvBase): - """ - A network block that combines a visual backbone network with optional pooling - and linear layers. 
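Since `out_channels`, `kernel_size`, and `stride` are now explicit keyword arguments of `Conv1dBase` (with the defaults shown above) rather than required entries in `conv_kwargs`, a bare instantiation suffices. A rough sketch:

```python
from robomimic.models.base_nets import Conv1dBase

# Uses the new defaults: out_channels=(32, 64, 64), kernel_size=(8, 4, 2), stride=(4, 2, 1)
net = Conv1dBase(input_channel=1, activation="relu")
print(net.output_shape(input_shape=[1, 60]))  # [channels, length] after the three Conv1d layers
```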
- """ - def __init__( - self, - input_shape, - backbone_class, - backbone_kwargs, - pool_class=None, - pool_kwargs=None, - flatten=True, - feature_dimension=None, - ): - """ - Args: - input_shape (tuple): shape of input (not including batch dimension) - backbone_class (str): class name for the visual backbone network (e.g.: ResNet18) - backbone_kwargs (dict): kwargs for the visual backbone network - pool_class (str): class name for the visual feature pooler (optional) - Common options are "SpatialSoftmax" and "SpatialMeanPool" - pool_kwargs (dict): kwargs for the visual feature pooler (optional) - flatten (bool): whether to flatten the visual feature - feature_dimension (int): if not None, add a Linear layer to - project output into a desired feature dimension - """ - super(VisualCore, self).__init__(input_shape=input_shape) - self.flatten = flatten - - # add input channel dimension to visual core inputs - backbone_kwargs["input_channel"] = input_shape[0] - - # extract only relevant kwargs for this specific backbone - backbone_kwargs = extract_class_init_kwargs_from_dict(cls=eval(backbone_class), dic=backbone_kwargs, copy=True) - - # visual backbone - assert isinstance(backbone_class, str) - self.backbone = eval(backbone_class)(**backbone_kwargs) - - assert isinstance(self.backbone, ConvBase) - - feat_shape = self.backbone.output_shape(input_shape) - net_list = [self.backbone] - - # maybe make pool net - if pool_class is not None: - assert isinstance(pool_class, str) - # feed output shape of backbone to pool net - if pool_kwargs is None: - pool_kwargs = dict() - # extract only relevant kwargs for this specific backbone - pool_kwargs["input_shape"] = feat_shape - pool_kwargs = extract_class_init_kwargs_from_dict(cls=eval(pool_class), dic=pool_kwargs, copy=True) - self.pool = eval(pool_class)(**pool_kwargs) - assert isinstance(self.pool, Module) - - feat_shape = self.pool.output_shape(feat_shape) - net_list.append(self.pool) - else: - self.pool = None - - # flatten layer - if self.flatten: - net_list.append(torch.nn.Flatten(start_dim=1, end_dim=-1)) - - # maybe linear layer - self.feature_dimension = feature_dimension - if feature_dimension is not None: - assert self.flatten - linear = torch.nn.Linear(int(np.prod(feat_shape)), feature_dimension) - net_list.append(linear) - - self.nets = nn.Sequential(*net_list) - - def output_shape(self, input_shape): - """ - Function to compute output shape from inputs to this module. - - Args: - input_shape (iterable of int): shape of input. Does not include batch dimension. - Some modules may not need this argument, if their output does not depend - on the size of the input, or if they assume fixed size input. - - Returns: - out_shape ([int]): list of integers corresponding to output shape - """ - if self.feature_dimension is not None: - # linear output - return [self.feature_dimension] - feat_shape = self.backbone.output_shape(input_shape) - if self.pool is not None: - # pool output - feat_shape = self.pool.output_shape(feat_shape) - # backbone + flat output - if self.flatten: - return [np.prod(feat_shape)] - else: - return feat_shape - - def forward(self, inputs): - """ - Forward pass through visual core. 
- """ - ndim = len(self.input_shape) - assert tuple(inputs.shape)[-ndim:] == tuple(self.input_shape) - return super(VisualCore, self).forward(inputs) - - def __repr__(self): - """Pretty print network.""" - header = '{}'.format(str(self.__class__.__name__)) - msg = '' - indent = ' ' * 2 - msg += textwrap.indent( - "\ninput_shape={}\noutput_shape={}".format(self.input_shape, self.output_shape(self.input_shape)), indent) - msg += textwrap.indent("\nbackbone_net={}".format(self.backbone), indent) - msg += textwrap.indent("\npool_net={}".format(self.pool), indent) - msg = header + '(' + msg + '\n)' - return msg - - -""" -================================================ -Scan Core Networks (Conv1D Sequential + Pool) -================================================ -""" -class ScanCore(EncoderCore, ConvBase): - """ - A network block that combines a Conv1D backbone network with optional pooling - and linear layers. - """ - def __init__( - self, - input_shape, - conv_kwargs, - conv_activation="relu", - pool_class=None, - pool_kwargs=None, - flatten=True, - feature_dimension=None, - ): - """ - Args: - input_shape (tuple): shape of input (not including batch dimension) - conv_kwargs (dict): kwargs for the conv1d backbone network. Should contain lists for the following values: - out_channels (int) - kernel_size (int) - stride (int) - ... - conv_activation (str or None): Activation to use between conv layers. Default is relu. - Currently, valid options are {relu} - pool_class (str): class name for the visual feature pooler (optional) - Common options are "SpatialSoftmax" and "SpatialMeanPool" - pool_kwargs (dict): kwargs for the visual feature pooler (optional) - flatten (bool): whether to flatten the network output - feature_dimension (int): if not None, add a Linear layer to - project output into a desired feature dimension (note: flatten must be set to True!) - """ - super(ScanCore, self).__init__(input_shape=input_shape) - self.flatten = flatten - self.feature_dimension = feature_dimension - - # Generate backbone network - self.backbone = Conv1dBase( - input_channel=1, - activation=conv_activation, - **conv_kwargs, - ) - feat_shape = self.backbone.output_shape(input_shape=input_shape) - - # Create netlist of all generated networks - net_list = [self.backbone] - - # Possibly add pooling network - if pool_class is not None: - # Add an unsqueeze network so that the shape is correct to pass to pooling network - self.unsqueeze = Unsqueeze(dim=-1) - net_list.append(self.unsqueeze) - # Get output shape - feat_shape = self.unsqueeze.output_shape(feat_shape) - # Create pooling network - self.pool = eval(pool_class)(input_shape=feat_shape, **pool_kwargs) - net_list.append(self.pool) - feat_shape = self.pool.output_shape(feat_shape) - else: - self.unsqueeze, self.pool = None, None - - # flatten layer - if self.flatten: - net_list.append(torch.nn.Flatten(start_dim=1, end_dim=-1)) - - # maybe linear layer - if self.feature_dimension is not None: - assert self.flatten - linear = torch.nn.Linear(int(np.prod(feat_shape)), self.feature_dimension) - net_list.append(linear) - - # Generate final network - self.nets = nn.Sequential(*net_list) - - def output_shape(self, input_shape): - """ - Function to compute output shape from inputs to this module. - - Args: - input_shape (iterable of int): shape of input. Does not include batch dimension. - Some modules may not need this argument, if their output does not depend - on the size of the input, or if they assume fixed size input. 
- - Returns: - out_shape ([int]): list of integers corresponding to output shape - """ - if self.feature_dimension is not None: - # linear output - return [self.feature_dimension] - feat_shape = self.backbone.output_shape(input_shape) - if self.pool is not None: - # pool output - feat_shape = self.pool.output_shape(self.unsqueeze.output_shape(feat_shape)) - # backbone + flat output - return [np.prod(feat_shape)] if self.flatten else feat_shape - - def forward(self, inputs): - """ - Forward pass through visual core. - """ - ndim = len(self.input_shape) - assert tuple(inputs.shape)[-ndim:] == tuple(self.input_shape) - return super(ScanCore, self).forward(inputs) - - def __repr__(self): - """Pretty print network.""" - header = '{}'.format(str(self.__class__.__name__)) - msg = '' - indent = ' ' * 2 - msg += textwrap.indent( - "\ninput_shape={}\noutput_shape={}".format(self.input_shape, self.output_shape(self.input_shape)), indent) - msg += textwrap.indent("\nbackbone_net={}".format(self.backbone), indent) - msg += textwrap.indent("\npool_net={}".format(self.pool), indent) - msg = header + '(' + msg + '\n)' - return msg - - - -""" -================================================ -Observation Randomizer Networks -================================================ -""" -class Randomizer(Module): - """ - Base class for randomizer networks. Each randomizer should implement the @output_shape_in, - @output_shape_out, @forward_in, and @forward_out methods. The randomizer's @forward_in - method is invoked on raw inputs, and @forward_out is invoked on processed inputs - (usually processed by a @VisualCore instance). Note that the self.training property - can be used to change the randomizer's behavior at train vs. test time. - """ - def __init__(self): - super(Randomizer, self).__init__() - - def __init_subclass__(cls, **kwargs): - """ - Hook method to automatically register all valid subclasses so we can keep track of valid observation randomizers - in a global dict. - - This global dict stores mapping from observation randomizer network name to class. - We keep track of these registries to enable automated class inference at runtime, allowing - users to simply extend our base randomizer class and refer to that class in string form - in their config, without having to manually register their class internally. - This also future-proofs us for any additional randomizer classes we would - like to add ourselves. - """ - ObsUtils.register_randomizer(cls) - - def output_shape(self, input_shape=None): - """ - This function is unused. See @output_shape_in and @output_shape_out. - """ - raise NotImplementedError - - @abc.abstractmethod - def output_shape_in(self, input_shape=None): - """ - Function to compute output shape from inputs to this module. Corresponds to - the @forward_in operation, where raw inputs (usually observation modalities) - are passed in. - - Args: - input_shape (iterable of int): shape of input. Does not include batch dimension. - Some modules may not need this argument, if their output does not depend - on the size of the input, or if they assume fixed size input. - - Returns: - out_shape ([int]): list of integers corresponding to output shape - """ - raise NotImplementedError - - @abc.abstractmethod - def output_shape_out(self, input_shape=None): - """ - Function to compute output shape from inputs to this module. Corresponds to - the @forward_out operation, where processed inputs (usually encoded observation - modalities) are passed in. 
- - Args: - input_shape (iterable of int): shape of input. Does not include batch dimension. - Some modules may not need this argument, if their output does not depend - on the size of the input, or if they assume fixed size input. - - Returns: - out_shape ([int]): list of integers corresponding to output shape - """ - raise NotImplementedError - - @abc.abstractmethod - def forward_in(self, inputs): - """ - Randomize raw inputs. - """ - raise NotImplementedError - - @abc.abstractmethod - def forward_out(self, inputs): - """ - Processing for network outputs. - """ - return inputs - - -class CropRandomizer(Randomizer): - """ - Randomly sample crops at input, and then average across crop features at output. - """ - def __init__( - self, - input_shape, - crop_height, - crop_width, - num_crops=1, - pos_enc=False, - ): - """ - Args: - input_shape (tuple, list): shape of input (not including batch dimension) - crop_height (int): crop height - crop_width (int): crop width - num_crops (int): number of random crops to take - pos_enc (bool): if True, add 2 channels to the output to encode the spatial - location of the cropped pixels in the source image - """ - super(CropRandomizer, self).__init__() - - assert len(input_shape) == 3 # (C, H, W) - assert crop_height < input_shape[1] - assert crop_width < input_shape[2] - - self.input_shape = input_shape - self.crop_height = crop_height - self.crop_width = crop_width - self.num_crops = num_crops - self.pos_enc = pos_enc - - def output_shape_in(self, input_shape=None): - """ - Function to compute output shape from inputs to this module. Corresponds to - the @forward_in operation, where raw inputs (usually observation modalities) - are passed in. - - Args: - input_shape (iterable of int): shape of input. Does not include batch dimension. - Some modules may not need this argument, if their output does not depend - on the size of the input, or if they assume fixed size input. - - Returns: - out_shape ([int]): list of integers corresponding to output shape - """ - - # outputs are shape (C, CH, CW), or maybe C + 2 if using position encoding, because - # the number of crops are reshaped into the batch dimension, increasing the batch - # size from B to B * N - out_c = self.input_shape[0] + 2 if self.pos_enc else self.input_shape[0] - return [out_c, self.crop_height, self.crop_width] - - def output_shape_out(self, input_shape=None): - """ - Function to compute output shape from inputs to this module. Corresponds to - the @forward_out operation, where processed inputs (usually encoded observation - modalities) are passed in. - - Args: - input_shape (iterable of int): shape of input. Does not include batch dimension. - Some modules may not need this argument, if their output does not depend - on the size of the input, or if they assume fixed size input. - - Returns: - out_shape ([int]): list of integers corresponding to output shape - """ - - # since the forward_out operation splits [B * N, ...] -> [B, N, ...] - # and then pools to result in [B, ...], only the batch dimension changes, - # and so the other dimensions retain their shape. - return list(input_shape) - - def forward_in(self, inputs): - """ - Samples N random crops for each input in the batch, and then reshapes - inputs to [B * N, ...]. 
- """ - assert len(inputs.shape) >= 3 # must have at least (C, H, W) dimensions - out, _ = ObsUtils.sample_random_image_crops( - images=inputs, - crop_height=self.crop_height, - crop_width=self.crop_width, - num_crops=self.num_crops, - pos_enc=self.pos_enc, - ) - # [B, N, ...] -> [B * N, ...] - return TensorUtils.join_dimensions(out, 0, 1) - - def forward_out(self, inputs): - """ - Splits the outputs from shape [B * N, ...] -> [B, N, ...] and then average across N - to result in shape [B, ...] to make sure the network output is consistent with - what would have happened if there were no randomization. - """ - batch_size = (inputs.shape[0] // self.num_crops) - out = TensorUtils.reshape_dimensions(inputs, begin_axis=0, end_axis=0, - target_dims=(batch_size, self.num_crops)) - return out.mean(dim=1) - - def __repr__(self): - """Pretty print network.""" - header = '{}'.format(str(self.__class__.__name__)) - msg = header + "(input_shape={}, crop_size=[{}, {}], num_crops={})".format( - self.input_shape, self.crop_height, self.crop_width, self.num_crops) - return msg diff --git a/robomimic/models/obs_core.py b/robomimic/models/obs_core.py new file mode 100644 index 00000000..c784fa27 --- /dev/null +++ b/robomimic/models/obs_core.py @@ -0,0 +1,828 @@ +""" +Contains torch Modules for core observation processing blocks +such as encoders (e.g. EncoderCore, VisualCore, ScanCore, ...) +and randomizers (e.g. Randomizer, CropRandomizer). +""" + +import abc +import numpy as np +import textwrap +import random + +import torch +import torch.nn as nn +from torchvision.transforms import Lambda, Compose +import torchvision.transforms.functional as TVF + +import robomimic.models.base_nets as BaseNets +import robomimic.utils.tensor_utils as TensorUtils +import robomimic.utils.obs_utils as ObsUtils +from robomimic.utils.python_utils import extract_class_init_kwargs_from_dict + +# NOTE: this is required for the backbone classes to be found by the `eval` call in the core networks +from robomimic.models.base_nets import * +from robomimic.utils.vis_utils import visualize_image_randomizer +from robomimic.macros import VISUALIZE_RANDOMIZER + + +""" +================================================ +Encoder Core Networks (Abstract class) +================================================ +""" +class EncoderCore(BaseNets.Module): + """ + Abstract class used to categorize all cores used to encode observations + """ + def __init__(self, input_shape): + self.input_shape = input_shape + super(EncoderCore, self).__init__() + + def __init_subclass__(cls, **kwargs): + """ + Hook method to automatically register all valid subclasses so we can keep track of valid observation encoders + in a global dict. + + This global dict stores mapping from observation encoder network name to class. + We keep track of these registries to enable automated class inference at runtime, allowing + users to simply extend our base encoder class and refer to that class in string form + in their config, without having to manually register their class internally. + This also future-proofs us for any additional encoder classes we would + like to add ourselves. + """ + ObsUtils.register_encoder_core(cls) + + +""" +================================================ +Visual Core Networks (Backbone + Pool) +================================================ +""" +class VisualCore(EncoderCore, BaseNets.ConvBase): + """ + A network block that combines a visual backbone network with optional pooling + and linear layers. 
+ """ + def __init__( + self, + input_shape, + backbone_class="ResNet18Conv", + pool_class="SpatialSoftmax", + backbone_kwargs=None, + pool_kwargs=None, + flatten=True, + feature_dimension=64, + ): + """ + Args: + input_shape (tuple): shape of input (not including batch dimension) + backbone_class (str): class name for the visual backbone network. Defaults + to "ResNet18Conv". + pool_class (str): class name for the visual feature pooler (optional) + Common options are "SpatialSoftmax" and "SpatialMeanPool". Defaults to + "SpatialSoftmax". + backbone_kwargs (dict): kwargs for the visual backbone network (optional) + pool_kwargs (dict): kwargs for the visual feature pooler (optional) + flatten (bool): whether to flatten the visual features + feature_dimension (int): if not None, add a Linear layer to + project output into a desired feature dimension + """ + super(VisualCore, self).__init__(input_shape=input_shape) + self.flatten = flatten + + if backbone_kwargs is None: + backbone_kwargs = dict() + + # add input channel dimension to visual core inputs + backbone_kwargs["input_channel"] = input_shape[0] + + # extract only relevant kwargs for this specific backbone + backbone_kwargs = extract_class_init_kwargs_from_dict(cls=eval(backbone_class), dic=backbone_kwargs, copy=True) + + # visual backbone + assert isinstance(backbone_class, str) + self.backbone = eval(backbone_class)(**backbone_kwargs) + + assert isinstance(self.backbone, BaseNets.ConvBase) + + feat_shape = self.backbone.output_shape(input_shape) + net_list = [self.backbone] + + # maybe make pool net + if pool_class is not None: + assert isinstance(pool_class, str) + # feed output shape of backbone to pool net + if pool_kwargs is None: + pool_kwargs = dict() + # extract only relevant kwargs for this specific backbone + pool_kwargs["input_shape"] = feat_shape + pool_kwargs = extract_class_init_kwargs_from_dict(cls=eval(pool_class), dic=pool_kwargs, copy=True) + self.pool = eval(pool_class)(**pool_kwargs) + assert isinstance(self.pool, BaseNets.Module) + + feat_shape = self.pool.output_shape(feat_shape) + net_list.append(self.pool) + else: + self.pool = None + + # flatten layer + if self.flatten: + net_list.append(torch.nn.Flatten(start_dim=1, end_dim=-1)) + + # maybe linear layer + self.feature_dimension = feature_dimension + if feature_dimension is not None: + assert self.flatten + linear = torch.nn.Linear(int(np.prod(feat_shape)), feature_dimension) + net_list.append(linear) + + self.nets = nn.Sequential(*net_list) + + def output_shape(self, input_shape): + """ + Function to compute output shape from inputs to this module. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + if self.feature_dimension is not None: + # linear output + return [self.feature_dimension] + feat_shape = self.backbone.output_shape(input_shape) + if self.pool is not None: + # pool output + feat_shape = self.pool.output_shape(feat_shape) + # backbone + flat output + if self.flatten: + return [np.prod(feat_shape)] + else: + return feat_shape + + def forward(self, inputs): + """ + Forward pass through visual core. 
+ """ + ndim = len(self.input_shape) + assert tuple(inputs.shape)[-ndim:] == tuple(self.input_shape) + return super(VisualCore, self).forward(inputs) + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = '' + indent = ' ' * 2 + msg += textwrap.indent( + "\ninput_shape={}\noutput_shape={}".format(self.input_shape, self.output_shape(self.input_shape)), indent) + msg += textwrap.indent("\nbackbone_net={}".format(self.backbone), indent) + msg += textwrap.indent("\npool_net={}".format(self.pool), indent) + msg = header + '(' + msg + '\n)' + return msg + + +""" +================================================ +Scan Core Networks (Conv1D Sequential + Pool) +================================================ +""" +class ScanCore(EncoderCore, BaseNets.ConvBase): + """ + A network block that combines a Conv1D backbone network with optional pooling + and linear layers. + """ + def __init__( + self, + input_shape, + conv_kwargs=None, + conv_activation="relu", + pool_class=None, + pool_kwargs=None, + flatten=True, + feature_dimension=None, + ): + """ + Args: + input_shape (tuple): shape of input (not including batch dimension) + conv_kwargs (dict): kwargs for the conv1d backbone network. Should contain lists for the following values: + out_channels (int) + kernel_size (int) + stride (int) + ... + + If not specified, or an empty dictionary is specified, some default settings will be used. + conv_activation (str or None): Activation to use between conv layers. Default is relu. + Currently, valid options are {relu} + pool_class (str): class name for the visual feature pooler (optional) + Common options are "SpatialSoftmax" and "SpatialMeanPool" + pool_kwargs (dict): kwargs for the visual feature pooler (optional) + flatten (bool): whether to flatten the network output + feature_dimension (int): if not None, add a Linear layer to + project output into a desired feature dimension (note: flatten must be set to True!) + """ + super(ScanCore, self).__init__(input_shape=input_shape) + self.flatten = flatten + self.feature_dimension = feature_dimension + + if conv_kwargs is None: + conv_kwargs = dict() + + # Generate backbone network + self.backbone = BaseNets.Conv1dBase( + input_channel=1, + activation=conv_activation, + **conv_kwargs, + ) + feat_shape = self.backbone.output_shape(input_shape=input_shape) + + # Create netlist of all generated networks + net_list = [self.backbone] + + # Possibly add pooling network + if pool_class is not None: + # Add an unsqueeze network so that the shape is correct to pass to pooling network + self.unsqueeze = Unsqueeze(dim=-1) + net_list.append(self.unsqueeze) + # Get output shape + feat_shape = self.unsqueeze.output_shape(feat_shape) + # Create pooling network + self.pool = eval(pool_class)(input_shape=feat_shape, **pool_kwargs) + net_list.append(self.pool) + feat_shape = self.pool.output_shape(feat_shape) + else: + self.unsqueeze, self.pool = None, None + + # flatten layer + if self.flatten: + net_list.append(torch.nn.Flatten(start_dim=1, end_dim=-1)) + + # maybe linear layer + if self.feature_dimension is not None: + assert self.flatten + linear = torch.nn.Linear(int(np.prod(feat_shape)), self.feature_dimension) + net_list.append(linear) + + # Generate final network + self.nets = nn.Sequential(*net_list) + + def output_shape(self, input_shape): + """ + Function to compute output shape from inputs to this module. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. 
+ Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + if self.feature_dimension is not None: + # linear output + return [self.feature_dimension] + feat_shape = self.backbone.output_shape(input_shape) + if self.pool is not None: + # pool output + feat_shape = self.pool.output_shape(self.unsqueeze.output_shape(feat_shape)) + # backbone + flat output + return [np.prod(feat_shape)] if self.flatten else feat_shape + + def forward(self, inputs): + """ + Forward pass through visual core. + """ + ndim = len(self.input_shape) + assert tuple(inputs.shape)[-ndim:] == tuple(self.input_shape) + return super(ScanCore, self).forward(inputs) + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = '' + indent = ' ' * 2 + msg += textwrap.indent( + "\ninput_shape={}\noutput_shape={}".format(self.input_shape, self.output_shape(self.input_shape)), indent) + msg += textwrap.indent("\nbackbone_net={}".format(self.backbone), indent) + msg += textwrap.indent("\npool_net={}".format(self.pool), indent) + msg = header + '(' + msg + '\n)' + return msg + + +""" +================================================ +Observation Randomizer Networks +================================================ +""" +class Randomizer(BaseNets.Module): + """ + Base class for randomizer networks. Each randomizer should implement the @output_shape_in, + @output_shape_out, @forward_in, and @forward_out methods. The randomizer's @forward_in + method is invoked on raw inputs, and @forward_out is invoked on processed inputs + (usually processed by a @VisualCore instance). Note that the self.training property + can be used to change the randomizer's behavior at train vs. test time. + """ + def __init__(self): + super(Randomizer, self).__init__() + + def __init_subclass__(cls, **kwargs): + """ + Hook method to automatically register all valid subclasses so we can keep track of valid observation randomizers + in a global dict. + + This global dict stores mapping from observation randomizer network name to class. + We keep track of these registries to enable automated class inference at runtime, allowing + users to simply extend our base randomizer class and refer to that class in string form + in their config, without having to manually register their class internally. + This also future-proofs us for any additional randomizer classes we would + like to add ourselves. + """ + ObsUtils.register_randomizer(cls) + + def output_shape(self, input_shape=None): + """ + This function is unused. See @output_shape_in and @output_shape_out. + """ + raise NotImplementedError + + @abc.abstractmethod + def output_shape_in(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. Corresponds to + the @forward_in operation, where raw inputs (usually observation modalities) + are passed in. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + raise NotImplementedError + + @abc.abstractmethod + def output_shape_out(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. 
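`ScanCore` follows the same pattern; a minimal sketch for a 1D range scan, assuming a `SpatialSoftmax` pool is desired (if `conv_kwargs` is omitted, the `Conv1dBase` defaults apply):

```python
from robomimic.models.obs_core import ScanCore

net = ScanCore(
    input_shape=(1, 60),              # (channels, scan length); hypothetical scan size
    conv_kwargs=None,                 # fall back to Conv1dBase defaults
    pool_class="SpatialSoftmax",
    pool_kwargs=dict(num_kp=32),
    flatten=True,
    feature_dimension=64,
)
print(net.output_shape((1, 60)))      # -> [64]
```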
Corresponds to + the @forward_out operation, where processed inputs (usually encoded observation + modalities) are passed in. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + raise NotImplementedError + + def forward_in(self, inputs): + """ + Randomize raw inputs if training. + """ + if self.training: + randomized_inputs = self._forward_in(inputs=inputs) + if VISUALIZE_RANDOMIZER: + num_samples_to_visualize = min(4, inputs.shape[0]) + self._visualize(inputs, randomized_inputs, num_samples_to_visualize=num_samples_to_visualize) + return randomized_inputs + else: + return self._forward_in_eval(inputs) + + def forward_out(self, inputs): + """ + Processing for network outputs. + """ + if self.training: + return self._forward_out(inputs) + else: + return self._forward_out_eval(inputs) + + @abc.abstractmethod + def _forward_in(self, inputs): + """ + Randomize raw inputs. + """ + raise NotImplementedError + + def _forward_in_eval(self, inputs): + """ + Test-time behavior for the randomizer + """ + return inputs + + @abc.abstractmethod + def _forward_out(self, inputs): + """ + Processing for network outputs. + """ + return inputs + + def _forward_out_eval(self, inputs): + """ + Test-time behavior for the randomizer + """ + return inputs + + @abc.abstractmethod + def _visualize(self, pre_random_input, randomized_input, num_samples_to_visualize=2): + """ + Visualize the original input and the randomized input for _forward_in for debugging purposes. + """ + pass + + +class CropRandomizer(Randomizer): + """ + Randomly sample crops at input, and then average across crop features at output. + """ + def __init__( + self, + input_shape, + crop_height=76, + crop_width=76, + num_crops=1, + pos_enc=False, + ): + """ + Args: + input_shape (tuple, list): shape of input (not including batch dimension) + crop_height (int): crop height + crop_width (int): crop width + num_crops (int): number of random crops to take + pos_enc (bool): if True, add 2 channels to the output to encode the spatial + location of the cropped pixels in the source image + """ + super(CropRandomizer, self).__init__() + + assert len(input_shape) == 3 # (C, H, W) + assert crop_height < input_shape[1] + assert crop_width < input_shape[2] + + self.input_shape = input_shape + self.crop_height = crop_height + self.crop_width = crop_width + self.num_crops = num_crops + self.pos_enc = pos_enc + + def output_shape_in(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. Corresponds to + the @forward_in operation, where raw inputs (usually observation modalities) + are passed in. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. 
+ + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + + # outputs are shape (C, CH, CW), or maybe C + 2 if using position encoding, because + # the number of crops are reshaped into the batch dimension, increasing the batch + # size from B to B * N + out_c = self.input_shape[0] + 2 if self.pos_enc else self.input_shape[0] + return [out_c, self.crop_height, self.crop_width] + + def output_shape_out(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. Corresponds to + the @forward_out operation, where processed inputs (usually encoded observation + modalities) are passed in. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + + # since the forward_out operation splits [B * N, ...] -> [B, N, ...] + # and then pools to result in [B, ...], only the batch dimension changes, + # and so the other dimensions retain their shape. + return list(input_shape) + + def _forward_in(self, inputs): + """ + Samples N random crops for each input in the batch, and then reshapes + inputs to [B * N, ...]. + """ + assert len(inputs.shape) >= 3 # must have at least (C, H, W) dimensions + out, _ = ObsUtils.sample_random_image_crops( + images=inputs, + crop_height=self.crop_height, + crop_width=self.crop_width, + num_crops=self.num_crops, + pos_enc=self.pos_enc, + ) + # [B, N, ...] -> [B * N, ...] + return TensorUtils.join_dimensions(out, 0, 1) + + def _forward_in_eval(self, inputs): + """ + Do center crops during eval + """ + assert len(inputs.shape) >= 3 # must have at least (C, H, W) dimensions + inputs = inputs.permute(*range(inputs.dim()-3), inputs.dim()-2, inputs.dim()-1, inputs.dim()-3) + out = ObsUtils.center_crop(inputs, self.crop_height, self.crop_width) + out = out.permute(*range(out.dim()-3), out.dim()-1, out.dim()-3, out.dim()-2) + return out + + def _forward_out(self, inputs): + """ + Splits the outputs from shape [B * N, ...] -> [B, N, ...] and then average across N + to result in shape [B, ...] to make sure the network output is consistent with + what would have happened if there were no randomization. + """ + batch_size = (inputs.shape[0] // self.num_crops) + out = TensorUtils.reshape_dimensions(inputs, begin_axis=0, end_axis=0, + target_dims=(batch_size, self.num_crops)) + return out.mean(dim=1) + + def _visualize(self, pre_random_input, randomized_input, num_samples_to_visualize=2): + batch_size = pre_random_input.shape[0] + random_sample_inds = torch.randint(0, batch_size, size=(num_samples_to_visualize,)) + pre_random_input_np = TensorUtils.to_numpy(pre_random_input)[random_sample_inds] + randomized_input = TensorUtils.reshape_dimensions( + randomized_input, + begin_axis=0, + end_axis=0, + target_dims=(batch_size, self.num_crops) + ) # [B * N, ...] -> [B, N, ...] 
+ randomized_input_np = TensorUtils.to_numpy(randomized_input[random_sample_inds]) + + pre_random_input_np = pre_random_input_np.transpose((0, 2, 3, 1)) # [B, C, H, W] -> [B, H, W, C] + randomized_input_np = randomized_input_np.transpose((0, 1, 3, 4, 2)) # [B, N, C, H, W] -> [B, N, H, W, C] + + visualize_image_randomizer( + pre_random_input_np, + randomized_input_np, + randomizer_name='{}'.format(str(self.__class__.__name__)) + ) + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = header + "(input_shape={}, crop_size=[{}, {}], num_crops={})".format( + self.input_shape, self.crop_height, self.crop_width, self.num_crops) + return msg + + +class ColorRandomizer(Randomizer): + """ + Randomly sample color jitter at input, and then average across color jtters at output. + """ + def __init__( + self, + input_shape, + brightness=0.3, + contrast=0.3, + saturation=0.3, + hue=0.3, + num_samples=1, + ): + """ + Args: + input_shape (tuple, list): shape of input (not including batch dimension) + brightness (None or float or 2-tuple): How much to jitter brightness. brightness_factor is chosen uniformly + from [max(0, 1 - brightness), 1 + brightness] or the given [min, max]. Should be non negative numbers. + contrast (None or float or 2-tuple): How much to jitter contrast. contrast_factor is chosen uniformly + from [max(0, 1 - contrast), 1 + contrast] or the given [min, max]. Should be non negative numbers. + saturation (None or float or 2-tuple): How much to jitter saturation. saturation_factor is chosen uniformly + from [max(0, 1 - saturation), 1 + saturation] or the given [min, max]. Should be non negative numbers. + hue (None or float or 2-tuple): How much to jitter hue. hue_factor is chosen uniformly from [-hue, hue] or + the given [min, max]. Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. To jitter hue, the pixel + values of the input image has to be non-negative for conversion to HSV space; thus it does not work + if you normalize your image to an interval with negative values, or use an interpolation that + generates negative values before using this function. + num_samples (int): number of random color jitters to take + """ + super(ColorRandomizer, self).__init__() + + assert len(input_shape) == 3 # (C, H, W) + + self.input_shape = input_shape + self.brightness = [max(0, 1 - brightness), 1 + brightness] if type(brightness) in {float, int} else brightness + self.contrast = [max(0, 1 - contrast), 1 + contrast] if type(contrast) in {float, int} else contrast + self.saturation = [max(0, 1 - saturation), 1 + saturation] if type(saturation) in {float, int} else saturation + self.hue = [-hue, hue] if type(hue) in {float, int} else hue + self.num_samples = num_samples + + @torch.jit.unused + def get_transform(self): + """ + Get a randomized transform to be applied on image. + + Implementation taken directly from: + + https://github.com/pytorch/vision/blob/2f40a483d73018ae6e1488a484c5927f2b309969/torchvision/transforms/transforms.py#L1053-L1085 + + Returns: + Transform: Transform which randomly adjusts brightness, contrast and + saturation in a random order. 
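The randomizer refactor above splits behavior on `self.training`: `CropRandomizer` now takes random crops during training and a deterministic center crop at eval time (the old version always cropped randomly). A rough sketch:

```python
import torch
from robomimic.models.obs_core import CropRandomizer

rand = CropRandomizer(input_shape=(3, 84, 84), crop_height=76, crop_width=76, num_crops=1)
x = torch.zeros(8, 3, 84, 84)

rand.train()
crops = rand.forward_in(x)    # random crops, shape (8 * num_crops, 3, 76, 76)

rand.eval()
center = rand.forward_in(x)   # center crops, shape (8, 3, 76, 76)
```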
+ """ + transforms = [] + + if self.brightness is not None: + brightness_factor = random.uniform(self.brightness[0], self.brightness[1]) + transforms.append(Lambda(lambda img: TVF.adjust_brightness(img, brightness_factor))) + + if self.contrast is not None: + contrast_factor = random.uniform(self.contrast[0], self.contrast[1]) + transforms.append(Lambda(lambda img: TVF.adjust_contrast(img, contrast_factor))) + + if self.saturation is not None: + saturation_factor = random.uniform(self.saturation[0], self.saturation[1]) + transforms.append(Lambda(lambda img: TVF.adjust_saturation(img, saturation_factor))) + + if self.hue is not None: + hue_factor = random.uniform(self.hue[0], self.hue[1]) + transforms.append(Lambda(lambda img: TVF.adjust_hue(img, hue_factor))) + + random.shuffle(transforms) + transform = Compose(transforms) + + return transform + + def get_batch_transform(self, N): + """ + Generates a batch transform, where each set of sample(s) along the batch (first) dimension will have the same + @N unique ColorJitter transforms applied. + + Args: + N (int): Number of ColorJitter transforms to apply per set of sample(s) along the batch (first) dimension + + Returns: + Lambda: Aggregated transform which will autoamtically apply a different ColorJitter transforms to + each sub-set of samples along batch dimension, assumed to be the FIRST dimension in the inputted tensor + Note: This function will MULTIPLY the first dimension by N + """ + return Lambda(lambda x: torch.stack([self.get_transform()(x_) for x_ in x for _ in range(N)])) + + def output_shape_in(self, input_shape=None): + # outputs are same shape as inputs + return list(input_shape) + + def output_shape_out(self, input_shape=None): + # since the forward_out operation splits [B * N, ...] -> [B, N, ...] + # and then pools to result in [B, ...], only the batch dimension changes, + # and so the other dimensions retain their shape. + return list(input_shape) + + def _forward_in(self, inputs): + """ + Samples N random color jitters for each input in the batch, and then reshapes + inputs to [B * N, ...]. + """ + assert len(inputs.shape) >= 3 # must have at least (C, H, W) dimensions + + # Make sure shape is exactly 4 + if len(inputs.shape) == 3: + inputs = torch.unsqueeze(inputs, dim=0) + + # Create lambda to aggregate all color randomizings at once + transform = self.get_batch_transform(N=self.num_samples) + + return transform(inputs) + + def _forward_out(self, inputs): + """ + Splits the outputs from shape [B * N, ...] -> [B, N, ...] and then average across N + to result in shape [B, ...] to make sure the network output is consistent with + what would have happened if there were no randomization. + """ + batch_size = (inputs.shape[0] // self.num_samples) + out = TensorUtils.reshape_dimensions(inputs, begin_axis=0, end_axis=0, + target_dims=(batch_size, self.num_samples)) + return out.mean(dim=1) + + def _visualize(self, pre_random_input, randomized_input, num_samples_to_visualize=2): + batch_size = pre_random_input.shape[0] + random_sample_inds = torch.randint(0, batch_size, size=(num_samples_to_visualize,)) + pre_random_input_np = TensorUtils.to_numpy(pre_random_input)[random_sample_inds] + randomized_input = TensorUtils.reshape_dimensions( + randomized_input, + begin_axis=0, + end_axis=0, + target_dims=(batch_size, self.num_samples) + ) # [B * N, ...] -> [B, N, ...] 
+ randomized_input_np = TensorUtils.to_numpy(randomized_input[random_sample_inds]) + + pre_random_input_np = pre_random_input_np.transpose((0, 2, 3, 1)) # [B, C, H, W] -> [B, H, W, C] + randomized_input_np = randomized_input_np.transpose((0, 1, 3, 4, 2)) # [B, N, C, H, W] -> [B, N, H, W, C] + + visualize_image_randomizer( + pre_random_input_np, + randomized_input_np, + randomizer_name='{}'.format(str(self.__class__.__name__)) + ) + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = header + f"(input_shape={self.input_shape}, brightness={self.brightness}, contrast={self.contrast}, " \ + f"saturation={self.saturation}, hue={self.hue}, num_samples={self.num_samples})" + return msg + + +class GaussianNoiseRandomizer(Randomizer): + """ + Randomly sample gaussian noise at input, and then average across noises at output. + """ + def __init__( + self, + input_shape, + noise_mean=0.0, + noise_std=0.3, + limits=None, + num_samples=1, + ): + """ + Args: + input_shape (tuple, list): shape of input (not including batch dimension) + noise_mean (float): Mean of noise to apply + noise_std (float): Standard deviation of noise to apply + limits (None or 2-tuple): If specified, should be the (min, max) values to clamp all noisied samples to + num_samples (int): number of random color jitters to take + """ + super(GaussianNoiseRandomizer, self).__init__() + + self.input_shape = input_shape + self.noise_mean = noise_mean + self.noise_std = noise_std + self.limits = limits + self.num_samples = num_samples + + def output_shape_in(self, input_shape=None): + # outputs are same shape as inputs + return list(input_shape) + + def output_shape_out(self, input_shape=None): + # since the forward_out operation splits [B * N, ...] -> [B, N, ...] + # and then pools to result in [B, ...], only the batch dimension changes, + # and so the other dimensions retain their shape. + return list(input_shape) + + def _forward_in(self, inputs): + """ + Samples N random gaussian noises for each input in the batch, and then reshapes + inputs to [B * N, ...]. + """ + out = TensorUtils.repeat_by_expand_at(inputs, repeats=self.num_samples, dim=0) + + # Sample noise across all samples + out = torch.rand(size=out.shape) * self.noise_std + self.noise_mean + out + + # Possibly clamp + if self.limits is not None: + out = torch.clip(out, min=self.limits[0], max=self.limits[1]) + + return out + + def _forward_out(self, inputs): + """ + Splits the outputs from shape [B * N, ...] -> [B, N, ...] and then average across N + to result in shape [B, ...] to make sure the network output is consistent with + what would have happened if there were no randomization. + """ + batch_size = (inputs.shape[0] // self.num_samples) + out = TensorUtils.reshape_dimensions(inputs, begin_axis=0, end_axis=0, + target_dims=(batch_size, self.num_samples)) + return out.mean(dim=1) + + def _visualize(self, pre_random_input, randomized_input, num_samples_to_visualize=2): + batch_size = pre_random_input.shape[0] + random_sample_inds = torch.randint(0, batch_size, size=(num_samples_to_visualize,)) + pre_random_input_np = TensorUtils.to_numpy(pre_random_input)[random_sample_inds] + randomized_input = TensorUtils.reshape_dimensions( + randomized_input, + begin_axis=0, + end_axis=0, + target_dims=(batch_size, self.num_samples) + ) # [B * N, ...] -> [B, N, ...] 
+ randomized_input_np = TensorUtils.to_numpy(randomized_input[random_sample_inds]) + + pre_random_input_np = pre_random_input_np.transpose((0, 2, 3, 1)) # [B, C, H, W] -> [B, H, W, C] + randomized_input_np = randomized_input_np.transpose((0, 1, 3, 4, 2)) # [B, N, C, H, W] -> [B, N, H, W, C] + + visualize_image_randomizer( + pre_random_input_np, + randomized_input_np, + randomizer_name='{}'.format(str(self.__class__.__name__)) + ) + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = header + f"(input_shape={self.input_shape}, noise_mean={self.noise_mean}, noise_std={self.noise_std}, " \ + f"limits={self.limits}, num_samples={self.num_samples})" + return msg diff --git a/robomimic/models/obs_nets.py b/robomimic/models/obs_nets.py index 6a0eef69..b3284185 100644 --- a/robomimic/models/obs_nets.py +++ b/robomimic/models/obs_nets.py @@ -22,7 +22,9 @@ import robomimic.utils.tensor_utils as TensorUtils import robomimic.utils.obs_utils as ObsUtils from robomimic.models.base_nets import Module, Sequential, MLP, RNN_Base, ResNet18Conv, SpatialSoftmax, \ - FeatureAggregator, VisualCore, Randomizer + FeatureAggregator +from robomimic.models.obs_core import VisualCore, Randomizer +from robomimic.models.transformers import PositionalEncoding, GPT_Backbone def obs_encoder_factory( @@ -851,3 +853,247 @@ def __repr__(self): msg += textwrap.indent("\n\nrnn={}".format(self.nets["rnn"]), indent) msg = header + '(' + msg + '\n)' return msg + + +class MIMO_Transformer(Module): + """ + Extension to Transformer (based on GPT architecture) to accept multiple observation + dictionaries as input and to output dictionaries of tensors. Inputs are specified as + a dictionary of observation dictionaries, with each key corresponding to an observation group. + This module utilizes @ObservationGroupEncoder to process the multiple input dictionaries and + @ObservationDecoder to generate tensor dictionaries. The default behavior + for encoding the inputs is to process visual inputs with a learned CNN and concatenating + the flat encodings with the other flat inputs. The default behavior for generating + outputs is to use a linear layer branch to produce each modality separately + (including visual outputs). + """ + def __init__( + self, + input_obs_group_shapes, + output_shapes, + transformer_embed_dim, + transformer_num_layers, + transformer_num_heads, + transformer_context_length, + transformer_emb_dropout=0.1, + transformer_attn_dropout=0.1, + transformer_block_output_dropout=0.1, + transformer_sinusoidal_embedding=False, + transformer_activation="gelu", + transformer_nn_parameter_for_timesteps=False, + encoder_kwargs=None, + ): + """ + Args: + input_obs_group_shapes (OrderedDict): a dictionary of dictionaries. + Each key in this dictionary should specify an observation group, and + the value should be an OrderedDict that maps modalities to + expected shapes. + output_shapes (OrderedDict): a dictionary that maps modality to + expected shapes for outputs. + transformer_embed_dim (int): dimension for embeddings used by transformer + transformer_num_layers (int): number of transformer blocks to stack + transformer_num_heads (int): number of attention heads for each + transformer block - must divide @transformer_embed_dim evenly. Self-attention is + computed over this many partitions of the embedding dimension separately. 
+ transformer_context_length (int): expected length of input sequences + transformer_activation: non-linearity for input and output layers used in transformer + transformer_emb_dropout (float): dropout probability for embedding inputs in transformer + transformer_attn_dropout (float): dropout probability for attention outputs for each transformer block + transformer_block_output_dropout (float): dropout probability for final outputs for each transformer block + encoder_kwargs (dict): observation encoder config + """ + super(MIMO_Transformer, self).__init__() + + assert isinstance(input_obs_group_shapes, OrderedDict) + assert np.all([isinstance(input_obs_group_shapes[k], OrderedDict) for k in input_obs_group_shapes]) + assert isinstance(output_shapes, OrderedDict) + + self.input_obs_group_shapes = input_obs_group_shapes + self.output_shapes = output_shapes + + self.nets = nn.ModuleDict() + self.params = nn.ParameterDict() + + # Encoder for all observation groups. + self.nets["encoder"] = ObservationGroupEncoder( + observation_group_shapes=input_obs_group_shapes, + encoder_kwargs=encoder_kwargs, + feature_activation=None, + ) + + # flat encoder output dimension + transformer_input_dim = self.nets["encoder"].output_shape()[0] + + self.nets["embed_encoder"] = nn.Linear( + transformer_input_dim, transformer_embed_dim + ) + + max_timestep = transformer_context_length + + if transformer_sinusoidal_embedding: + self.nets["embed_timestep"] = PositionalEncoding(transformer_embed_dim) + elif transformer_nn_parameter_for_timesteps: + assert ( + not transformer_sinusoidal_embedding + ), "nn.Parameter only works with learned embeddings" + self.params["embed_timestep"] = nn.Parameter( + torch.zeros(1, max_timestep, transformer_embed_dim) + ) + else: + self.nets["embed_timestep"] = nn.Embedding(max_timestep, transformer_embed_dim) + + # layer norm for embeddings + self.nets["embed_ln"] = nn.LayerNorm(transformer_embed_dim) + + # dropout for input embeddings + self.nets["embed_drop"] = nn.Dropout(transformer_emb_dropout) + + # GPT transformer + self.nets["transformer"] = GPT_Backbone( + embed_dim=transformer_embed_dim, + num_layers=transformer_num_layers, + num_heads=transformer_num_heads, + context_length=transformer_context_length, + attn_dropout=transformer_attn_dropout, + block_output_dropout=transformer_block_output_dropout, + activation=transformer_activation, + ) + + # decoder for output modalities + self.nets["decoder"] = ObservationDecoder( + decode_shapes=self.output_shapes, + input_feat_dim=transformer_embed_dim, + ) + + self.transformer_context_length = transformer_context_length + self.transformer_embed_dim = transformer_embed_dim + self.transformer_sinusoidal_embedding = transformer_sinusoidal_embedding + self.transformer_nn_parameter_for_timesteps = transformer_nn_parameter_for_timesteps + + def output_shape(self, input_shape=None): + """ + Returns output shape for this module, which is a dictionary instead + of a list since outputs are dictionaries. + """ + return { k : list(self.output_shapes[k]) for k in self.output_shapes } + + def embed_timesteps(self, embeddings): + """ + Computes timestep-based embeddings (aka positional embeddings) to add to embeddings. 
+ Args: + embeddings (torch.Tensor): embeddings prior to positional embeddings are computed + Returns: + time_embeddings (torch.Tensor): positional embeddings to add to embeddings + """ + timesteps = ( + torch.arange( + 0, + embeddings.shape[1], + dtype=embeddings.dtype, + device=embeddings.device, + ) + .unsqueeze(0) + .repeat(embeddings.shape[0], 1) + ) + assert (timesteps >= 0.0).all(), "timesteps must be positive!" + if self.transformer_sinusoidal_embedding: + assert torch.is_floating_point(timesteps), timesteps.dtype + else: + timesteps = timesteps.long() + + if self.transformer_nn_parameter_for_timesteps: + time_embeddings = self.params["embed_timestep"] + else: + time_embeddings = self.nets["embed_timestep"]( + timesteps + ) # these are NOT fed into transformer, only added to the inputs. + # compute how many modalities were combined into embeddings, replicate time embeddings that many times + num_replicates = embeddings.shape[-1] // self.transformer_embed_dim + time_embeddings = torch.cat([time_embeddings for _ in range(num_replicates)], -1) + assert ( + embeddings.shape == time_embeddings.shape + ), f"{embeddings.shape}, {time_embeddings.shape}" + return time_embeddings + + def input_embedding( + self, + inputs, + ): + """ + Process encoded observations into embeddings to pass to transformer, + Adds timestep-based embeddings (aka positional embeddings) to inputs. + Args: + inputs (torch.Tensor): outputs from observation encoder + Returns: + embeddings (torch.Tensor): input embeddings to pass to transformer backbone. + """ + embeddings = self.nets["embed_encoder"](inputs) + time_embeddings = self.embed_timesteps(embeddings) + embeddings = embeddings + time_embeddings + embeddings = self.nets["embed_ln"](embeddings) + embeddings = self.nets["embed_drop"](embeddings) + + return embeddings + + + def forward(self, **inputs): + """ + Process each set of inputs in its own observation group. + Args: + inputs (dict): a dictionary of dictionaries with one dictionary per + observation group. Each observation group's dictionary should map + modality to torch.Tensor batches. Should be consistent with + @self.input_obs_group_shapes. First two leading dimensions should + be batch and time [B, T, ...] for each tensor. + Returns: + outputs (dict): dictionary of output torch.Tensors, that corresponds + to @self.output_shapes. Leading dimensions will be batch and time [B, T, ...] + for each tensor. 
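The forward pass below applies both the observation encoder and the decoder to every timestep via `TensorUtils.time_distributed`. A simplified standalone sketch of that batch-time folding (an illustration of the assumed behavior, not the actual robomimic utility, which also handles nested tensor dictionaries):

```python
import torch
import torch.nn as nn

def time_distributed_simple(x, module):
    """Simplified illustration: fold time into batch, apply module per step, unfold."""
    B, T = x.shape[:2]
    out = module(x.reshape(B * T, *x.shape[2:]))  # [B*T, ...] -> [B*T, D_out]
    return out.reshape(B, T, *out.shape[1:])      # -> [B, T, D_out]

encoder = nn.Linear(23, 64)                 # stand-in for the observation encoder
obs = torch.randn(8, 10, 23)                # [B, T, obs_dim]
print(time_distributed_simple(obs, encoder).shape)  # torch.Size([8, 10, 64])
```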
+ """ + for obs_group in self.input_obs_group_shapes: + for k in self.input_obs_group_shapes[obs_group]: + # first two dimensions should be [B, T] for inputs + if inputs[obs_group][k] is None: + continue + assert inputs[obs_group][k].ndim - 2 == len(self.input_obs_group_shapes[obs_group][k]) + + inputs = inputs.copy() + + transformer_encoder_outputs = None + transformer_inputs = TensorUtils.time_distributed( + inputs, self.nets["encoder"], inputs_as_kwargs=True + ) + assert transformer_inputs.ndim == 3 # [B, T, D] + + if transformer_encoder_outputs is None: + transformer_embeddings = self.input_embedding(transformer_inputs) + # pass encoded sequences through transformer + transformer_encoder_outputs = self.nets["transformer"].forward(transformer_embeddings) + + transformer_outputs = transformer_encoder_outputs + # apply decoder to each timestep of sequence to get a dictionary of outputs + transformer_outputs = TensorUtils.time_distributed( + transformer_outputs, self.nets["decoder"] + ) + transformer_outputs["transformer_encoder_outputs"] = transformer_encoder_outputs + return transformer_outputs + + def _to_string(self): + """ + Subclasses should override this method to print out info about network / policy. + """ + return '' + + def __repr__(self): + """Pretty print network.""" + header = '{}'.format(str(self.__class__.__name__)) + msg = '' + indent = ' ' * 4 + if self._to_string() != '': + msg += textwrap.indent("\n" + self._to_string() + "\n", indent) + msg += textwrap.indent("\nencoder={}".format(self.nets["encoder"]), indent) + msg += textwrap.indent("\n\ntransformer={}".format(self.nets["transformer"]), indent) + msg += textwrap.indent("\n\ndecoder={}".format(self.nets["decoder"]), indent) + msg = header + '(' + msg + '\n)' + return msg \ No newline at end of file diff --git a/robomimic/models/policy_nets.py b/robomimic/models/policy_nets.py index b9229e69..8dba1d93 100644 --- a/robomimic/models/policy_nets.py +++ b/robomimic/models/policy_nets.py @@ -17,7 +17,8 @@ import robomimic.utils.tensor_utils as TensorUtils from robomimic.models.base_nets import Module -from robomimic.models.obs_nets import MIMO_MLP, RNN_MIMO_MLP +from robomimic.models.transformers import GPT_Backbone +from robomimic.models.obs_nets import MIMO_MLP, RNN_MIMO_MLP, MIMO_Transformer, ObservationDecoder from robomimic.models.vae_nets import VAE from robomimic.models.distributions import TanhWrappedDistribution @@ -974,6 +975,364 @@ def _to_string(self): return msg +class TransformerActorNetwork(MIMO_Transformer): + """ + An Transformer policy network that predicts actions from observation sequences (assumed to be frame stacked + from previous observations) and possible from previous actions as well (in an autoregressive manner). + """ + def __init__( + self, + obs_shapes, + ac_dim, + transformer_embed_dim, + transformer_num_layers, + transformer_num_heads, + transformer_context_length, + transformer_emb_dropout=0.1, + transformer_attn_dropout=0.1, + transformer_block_output_dropout=0.1, + transformer_sinusoidal_embedding=False, + transformer_activation="gelu", + transformer_nn_parameter_for_timesteps=False, + goal_shapes=None, + encoder_kwargs=None, + ): + """ + Args: + + obs_shapes (OrderedDict): a dictionary that maps modality to + expected shapes for observations. + + ac_dim (int): dimension of action space. 
+ + transformer_embed_dim (int): dimension for embeddings used by transformer + + transformer_num_layers (int): number of transformer blocks to stack + + transformer_num_heads (int): number of attention heads for each + transformer block - must divide @transformer_embed_dim evenly. Self-attention is + computed over this many partitions of the embedding dimension separately. + + transformer_context_length (int): expected length of input sequences + + transformer_embedding_dropout (float): dropout probability for embedding inputs in transformer + + transformer_attn_dropout (float): dropout probability for attention outputs for each transformer block + + transformer_block_output_dropout (float): dropout probability for final outputs for each transformer block + + goal_shapes (OrderedDict): a dictionary that maps modality to + expected shapes for goal observations. + + encoder_kwargs (dict or None): If None, results in default encoder_kwargs being applied. Otherwise, should + be nested dictionary containing relevant per-modality information for encoder networks. + Should be of form: + + obs_modality1: dict + feature_dimension: int + core_class: str + core_kwargs: dict + ... + ... + obs_randomizer_class: str + obs_randomizer_kwargs: dict + ... + ... + obs_modality2: dict + ... + """ + self.ac_dim = ac_dim + + assert isinstance(obs_shapes, OrderedDict) + self.obs_shapes = obs_shapes + + self.transformer_nn_parameter_for_timesteps = transformer_nn_parameter_for_timesteps + + # set up different observation groups for @RNN_MIMO_MLP + observation_group_shapes = OrderedDict() + observation_group_shapes["obs"] = OrderedDict(self.obs_shapes) + + self._is_goal_conditioned = False + if goal_shapes is not None and len(goal_shapes) > 0: + assert isinstance(goal_shapes, OrderedDict) + self._is_goal_conditioned = True + self.goal_shapes = OrderedDict(goal_shapes) + observation_group_shapes["goal"] = OrderedDict(self.goal_shapes) + else: + self.goal_shapes = OrderedDict() + + output_shapes = self._get_output_shapes() + super(TransformerActorNetwork, self).__init__( + input_obs_group_shapes=observation_group_shapes, + output_shapes=output_shapes, + transformer_embed_dim=transformer_embed_dim, + transformer_num_layers=transformer_num_layers, + transformer_num_heads=transformer_num_heads, + transformer_context_length=transformer_context_length, + transformer_emb_dropout=transformer_emb_dropout, + transformer_attn_dropout=transformer_attn_dropout, + transformer_block_output_dropout=transformer_block_output_dropout, + transformer_sinusoidal_embedding=transformer_sinusoidal_embedding, + transformer_activation=transformer_activation, + transformer_nn_parameter_for_timesteps=transformer_nn_parameter_for_timesteps, + + encoder_kwargs=encoder_kwargs, + ) + + def _get_output_shapes(self): + """ + Allow subclasses to re-define outputs from @MIMO_Transformer, since we won't + always directly predict actions, but may instead predict the parameters + of a action distribution. 
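As the class docstring notes, the policy consumes observation sequences that are frame-stacked from previous observations. During rollouts this bookkeeping is handled by the `FrameStackWrapper` introduced elsewhere in this patch; a hypothetical standalone sketch of the idea (the observation key and context length are placeholders, not robomimic code):

```python
from collections import deque
import numpy as np

context_length = 10  # would match transformer_context_length

# keep the last `context_length` observations for each key
history = {"robot0_eef_pos": deque(maxlen=context_length)}

def stack_obs(obs, history):
    for k, v in obs.items():
        if len(history[k]) == 0:
            # at episode start, pad the history by repeating the first observation
            history[k].extend([v] * context_length)
        else:
            history[k].append(v)
    # per key: stack to [T, ...] and add a leading batch dim -> [1, T, ...]
    return {k: np.stack(list(history[k]))[None] for k in history}

obs = {"robot0_eef_pos": np.zeros(3)}
print(stack_obs(obs, history)["robot0_eef_pos"].shape)  # (1, 10, 3)
```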
+ """ + output_shapes = OrderedDict(action=(self.ac_dim,)) + return output_shapes + + def output_shape(self, input_shape): + # note: @input_shape should be dictionary (key: mod) + # infers temporal dimension from input shape + mod = list(self.obs_shapes.keys())[0] + T = input_shape[mod][0] + TensorUtils.assert_size_at_dim(input_shape, size=T, dim=0, + msg="TransformerActorNetwork: input_shape inconsistent in temporal dimension") + return [T, self.ac_dim] + + def forward(self, obs_dict, actions=None, goal_dict=None): + """ + Forward a sequence of inputs through the Transformer. + Args: + obs_dict (dict): batch of observations - each tensor in the dictionary + should have leading dimensions batch and time [B, T, ...] + actions (torch.Tensor): batch of actions of shape [B, T, D] + goal_dict (dict): if not None, batch of goal observations + Returns: + outputs (torch.Tensor or dict): contains predicted action sequence, or dictionary + with predicted action sequence and predicted observation sequences + """ + if self._is_goal_conditioned: + assert goal_dict is not None + # repeat the goal observation in time to match dimension with obs_dict + mod = list(obs_dict.keys())[0] + goal_dict = TensorUtils.unsqueeze_expand_at(goal_dict, size=obs_dict[mod].shape[1], dim=1) + + forward_kwargs = dict(obs=obs_dict, goal=goal_dict) + outputs = super(TransformerActorNetwork, self).forward(**forward_kwargs) + + # apply tanh squashing to ensure actions are in [-1, 1] + outputs["action"] = torch.tanh(outputs["action"]) + + return outputs["action"] # only action sequences + + def _to_string(self): + """Info to pretty print.""" + return "action_dim={}".format(self.ac_dim) + + +class TransformerGMMActorNetwork(TransformerActorNetwork): + """ + A Transformer GMM policy network that predicts sequences of action distributions from observation + sequences (assumed to be frame stacked from previous observations). + """ + def __init__( + self, + obs_shapes, + ac_dim, + transformer_embed_dim, + transformer_num_layers, + transformer_num_heads, + transformer_context_length, + transformer_emb_dropout=0.1, + transformer_attn_dropout=0.1, + transformer_block_output_dropout=0.1, + transformer_sinusoidal_embedding=False, + transformer_activation="gelu", + transformer_nn_parameter_for_timesteps=False, + num_modes=5, + min_std=0.01, + std_activation="softplus", + low_noise_eval=True, + use_tanh=False, + goal_shapes=None, + encoder_kwargs=None, + ): + """ + Args: + + obs_shapes (OrderedDict): a dictionary that maps modality to + expected shapes for observations. + + ac_dim (int): dimension of action space. + + transformer_embed_dim (int): dimension for embeddings used by transformer + + transformer_num_layers (int): number of transformer blocks to stack + + transformer_num_heads (int): number of attention heads for each + transformer block - must divide @transformer_embed_dim evenly. Self-attention is + computed over this many partitions of the embedding dimension separately. + + transformer_context_length (int): expected length of input sequences + + transformer_embedding_dropout (float): dropout probability for embedding inputs in transformer + + transformer_attn_dropout (float): dropout probability for attention outputs for each transformer block + + transformer_block_output_dropout (float): dropout probability for final outputs for each transformer block + + num_modes (int): number of GMM modes + + min_std (float): minimum std output from network + + std_activation (None or str): type of activation to use for std deviation. 
Options are: + + `'softplus'`: Softplus activation applied + + `'exp'`: Exp applied; this corresponds to network output being interpreted as log_std instead of std + + low_noise_eval (float): if True, model will sample from GMM with low std, so that + one of the GMM modes will be sampled (approximately) + + use_tanh (bool): if True, use a tanh-Gaussian distribution + + encoder_kwargs (dict or None): If None, results in default encoder_kwargs being applied. Otherwise, should + be nested dictionary containing relevant per-modality information for encoder networks. + Should be of form: + + obs_modality1: dict + feature_dimension: int + core_class: str + core_kwargs: dict + ... + ... + obs_randomizer_class: str + obs_randomizer_kwargs: dict + ... + ... + obs_modality2: dict + ... + """ + + # parameters specific to GMM actor + self.num_modes = num_modes + self.min_std = min_std + self.low_noise_eval = low_noise_eval + self.use_tanh = use_tanh + + # Define activations to use + self.activations = { + "softplus": F.softplus, + "exp": torch.exp, + } + assert std_activation in self.activations, \ + "std_activation must be one of: {}; instead got: {}".format(self.activations.keys(), std_activation) + self.std_activation = std_activation + + super(TransformerGMMActorNetwork, self).__init__( + obs_shapes=obs_shapes, + ac_dim=ac_dim, + transformer_embed_dim=transformer_embed_dim, + transformer_num_layers=transformer_num_layers, + transformer_num_heads=transformer_num_heads, + transformer_context_length=transformer_context_length, + transformer_emb_dropout=transformer_emb_dropout, + transformer_attn_dropout=transformer_attn_dropout, + transformer_block_output_dropout=transformer_block_output_dropout, + transformer_sinusoidal_embedding=transformer_sinusoidal_embedding, + transformer_activation=transformer_activation, + transformer_nn_parameter_for_timesteps=transformer_nn_parameter_for_timesteps, + encoder_kwargs=encoder_kwargs, + goal_shapes=goal_shapes, + ) + + def _get_output_shapes(self): + """ + Tells @MIMO_Transformer superclass about the output dictionary that should be generated + at the last layer. Network outputs parameters of GMM distribution. + """ + return OrderedDict( + mean=(self.num_modes, self.ac_dim), + scale=(self.num_modes, self.ac_dim), + logits=(self.num_modes,), + ) + + def forward_train(self, obs_dict, actions=None, goal_dict=None, low_noise_eval=None): + """ + Return full GMM distribution, which is useful for computing + quantities necessary at train-time, like log-likelihood, KL + divergence, etc. 
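For instance, a behavior-cloning style objective would score ground-truth action sequences under the returned distribution. A standalone sketch with dummy tensors, mirroring the `MixtureSameFamily` construction used below (the sizes are arbitrary):

```python
import torch
import torch.distributions as D

B, T, M, A = 4, 10, 5, 7   # batch, timesteps, GMM modes, action dim (dummy sizes)
means  = torch.randn(B, T, M, A)
scales = torch.ones(B, T, M, A) * 0.1
logits = torch.randn(B, T, M)

comp = D.Independent(D.Normal(loc=means, scale=scales), 1)  # action dim becomes event dim
mix  = D.Categorical(logits=logits)
dist = D.MixtureSameFamily(mixture_distribution=mix, component_distribution=comp)

actions = torch.randn(B, T, A)
loss = -dist.log_prob(actions).mean()   # average NLL over batch and time
print(loss.shape)  # torch.Size([])
```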
+ Args: + obs_dict (dict): batch of observations + actions (torch.Tensor): batch of actions + goal_dict (dict): if not None, batch of goal observations + Returns: + dists (Distribution): sequence of GMM distributions over the timesteps + """ + if self._is_goal_conditioned: + assert goal_dict is not None + # repeat the goal observation in time to match dimension with obs_dict + mod = list(obs_dict.keys())[0] + goal_dict = TensorUtils.unsqueeze_expand_at(goal_dict, size=obs_dict[mod].shape[1], dim=1) + + forward_kwargs = dict(obs=obs_dict, goal=goal_dict) + + outputs = MIMO_Transformer.forward(self, **forward_kwargs) + + means = outputs["mean"] + scales = outputs["scale"] + logits = outputs["logits"] + + # apply tanh squashing to mean if not using tanh-GMM to ensure means are in [-1, 1] + if not self.use_tanh: + means = torch.tanh(means) + + if low_noise_eval is None: + low_noise_eval = self.low_noise_eval + if low_noise_eval and (not self.training): + # low-noise for all Gaussian dists + scales = torch.ones_like(means) * 1e-4 + else: + # post-process the scale accordingly + scales = self.activations[self.std_activation](scales) + self.min_std + + # mixture components - make sure that `batch_shape` for the distribution is equal + # to (batch_size, timesteps, num_modes) since MixtureSameFamily expects this shape + component_distribution = D.Normal(loc=means, scale=scales) + component_distribution = D.Independent(component_distribution, 1) # shift action dim to event shape + + # unnormalized logits to categorical distribution for mixing the modes + mixture_distribution = D.Categorical(logits=logits) + + dists = D.MixtureSameFamily( + mixture_distribution=mixture_distribution, + component_distribution=component_distribution, + ) + + if self.use_tanh: + # Wrap distribution with Tanh + dists = TanhWrappedDistribution(base_dist=dists, scale=1.) + + return dists + + def forward(self, obs_dict, actions=None, goal_dict=None): + """ + Samples actions from the policy distribution. + Args: + obs_dict (dict): batch of observations + actions (torch.Tensor): batch of actions + goal_dict (dict): if not None, batch of goal observations + Returns: + action (torch.Tensor): batch of actions from policy distribution + """ + out = self.forward_train(obs_dict=obs_dict, actions=actions, goal_dict=goal_dict) + return out.sample() + + def _to_string(self): + """Info to pretty print.""" + msg = "action_dim={}, std_activation={}, low_noise_eval={}, num_nodes={}, min_std={}".format( + self.ac_dim, self.std_activation, self.low_noise_eval, self.num_modes, self.min_std) + return msg + + class VAEActor(Module): """ A VAE that models a distribution of actions conditioned on observations. diff --git a/robomimic/models/transformers.py b/robomimic/models/transformers.py new file mode 100644 index 00000000..309bff30 --- /dev/null +++ b/robomimic/models/transformers.py @@ -0,0 +1,426 @@ +""" +Implementation of transformers, mostly based on Andrej's minGPT model. +See https://github.com/karpathy/minGPT/blob/master/mingpt/model.py +for more details. +""" + +import math +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from robomimic.models.base_nets import Module +import robomimic.utils.tensor_utils as TensorUtils +import robomimic.utils.torch_utils as TorchUtils + +class GEGLU(nn.Module): + """ + References: + Shazeer et al., "GLU Variants Improve Transformer," 2020. 
+ https://arxiv.org/abs/2002.05202 + Implementation: https://github.com/pfnet-research/deep-table/blob/237c8be8a405349ce6ab78075234c60d9bfe60b7/deep_table/nn/layers/activation.py + """ + + def geglu(self, x): + assert x.shape[-1] % 2 == 0 + a, b = x.chunk(2, dim=-1) + return a * F.gelu(b) + + def forward(self, x): + return self.geglu(x) + + +class PositionalEncoding(nn.Module): + """ + Taken from https://pytorch.org/tutorials/beginner/transformer_tutorial.html. + """ + + def __init__(self, embed_dim): + """ + Standard sinusoidal positional encoding scheme in transformers. + + Positional encoding of the k'th position in the sequence is given by: + p(k, 2i) = sin(k/n^(i/d)) + p(k, 2i+1) = sin(k/n^(i/d)) + + n: set to 10K in original Transformer paper + d: the embedding dimension + i: positions along the projected embedding space (ranges from 0 to d/2) + + Args: + embed_dim: The number of dimensions to project the timesteps into. + """ + super().__init__() + self.embed_dim = embed_dim + + def forward(self, x): + """ + Input timestep of shape BxT + """ + position = x + + # computing 1/n^(i/d) in log space and then exponentiating and fixing the shape + div_term = ( + torch.exp( + torch.arange(0, self.embed_dim, 2, device=x.device) + * (-math.log(10000.0) / self.embed_dim) + ) + .unsqueeze(0) + .unsqueeze(0) + .repeat(x.shape[0], x.shape[1], 1) + ) + pe = torch.zeros((x.shape[0], x.shape[1], self.embed_dim), device=x.device) + pe[:, :, 0::2] = torch.sin(position.unsqueeze(-1) * div_term) + pe[:, :, 1::2] = torch.cos(position.unsqueeze(-1) * div_term) + return pe.detach() + + +class CausalSelfAttention(Module): + def __init__( + self, + embed_dim, + num_heads, + context_length, + attn_dropout=0.1, + output_dropout=0.1, + ): + """ + Multi-head masked self-attention layer + projection (MLP layer). + + For normal self-attention (@num_heads = 1), every single input in the sequence is + mapped to a key, query, and value embedding of size @embed_dim. For each input, + its query vector is compared (using dot-product) with all other key vectors in the + sequence, and softmax normalized to compute an attention over all members of the + sequence. This is used to take a linear combination of corresponding value embeddings. + + The @num_heads argument is for multi-head attention, where the self-attention operation above + is performed in parallel over equal size partitions of the @embed_dim, allowing for different + portions of the embedding dimension to model different kinds of attention. The attention + output for each head is concatenated together. + + Finally, we use a causal mask here to ensure that each output only depends on inputs that come + before it. + + Args: + embed_dim (int): dimension of embeddings to use for keys, queries, and values + used in self-attention + + num_heads (int): number of attention heads - must divide @embed_dim evenly. Self-attention is + computed over this many partitions of the embedding dimension separately. 
+ + context_length (int): expected length of input sequences + + attn_dropout (float): dropout probability for attention outputs + + output_dropout (float): dropout probability for final outputs + """ + super(CausalSelfAttention, self).__init__() + + assert ( + embed_dim % num_heads == 0 + ), "num_heads: {} does not divide embed_dim: {} exactly".format(num_heads, embed_dim) + + self.embed_dim = embed_dim + self.num_heads = num_heads + self.context_length = context_length + self.attn_dropout = attn_dropout + self.output_dropout = output_dropout + self.nets = nn.ModuleDict() + + # projection layers for key, query, value, across all attention heads + self.nets["qkv"] = nn.Linear(self.embed_dim, 3 * self.embed_dim, bias=False) + + # dropout layers + self.nets["attn_dropout"] = nn.Dropout(self.attn_dropout) + self.nets["output_dropout"] = nn.Dropout(self.output_dropout) + + # output layer + self.nets["output"] = nn.Linear(self.embed_dim, self.embed_dim) + + # causal mask (ensures attention is only over previous inputs) - just a lower triangular matrix of 1s + mask = torch.tril(torch.ones(context_length, context_length)).view( + 1, 1, context_length, context_length + ) + self.register_buffer("mask", mask) + + def forward(self, x): + """ + Forward pass through Self-Attention block. + Input should be shape (B, T, D) where B is batch size, T is seq length (@self.context_length), and + D is input dimension (@self.embed_dim). + """ + + # enforce shape consistency + assert len(x.shape) == 3 + B, T, D = x.shape + assert ( + T <= self.context_length + ), "self-attention module can only handle sequences up to {} in length but got length {}".format( + self.context_length, T + ) + assert D == self.embed_dim + NH = self.num_heads # number of attention heads + DH = D // NH # embed dimension for each attention head + + # compute key, query, and value vectors for each member of sequence, and split across attention heads + qkv = self.nets["qkv"](x) + q, k, v = torch.chunk(qkv, 3, dim=-1) + k = k.view(B, T, NH, DH).transpose(1, 2) # [B, NH, T, DH] + q = q.view(B, T, NH, DH).transpose(1, 2) # [B, NH, T, DH] + v = v.view(B, T, NH, DH).transpose(1, 2) # [B, NH, T, DH] + + # causal self-attention mechanism + + # batched matrix multiplication between queries and keys to get all pair-wise dot-products. + # We broadcast across batch and attention heads and get pair-wise dot-products between all pairs of timesteps + # [B, NH, T, DH] x [B, NH, DH, T] -> [B, NH, T, T] + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + + # use mask to replace entries in dot products with negative inf to ensure they don't contribute to softmax, + # then take softmax over last dimension to end up with attention score for each member of sequence. + # Note the use of [:T, :T] - this makes it so we can handle sequences less than @self.context_length in length. 
+ att = att.masked_fill(self.mask[..., :T, :T] == 0, float("-inf")) + att = F.softmax( + att, dim=-1 + ) # shape [B, NH, T, T], last dimension has score over all T for each sequence member + + # dropout on attention + att = self.nets["attn_dropout"](att) + + # take weighted sum of value vectors over whole sequence according to attention, with batched matrix multiplication + # [B, NH, T, T] x [B, NH, T, DH] -> [B, NH, T, DH] + y = att @ v + # reshape [B, NH, T, DH] -> [B, T, NH, DH] -> [B, T, NH * DH] = [B, T, D] + y = y.transpose(1, 2).contiguous().view(B, T, D) + + # pass through output layer + dropout + y = self.nets["output"](y) + y = self.nets["output_dropout"](y) + return y + + def output_shape(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + + # this module doesn't modify the size of the input, it goes from (B, T, D) -> (B, T, D) + return list(input_shape) + + +class SelfAttentionBlock(Module): + """ + A single Transformer Block, that can be chained together repeatedly. + It consists of a @CausalSelfAttention module and a small MLP, along with + layer normalization and residual connections on each input. + """ + + def __init__( + self, + embed_dim, + num_heads, + context_length, + attn_dropout=0.1, + output_dropout=0.1, + activation=nn.GELU(), + ): + """ + Args: + embed_dim (int): dimension of embeddings to use for keys, queries, and values + used in self-attention + + num_heads (int): number of attention heads - must divide @embed_dim evenly. Self-attention is + computed over this many partitions of the embedding dimension separately. + + context_length (int): expected length of input sequences + + attn_dropout (float): dropout probability for attention outputs + + output_dropout (float): dropout probability for final outputs + + activation (str): string denoting the activation function to use in each transformer block + """ + super(SelfAttentionBlock, self).__init__() + + self.embed_dim = embed_dim + self.num_heads = num_heads + self.context_length = context_length + self.attn_dropout = attn_dropout + self.output_dropout = output_dropout + self.nets = nn.ModuleDict() + + # self-attention block + self.nets["attention"] = CausalSelfAttention( + embed_dim=embed_dim, + num_heads=num_heads, + context_length=context_length, + attn_dropout=attn_dropout, + output_dropout=output_dropout, + ) + + if type(activation) == GEGLU: + mult = 2 + else: + mult = 1 + + # small 2-layer MLP + self.nets["mlp"] = nn.Sequential( + nn.Linear(embed_dim, 4 * embed_dim * mult), + activation, + nn.Linear(4 * embed_dim, embed_dim), + nn.Dropout(output_dropout) + ) + + # layer normalization for inputs to self-attention module and MLP + self.nets["ln1"] = nn.LayerNorm(embed_dim) + self.nets["ln2"] = nn.LayerNorm(embed_dim) + + def forward(self, x): + """ + Forward pass - chain self-attention + MLP blocks, with residual connections and layer norms. + """ + x = x + self.nets["attention"](self.nets["ln1"](x)) + x = x + self.nets["mlp"](self.nets["ln2"](x)) + return x + + def output_shape(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. + + Args: + input_shape (iterable of int): shape of input. 
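The `mult = 2` above widens the first MLP layer because GEGLU chunks its input in half along the last dimension and gates one half with the other, so its output dimension is half of its input dimension. A quick shape check (assuming this patch is installed so `robomimic.models.transformers` exists):

```python
import torch
from robomimic.models.transformers import GEGLU

x = torch.randn(4, 10, 1024)   # e.g. 4 * embed_dim * mult for embed_dim=128, mult=2
y = GEGLU()(x)                 # chunk into (a, b), return a * gelu(b)
print(y.shape)                 # torch.Size([4, 10, 512]) - last dim halved, matching 4 * embed_dim
```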
Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. + + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + + # this module doesn't modify the size of the input, it goes from (B, T, D) -> (B, T, D) + return list(input_shape) + + +class GPT_Backbone(Module): + """the GPT model, with a context size of block_size""" + + def __init__( + self, + embed_dim, + context_length, + attn_dropout=0.1, + block_output_dropout=0.1, + num_layers=6, + num_heads=8, + activation="gelu", + ): + """ + Args: + embed_dim (int): dimension of embeddings to use for keys, queries, and values + used in self-attention + + context_length (int): expected length of input sequences + + attn_dropout (float): dropout probability for attention outputs for each transformer block + + block_output_dropout (float): dropout probability for final outputs for each transformer block + + num_layers (int): number of transformer blocks to stack + + num_heads (int): number of attention heads - must divide @embed_dim evenly. Self-attention is + computed over this many partitions of the embedding dimension separately. + + activation (str): string denoting the activation function to use in each transformer block + + """ + super(GPT_Backbone, self).__init__() + + self.embed_dim = embed_dim + self.num_layers = num_layers + self.num_heads = num_heads + self.context_length = context_length + self.attn_dropout = attn_dropout + self.block_output_dropout = block_output_dropout + + if activation == "gelu": + self.activation = nn.GELU() + elif activation == "geglu": + self.activation = GEGLU() + + # create networks + self._create_networks() + + # initialize weights + self.apply(self._init_weights) + + print( + "Created {} model with number of parameters: {}".format( + self.__class__.__name__, sum(p.numel() for p in self.parameters()) + ) + ) + + def _create_networks(self): + """ + Helper function to create networks. + """ + self.nets = nn.ModuleDict() + + # transformer - cascaded transformer blocks + self.nets["transformer"] = nn.Sequential( + *[ + SelfAttentionBlock( + embed_dim=self.embed_dim, + num_heads=self.num_heads, + context_length=self.context_length, + attn_dropout=self.attn_dropout, + output_dropout=self.block_output_dropout, + activation=self.activation, + ) + for _ in range(self.num_layers) + ] + ) + + # decoder head + self.nets["output_ln"] = nn.LayerNorm(self.embed_dim) + + def _init_weights(self, module): + """ + Weight initializer. + """ + if isinstance(module, (nn.Linear, nn.Embedding)): + module.weight.data.normal_(mean=0.0, std=0.02) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + def output_shape(self, input_shape=None): + """ + Function to compute output shape from inputs to this module. + + Args: + input_shape (iterable of int): shape of input. Does not include batch dimension. + Some modules may not need this argument, if their output does not depend + on the size of the input, or if they assume fixed size input. 
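Since the backbone simply maps `[B, T, embed_dim]` to `[B, T, embed_dim]`, it can be exercised in isolation. A small usage sketch (assuming this patch is installed; note that `forward()` asserts the input length equals `context_length` exactly):

```python
import torch
from robomimic.models.transformers import GPT_Backbone

gpt = GPT_Backbone(embed_dim=64, context_length=10, num_layers=2, num_heads=4)
x = torch.randn(2, 10, 64)     # must be [B, context_length, embed_dim]
print(gpt(x).shape)            # torch.Size([2, 10, 64])
```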
+ + Returns: + out_shape ([int]): list of integers corresponding to output shape + """ + + # this module takes inputs (B, T, @self.input_dim) and produces outputs (B, T, @self.output_dim) + return input_shape[:-1] + [self.output_dim] + + def forward(self, inputs): + assert inputs.shape[1:] == (self.context_length, self.embed_dim), inputs.shape + x = self.nets["transformer"](inputs) + transformer_output = self.nets["output_ln"](x) + return transformer_output \ No newline at end of file diff --git a/robomimic/scripts/conversion/convert_d4rl.py b/robomimic/scripts/conversion/convert_d4rl.py index 733547a9..99fc1d93 100644 --- a/robomimic/scripts/conversion/convert_d4rl.py +++ b/robomimic/scripts/conversion/convert_d4rl.py @@ -20,10 +20,10 @@ Example usage: # downloads to default path at robomimic/datasets/d4rl - python convert_d4rl.py --env walker2d-medium-expert-v0 + python convert_d4rl.py --env walker2d-medium-expert-v2 # download to custom path - python convert_d4rl.py --env walker2d-medium-expert-v0 --folder /path/to/folder + python convert_d4rl.py --env walker2d-medium-expert-v2 --folder /path/to/folder """ import os diff --git a/robomimic/scripts/conversion/convert_robosuite.py b/robomimic/scripts/conversion/convert_robosuite.py index c85fb3db..88258698 100644 --- a/robomimic/scripts/conversion/convert_robosuite.py +++ b/robomimic/scripts/conversion/convert_robosuite.py @@ -41,6 +41,7 @@ env_meta = dict( type=EB.EnvType.ROBOSUITE_TYPE, env_name=env_name, + env_version=f["data"].attrs["repository_version"], env_kwargs=env_info, ) if "env_args" in f["data"].attrs: diff --git a/robomimic/scripts/dataset_states_to_obs.py b/robomimic/scripts/dataset_states_to_obs.py index 008d955a..6665ab17 100644 --- a/robomimic/scripts/dataset_states_to_obs.py +++ b/robomimic/scripts/dataset_states_to_obs.py @@ -33,6 +33,12 @@ python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name image.hdf5 \ --done_mode 2 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 + # (space saving option) extract 84x84 image observations with compression and without + # extracting next obs (not needed for pure imitation learning algos) + python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name image.hdf5 \ + --done_mode 2 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 \ + --compress --exclude-next-obs + # use dense rewards, and only annotate the end of trajectories with done signal python dataset_states_to_obs.py --dataset /path/to/demo.hdf5 --output_name image_dense_done_1.hdf5 \ --done_mode 1 --dense --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 @@ -210,8 +216,15 @@ def dataset_states_to_obs(args): ep_data_grp.create_dataset("rewards", data=np.array(traj["rewards"])) ep_data_grp.create_dataset("dones", data=np.array(traj["dones"])) for k in traj["obs"]: - ep_data_grp.create_dataset("obs/{}".format(k), data=np.array(traj["obs"][k])) - ep_data_grp.create_dataset("next_obs/{}".format(k), data=np.array(traj["next_obs"][k])) + if args.compress: + ep_data_grp.create_dataset("obs/{}".format(k), data=np.array(traj["obs"][k]), compression="gzip") + else: + ep_data_grp.create_dataset("obs/{}".format(k), data=np.array(traj["obs"][k])) + if not args.exclude_next_obs: + if args.compress: + ep_data_grp.create_dataset("next_obs/{}".format(k), data=np.array(traj["next_obs"][k]), compression="gzip") + else: + ep_data_grp.create_dataset("next_obs/{}".format(k), data=np.array(traj["next_obs"][k])) # episode 
metadata if is_robosuite_env: @@ -315,5 +328,19 @@ def dataset_states_to_obs(args): help="(optional) copy dones from source file instead of inferring them", ) + # flag to exclude next obs in dataset + parser.add_argument( + "--exclude-next-obs", + action='store_true', + help="(optional) exclude next obs in dataset", + ) + + # flag to compress observations with gzip option in hdf5 + parser.add_argument( + "--compress", + action='store_true', + help="(optional) compress observations with gzip option in hdf5", + ) + args = parser.parse_args() dataset_states_to_obs(args) diff --git a/robomimic/scripts/download_datasets.py b/robomimic/scripts/download_datasets.py index 3ad45965..caf3a280 100644 --- a/robomimic/scripts/download_datasets.py +++ b/robomimic/scripts/download_datasets.py @@ -93,7 +93,7 @@ type=str, nargs='+', default=["low_dim"], - help="hdf5 types to download datasets for (e.g. raw, low_dim, image). Defaults to low_dim. Pass 'all' \ + help="hdf5 types to download datasets for (e.g. raw, low_dim, image). Defaults to raw. Pass 'all' \ to download datasets for all available hdf5 types per task and dataset, or directly specify the list\ of hdf5 types.", ) @@ -144,6 +144,13 @@ download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type)) print("\nDownloading dataset:\n task: {}\n dataset type: {}\n hdf5 type: {}\n download path: {}" .format(task, dataset_type, hdf5_type, download_dir)) + url = DATASET_REGISTRY[task][dataset_type][hdf5_type]["url"] + if url is None: + print( + "Skipping {}-{}-{}, no url for dataset exists.".format(task, dataset_type, hdf5_type) + + " Create this dataset locally by running the appropriate command from robomimic/scripts/extract_obs_from_raw_datasets.sh." + ) + continue if args.dry_run: print("\ndry run: skip download") else: diff --git a/robomimic/scripts/extract_obs_from_raw_datasets.sh b/robomimic/scripts/extract_obs_from_raw_datasets.sh index b3070561..00fc78f8 100644 --- a/robomimic/scripts/extract_obs_from_raw_datasets.sh +++ b/robomimic/scripts/extract_obs_from_raw_datasets.sh @@ -15,35 +15,35 @@ echo "Using base dataset directory: $BASE_DATASET_DIR" # lift - mg, sparse python dataset_states_to_obs.py --done_mode 0 \ ---dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ ---output_name low_dim_sparse.hdf5 +--dataset $BASE_DATASET_DIR/lift/mg/demo_v141.hdf5 \ +--output_name low_dim_sparse_v141.hdf5 python dataset_states_to_obs.py --done_mode 0 \ ---dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ ---output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/lift/mg/demo_v141.hdf5 \ +--output_name image_sparse_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # lift - mg, dense python dataset_states_to_obs.py --done_mode 0 --shaped \ ---dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ ---output_name low_dim_dense.hdf5 +--dataset $BASE_DATASET_DIR/lift/mg/demo_v141.hdf5 \ +--output_name low_dim_dense_v141.hdf5 python dataset_states_to_obs.py --done_mode 0 --shaped \ ---dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ ---output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/lift/mg/demo_v141.hdf5 \ +--output_name image_dense_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # can - mg, sparse python dataset_states_to_obs.py --done_mode 0 \ ---dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ ---output_name 
low_dim_sparse.hdf5 +--dataset $BASE_DATASET_DIR/can/mg/demo_v141.hdf5 \ +--output_name low_dim_sparse_v141.hdf5 python dataset_states_to_obs.py --done_mode 0 \ ---dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ ---output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/can/mg/demo_v141.hdf5 \ +--output_name image_sparse_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # can - mg, dense python dataset_states_to_obs.py --done_mode 0 --shaped \ ---dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ ---output_name low_dim_dense.hdf5 +--dataset $BASE_DATASET_DIR/can/mg/demo_v141.hdf5 \ +--output_name low_dim_dense_v141.hdf5 python dataset_states_to_obs.py --done_mode 0 --shaped \ ---dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ ---output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/can/mg/demo_v141.hdf5 \ +--output_name image_dense_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 ### NOTE: we use done-mode 2 for PH / MH (dones on task success and end of trajectory) ### @@ -54,43 +54,43 @@ python dataset_states_to_obs.py --done_mode 0 --shaped \ # lift - ph python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/lift/ph/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/lift/ph/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # can - ph python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/can/ph/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/can/ph/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # square - ph python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/square/ph/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/square/ph/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # transport - ph python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/transport/ph/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \ ---output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand 
--camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/transport/ph/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 # tool hang - ph python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/tool_hang/ph/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \ ---output_name image.hdf5 --camera_names sideview robot0_eye_in_hand --camera_height 240 --camera_width 240 +--dataset $BASE_DATASET_DIR/tool_hang/ph/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names sideview robot0_eye_in_hand --camera_height 240 --camera_width 240 ### mh ### @@ -98,43 +98,43 @@ python dataset_states_to_obs.py --done_mode 2 \ # lift - mh python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/lift/mh/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/lift/mh/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # can - mh python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/can/mh/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/can/mh/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # square - mh python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/square/mh/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/square/mh/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 # transport - mh python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/transport/mh/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \ ---output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/transport/mh/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 ### can-paired ### python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 
\ ---output_name low_dim.hdf5 +--dataset $BASE_DATASET_DIR/can/paired/demo_v141.hdf5 \ +--output_name low_dim_v141.hdf5 python dataset_states_to_obs.py --done_mode 2 \ ---dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \ ---output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 +--dataset $BASE_DATASET_DIR/can/paired/demo_v141.hdf5 \ +--output_name image_v141.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 diff --git a/robomimic/scripts/generate_paper_configs.py b/robomimic/scripts/generate_paper_configs.py index a455d406..52ed7d5b 100644 --- a/robomimic/scripts/generate_paper_configs.py +++ b/robomimic/scripts/generate_paper_configs.py @@ -215,17 +215,34 @@ def modify_config_for_dataset(config, task_name, dataset_type, hdf5_type, base_d if dataset_type == "mg": # machine-generated datasets did not use validation config.experiment.validate = False + else: + # all other datasets used validation + config.experiment.validate = True if is_real_dataset: # no evaluation rollouts for real robot training config.experiment.rollout.enabled = False with config.train.values_unlocked(): - # set dataset path and possibly filter key - file_name = DATASET_REGISTRY[task_name][dataset_type][hdf5_type]["url"].split("/")[-1] + # set dataset path and possibly filter keys + url = DATASET_REGISTRY[task_name][dataset_type][hdf5_type]["url"] + if url is None: + # infer file_name + if task_name in ["lift", "can", "square", "tool_hang", "transport"]: + file_name = "{}_v141.hdf5".format(hdf5_type) + elif task_name in ["lift_real", "can_real", "tool_hang_real"]: + file_name = "{}.hdf5".format(hdf5_type) + else: + raise ValueError("Unknown dataset type") + else: + file_name = url.split("/")[-1] config.train.data = os.path.join(base_dataset_dir, task_name, dataset_type, file_name) - if filter_key is not None: - config.train.hdf5_filter_key = filter_key + config.train.hdf5_filter_key = None if filter_key is None else filter_key + config.train.hdf5_validation_filter_key = None + if config.experiment.validate: + # set train and valid keys for validation + config.train.hdf5_filter_key = "train" if filter_key is None else "{}_train".format(filter_key) + config.train.hdf5_validation_filter_key = "valid" if filter_key is None else "{}_valid".format(filter_key) with config.observation.values_unlocked(): # maybe modify observation names and randomization sizes (since image size might be different) @@ -1168,7 +1185,7 @@ def bcq_algo_config_modifier(config): def cql_algo_config_modifier(config): with config.algo.values_unlocked(): - # taken from TD3-BC settings describe in their paper + # taken from TD3-BC settings described in their paper config.algo.optim_params.critic.learning_rate.initial = 3e-4 config.algo.optim_params.actor.learning_rate.initial = 3e-5 config.algo.actor.bc_start_steps = 40000 # pre-training steps for actor @@ -1179,27 +1196,40 @@ def cql_algo_config_modifier(config): config.algo.actor.layer_dims = (256, 256, 256) # MLP sizes config.algo.critic.layer_dims = (256, 256, 256) return config + + def iql_algo_config_modifier(config): + with config.algo.values_unlocked(): + # taken from IQL settings described in their paper + config.algo.target_tau = 0.005 + config.algo.vf_quantile = 0.7 + config.algo.adv.beta = 3.0 + config.algo.optim_params.critic.learning_rate.initial = 3e-4 + config.algo.optim_params.vf.learning_rate.initial = 3e-4 + config.algo.optim_params.actor.learning_rate.initial = 3e-4 + config.algo.actor.layer_dims = 
(256, 256, 256) # MLP sizes + config.algo.critic.layer_dims = (256, 256, 256) + return config d4rl_tasks = [ - # "halfcheetah-random-v0", - # "hopper-random-v0", - # "walker2d-random-v0", - "halfcheetah-medium-v0", - "hopper-medium-v0", - "walker2d-medium-v0", - "halfcheetah-expert-v0", - "hopper-expert-v0", - "walker2d-expert-v0", - "halfcheetah-medium-expert-v0", - "hopper-medium-expert-v0", - "walker2d-medium-expert-v0", - # "halfcheetah-medium-replay-v0", - # "hopper-medium-replay-v0", - # "walker2d-medium-replay-v0", + # "halfcheetah-random-v2", + # "hopper-random-v2", + # "walker2d-random-v2", + "halfcheetah-medium-v2", + "hopper-medium-v2", + "walker2d-medium-v2", + "halfcheetah-expert-v2", + "hopper-expert-v2", + "walker2d-expert-v2", + "halfcheetah-medium-expert-v2", + "hopper-medium-expert-v2", + "walker2d-medium-expert-v2", + # "halfcheetah-medium-replay-v2", + # "hopper-medium-replay-v2", + # "walker2d-medium-replay-v2", ] d4rl_json_paths = Config() # use for convenient nested dict for task_name in d4rl_tasks: - for algo_name in ["bcq", "cql", "td3_bc"]: + for algo_name in ["bcq", "cql", "td3_bc", "iql"]: config = config_factory(algo_name=algo_name) # hack: copy experiment and train sections from td3-bc, since that has defaults for training with D4RL @@ -1216,6 +1246,8 @@ def cql_algo_config_modifier(config): config = bcq_algo_config_modifier(config) elif algo_name == "cql": config = cql_algo_config_modifier(config) + elif algo_name == "iql": + config = iql_algo_config_modifier(config) # set experiment name with config.experiment.values_unlocked(): @@ -1223,8 +1255,10 @@ def cql_algo_config_modifier(config): # set output folder and dataset with config.train.values_unlocked(): if base_output_dir is None: - base_output_dir = "../{}_trained_models".format(algo_name) - config.train.output_dir = os.path.join(base_output_dir, "d4rl", algo_name, task_name, "trained_models") + base_output_dir_for_algo = "../{}_trained_models".format(algo_name) + else: + base_output_dir_for_algo = base_output_dir + config.train.output_dir = os.path.join(base_output_dir_for_algo, "d4rl", algo_name, task_name, "trained_models") config.train.data = os.path.join(base_dataset_dir, "d4rl", "converted", "{}.hdf5".format(task_name.replace("-", "_"))) diff --git a/robomimic/scripts/setup_macros.py b/robomimic/scripts/setup_macros.py new file mode 100644 index 00000000..92c47271 --- /dev/null +++ b/robomimic/scripts/setup_macros.py @@ -0,0 +1,32 @@ +""" +This script sets up a private macros file. + +The private macros file (macros_private.py) is not tracked by git, +allowing user-specific settings that are not tracked by git. + +This script checks if macros_private.py exists. +If applicable, it creates the private macros at robomimic/macros_private.py +""" + +import os +import robomimic +import shutil + +if __name__ == "__main__": + base_path = robomimic.__path__[0] + macros_path = os.path.join(base_path, "macros.py") + macros_private_path = os.path.join(base_path, "macros_private.py") + + if not os.path.exists(macros_path): + print("{} does not exist! Aborting...".format(macros_path)) + + if os.path.exists(macros_private_path): + ans = input("{} already exists! \noverwrite? 
(y/n)\n".format(macros_private_path)) + + if ans == "y": + print("REMOVING") + else: + exit() + + shutil.copyfile(macros_path, macros_private_path) + print("copied {}\nto {}".format(macros_path, macros_private_path)) diff --git a/robomimic/scripts/train.py b/robomimic/scripts/train.py index 9d63eb98..a92e3e66 100644 --- a/robomimic/scripts/train.py +++ b/robomimic/scripts/train.py @@ -39,7 +39,7 @@ import robomimic.utils.file_utils as FileUtils from robomimic.config import config_factory from robomimic.algo import algo_factory, RolloutPolicy -from robomimic.utils.log_utils import PrintLogger, DataLogger +from robomimic.utils.log_utils import PrintLogger, DataLogger, flush_warnings def train(config, device): @@ -51,6 +51,8 @@ def train(config, device): np.random.seed(config.train.seed) torch.manual_seed(config.train.seed) + torch.set_num_threads(2) + print("\n============= New Training Run with Config =============") print(config) print("") @@ -101,6 +103,7 @@ def train(config, device): render_offscreen=config.experiment.render_video, use_image_obs=shape_meta["use_images"], ) + env = EnvUtils.wrap_env_from_config(env, config=config) # apply environment warpper, if applicable envs[env.name] = env print(envs[env.name]) @@ -109,7 +112,9 @@ def train(config, device): # setup for a new training run data_logger = DataLogger( log_dir, + config, log_tb=config.experiment.logging.log_tb, + log_wandb=config.experiment.logging.log_wandb, ) model = algo_factory( algo_name=config.algo_name, @@ -134,6 +139,10 @@ def train(config, device): print("\n============= Training Dataset =============") print(trainset) print("") + if validset is not None: + print("\n============= Validation Dataset =============") + print(validset) + print("") # maybe retreve statistics for normalizing observations obs_normalization_stats = None @@ -165,6 +174,13 @@ def train(config, device): else: valid_loader = None + # print all warnings before training begins + print("*" * 50) + print("Warnings generated by robomimic have been duplicated here (from above) for convenience. 
Please check them carefully.") + flush_warnings() + print("*" * 50) + print("") + # main training loop best_valid_loss = None best_return = {k: -np.inf for k in envs} if config.experiment.rollout.enabled else None @@ -176,7 +192,13 @@ def train(config, device): valid_num_steps = config.experiment.validation_epoch_every_n_steps for epoch in range(1, config.train.num_epochs + 1): # epoch numbers start at 1 - step_log = TrainUtils.run_epoch(model=model, data_loader=train_loader, epoch=epoch, num_steps=train_num_steps) + step_log = TrainUtils.run_epoch( + model=model, + data_loader=train_loader, + epoch=epoch, + num_steps=train_num_steps, + obs_normalization_stats=obs_normalization_stats, + ) model.on_epoch_end(epoch) # setup checkpoint path diff --git a/robomimic/utils/dataset.py b/robomimic/utils/dataset.py index 03b7bd7b..1416a912 100644 --- a/robomimic/utils/dataset.py +++ b/robomimic/utils/dataset.py @@ -284,9 +284,9 @@ def load_dataset_in_memory(self, demo_list, hdf5_file, obs_keys, dataset_keys, l all_data[ep]["attrs"] = {} all_data[ep]["attrs"]["num_samples"] = hdf5_file["data/{}".format(ep)].attrs["num_samples"] # get obs - all_data[ep]["obs"] = {k: hdf5_file["data/{}/obs/{}".format(ep, k)][()].astype('float32') for k in obs_keys} + all_data[ep]["obs"] = {k: hdf5_file["data/{}/obs/{}".format(ep, k)][()] for k in obs_keys} if load_next_obs: - all_data[ep]["next_obs"] = {k: hdf5_file["data/{}/next_obs/{}".format(ep, k)][()].astype('float32') for k in obs_keys} + all_data[ep]["next_obs"] = {k: hdf5_file["data/{}/next_obs/{}".format(ep, k)][()] for k in obs_keys} # get other dataset keys for k in dataset_keys: if k in hdf5_file["data/{}".format(ep)]: @@ -425,6 +425,7 @@ def get_item(self, index): demo_id, index_in_demo=index_in_demo, keys=self.dataset_keys, + num_frames_to_stack=self.n_frame_stack - 1, # note: need to decrement self.n_frame_stack by one seq_length=self.seq_length ) @@ -509,11 +510,11 @@ def get_sequence_from_demo(self, demo_id, index_in_demo, keys, num_frames_to_sta seq = dict() for k in keys: data = self.get_dataset_for_ep(demo_id, k) - seq[k] = data[seq_begin_index: seq_end_index].astype("float32") + seq[k] = data[seq_begin_index: seq_end_index] seq = TensorUtils.pad_sequence(seq, padding=(seq_begin_pad, seq_end_pad), pad_same=True) pad_mask = np.array([0] * seq_begin_pad + [1] * (seq_end_index - seq_begin_index) + [0] * seq_end_pad) - pad_mask = pad_mask[:, None].astype(np.bool) + pad_mask = pad_mask[:, None].astype(bool) return seq, pad_mask @@ -543,10 +544,9 @@ def get_obs_sequence_from_demo(self, demo_id, index_in_demo, keys, num_frames_to if self.get_pad_mask: obs["pad_mask"] = pad_mask - # prepare image observations from dataset - return ObsUtils.process_obs_dict(obs) + return obs - def get_dataset_sequence_from_demo(self, demo_id, index_in_demo, keys, seq_length=1): + def get_dataset_sequence_from_demo(self, demo_id, index_in_demo, keys, num_frames_to_stack=0, seq_length=1): """ Extract a (sub)sequence of dataset items from a demo given the @keys of the items (e.g., states, actions). @@ -554,6 +554,7 @@ def get_dataset_sequence_from_demo(self, demo_id, index_in_demo, keys, seq_lengt demo_id (str): id of the demo, e.g., demo_0 index_in_demo (int): beginning index of the sequence wrt the demo keys (tuple): list of keys to extract + num_frames_to_stack (int): numbers of frame to stack. Seq gets prepended with repeated items if out of range seq_length (int): sequence length to extract. 
Seq gets post-pended with repeated items if out of range Returns: @@ -563,7 +564,7 @@ def get_dataset_sequence_from_demo(self, demo_id, index_in_demo, keys, seq_lengt demo_id, index_in_demo=index_in_demo, keys=keys, - num_frames_to_stack=0, # don't frame stack for meta keys + num_frames_to_stack=num_frames_to_stack, seq_length=seq_length, ) if self.get_pad_mask: @@ -582,6 +583,7 @@ def get_trajectory_at_index(self, index): demo_id, index_in_demo=0, keys=self.dataset_keys, + num_frames_to_stack=self.n_frame_stack - 1, # note: need to decrement self.n_frame_stack by one seq_length=demo_length ) meta["obs"] = self.get_obs_sequence_from_demo( diff --git a/robomimic/utils/env_utils.py b/robomimic/utils/env_utils.py index 9c722e15..d3d55ebe 100644 --- a/robomimic/utils/env_utils.py +++ b/robomimic/utils/env_utils.py @@ -5,6 +5,7 @@ """ from copy import deepcopy import robomimic.envs.env_base as EB +from robomimic.utils.log_utils import log_warning def get_env_class(env_meta=None, env_type=None, env=None): @@ -95,6 +96,39 @@ def check_env_type(type_to_check, env_meta=None, env_type=None, env=None): return (env_type == type_to_check) +def check_env_version(env, env_meta): + """ + Checks whether the passed env and env_meta dictionary having matching environment versions. + Logs warning if cannot find version or versions do not match. + + Args: + env (instance of EB.EnvBase): environment instance + + env_meta (dict): environment metadata, which should be loaded from demonstration + hdf5 with @FileUtils.get_env_metadata_from_dataset or from checkpoint (see + @FileUtils.env_from_checkpoint). Contains following key: + + :`'env_version'`: environment version, type str + """ + env_system_version = env.version + env_meta_version = env_meta.get("env_version", None) + + if env_meta_version is None: + log_warning( + "No environment version found in dataset!"\ + "\nCannot verify if dataset and installed environment versions match"\ + ) + elif env_system_version != env_meta_version: + log_warning( + "Dataset and installed environment version mismatch!"\ + "\nDataset environment version: {meta}"\ + "\nInstalled environment version: {sys}".format( + sys=env_system_version, + meta=env_meta_version, + ) + ) + + def is_robosuite_env(env_meta=None, env_type=None, env=None): """ Determines whether the environment is a robosuite environment. Accepts @@ -189,6 +223,7 @@ def create_env_from_metadata( use_image_obs=use_image_obs, **env_kwargs, ) + check_env_version(env, env_meta) return env @@ -232,7 +267,7 @@ def create_env_for_data_processing( env_kwargs.pop("camera_width", None) env_kwargs.pop("reward_shaping", None) - return env_class.create_for_data_processing( + env = env_class.create_for_data_processing( env_name=env_name, camera_names=camera_names, camera_height=camera_height, @@ -240,3 +275,17 @@ def create_env_for_data_processing( reward_shaping=reward_shaping, **env_kwargs, ) + check_env_version(env, env_meta) + return env + + +def wrap_env_from_config(env, config): + """ + Wraps environment using the provided Config object to determine which wrappers + to use (if any). 
+ """ + if config.train.frame_stack > 1: + from robomimic.envs.wrappers import FrameStackWrapper + env = FrameStackWrapper(env, num_frames=config.train.frame_stack) + + return env diff --git a/robomimic/utils/file_utils.py b/robomimic/utils/file_utils.py index 1312bf24..a86be5ba 100644 --- a/robomimic/utils/file_utils.py +++ b/robomimic/utils/file_utils.py @@ -63,6 +63,25 @@ def create_hdf5_filter_key(hdf5_path, demo_keys, key_name): return ep_lengths +def get_demos_for_filter_key(hdf5_path, filter_key): + """ + Gets demo keys that correspond to a particular filter key. + + Args: + hdf5_path (str): path to hdf5 file + filter_key (str): name of filter key + + Returns: + demo_keys ([str]): list of demonstration keys that + correspond to this filter key. For example, ["demo_0", + "demo_1"]. + """ + f = h5py.File(hdf5_path, "r") + demo_keys = [elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)][:])] + f.close() + return demo_keys + + def get_env_metadata_from_dataset(dataset_path): """ Retrieves env metadata from dataset. @@ -209,67 +228,81 @@ def update_config(cfg): cfg (dict): Raw dictionary of config values """ # Check if image modality is defined -- this means we're using an outdated config - modalities = cfg["observation"]["modalities"] - - found_img = False - for modality_group in ("obs", "subgoal", "goal"): - if modality_group in modalities: - img_modality = modalities[modality_group].pop("image", None) - if img_modality is not None: - found_img = True - cfg["observation"]["modalities"][modality_group]["rgb"] = img_modality - - if found_img: - # Also need to map encoder kwargs correctly - old_encoder_cfg = cfg["observation"].pop("encoder") - - # Create new encoder entry for RGB - rgb_encoder_cfg = { - "core_class": "VisualCore", - "core_kwargs": { - "backbone_kwargs": dict(), - "pool_kwargs": dict(), - }, - "obs_randomizer_class": None, - "obs_randomizer_kwargs": dict(), - } - - if "visual_feature_dimension" in old_encoder_cfg: - rgb_encoder_cfg["core_kwargs"]["feature_dimension"] = old_encoder_cfg["visual_feature_dimension"] - - if "visual_core" in old_encoder_cfg: - rgb_encoder_cfg["core_kwargs"]["backbone_class"] = old_encoder_cfg["visual_core"] - - for kwarg in ("pretrained", "input_coord_conv"): - if "visual_core_kwargs" in old_encoder_cfg and kwarg in old_encoder_cfg["visual_core_kwargs"]: - rgb_encoder_cfg["core_kwargs"]["backbone_kwargs"][kwarg] = old_encoder_cfg["visual_core_kwargs"][kwarg] - - # Optionally add pooling info too - if old_encoder_cfg.get("use_spatial_softmax", True): - rgb_encoder_cfg["core_kwargs"]["pool_class"] = "SpatialSoftmax" - - for kwarg in ("num_kp", "learnable_temperature", "temperature", "noise_std"): - if "spatial_softmax_kwargs" in old_encoder_cfg and kwarg in old_encoder_cfg["spatial_softmax_kwargs"]: - rgb_encoder_cfg["core_kwargs"]["pool_kwargs"][kwarg] = old_encoder_cfg["spatial_softmax_kwargs"][kwarg] - - # Update obs randomizer as well - for kwarg in ("obs_randomizer_class", "obs_randomizer_kwargs"): - if kwarg in old_encoder_cfg: - rgb_encoder_cfg[kwarg] = old_encoder_cfg[kwarg] - - # Store rgb config - cfg["observation"]["encoder"] = {"rgb": rgb_encoder_cfg} - - # Also add defaults for low dim - cfg["observation"]["encoder"]["low_dim"] = { - "core_class": None, - "core_kwargs": { - "backbone_kwargs": dict(), - "pool_kwargs": dict(), - }, - "obs_randomizer_class": None, - "obs_randomizer_kwargs": dict(), - } + # Note: There may be a nested hierarchy, so we possibly check all the nested obs cfgs which can include + # e.g. 
a planner and actor for HBC + + def find_obs_dicts_recursively(dic): + dics = [] + if "modalities" in dic: + dics.append(dic) + else: + for child_dic in dic.values(): + dics += find_obs_dicts_recursively(child_dic) + return dics + + obs_cfgs = find_obs_dicts_recursively(cfg["observation"]) + for obs_cfg in obs_cfgs: + modalities = obs_cfg["modalities"] + + found_img = False + for modality_group in ("obs", "subgoal", "goal"): + if modality_group in modalities: + img_modality = modalities[modality_group].pop("image", None) + if img_modality is not None: + found_img = True + modalities[modality_group]["rgb"] = img_modality + + if found_img: + # Also need to map encoder kwargs correctly + old_encoder_cfg = obs_cfg.pop("encoder") + + # Create new encoder entry for RGB + rgb_encoder_cfg = { + "core_class": "VisualCore", + "core_kwargs": { + "backbone_kwargs": dict(), + "pool_kwargs": dict(), + }, + "obs_randomizer_class": None, + "obs_randomizer_kwargs": dict(), + } + + if "visual_feature_dimension" in old_encoder_cfg: + rgb_encoder_cfg["core_kwargs"]["feature_dimension"] = old_encoder_cfg["visual_feature_dimension"] + + if "visual_core" in old_encoder_cfg: + rgb_encoder_cfg["core_kwargs"]["backbone_class"] = old_encoder_cfg["visual_core"] + + for kwarg in ("pretrained", "input_coord_conv"): + if "visual_core_kwargs" in old_encoder_cfg and kwarg in old_encoder_cfg["visual_core_kwargs"]: + rgb_encoder_cfg["core_kwargs"]["backbone_kwargs"][kwarg] = old_encoder_cfg["visual_core_kwargs"][kwarg] + + # Optionally add pooling info too + if old_encoder_cfg.get("use_spatial_softmax", True): + rgb_encoder_cfg["core_kwargs"]["pool_class"] = "SpatialSoftmax" + + for kwarg in ("num_kp", "learnable_temperature", "temperature", "noise_std"): + if "spatial_softmax_kwargs" in old_encoder_cfg and kwarg in old_encoder_cfg["spatial_softmax_kwargs"]: + rgb_encoder_cfg["core_kwargs"]["pool_kwargs"][kwarg] = old_encoder_cfg["spatial_softmax_kwargs"][kwarg] + + # Update obs randomizer as well + for kwarg in ("obs_randomizer_class", "obs_randomizer_kwargs"): + if kwarg in old_encoder_cfg: + rgb_encoder_cfg[kwarg] = old_encoder_cfg[kwarg] + + # Store rgb config + obs_cfg["encoder"] = {"rgb": rgb_encoder_cfg} + + # Also add defaults for low dim + obs_cfg["encoder"]["low_dim"] = { + "core_class": None, + "core_kwargs": { + "backbone_kwargs": dict(), + "pool_kwargs": dict(), + }, + "obs_randomizer_class": None, + "obs_randomizer_kwargs": dict(), + } def config_from_checkpoint(algo_name=None, ckpt_path=None, ckpt_dict=None, verbose=False): @@ -409,6 +442,8 @@ def env_from_checkpoint(ckpt_path=None, ckpt_dict=None, env_name=None, render=Fa render_offscreen=render_offscreen, use_image_obs=shape_meta["use_images"], ) + config, _ = config_from_checkpoint(algo_name=ckpt_dict["algo_name"], ckpt_dict=ckpt_dict, verbose=False) + env = EnvUtils.wrap_env_from_config(env, config=config) # apply environment warpper, if applicable if verbose: print("============= Loaded Environment =============") print(env) diff --git a/robomimic/utils/hyperparam_utils.py b/robomimic/utils/hyperparam_utils.py index cd3ce544..267536d3 100644 --- a/robomimic/utils/hyperparam_utils.py +++ b/robomimic/utils/hyperparam_utils.py @@ -16,7 +16,7 @@ class ConfigGenerator(object): Useful class to keep track of hyperparameters to sweep, and to generate the json configs for each experiment run. 
""" - def __init__(self, base_config_file, script_file): + def __init__(self, base_config_file, wandb_proj_name="debug", script_file=None, generated_config_dir=None): """ Args: base_config_file (str): path to a base json config to use as a starting point @@ -26,10 +26,21 @@ def __init__(self, base_config_file, script_file): """ assert isinstance(base_config_file, str) self.base_config_file = base_config_file - assert isinstance(script_file, str) - self.script_file = script_file + assert generated_config_dir is None or isinstance(generated_config_dir, str) + if generated_config_dir is not None: + generated_config_dir = os.path.expanduser(generated_config_dir) + self.generated_config_dir = generated_config_dir + assert script_file is None or isinstance(script_file, str) + if script_file is None: + self.script_file = os.path.join('~', 'tmp/tmpp.sh') + else: + self.script_file = script_file + self.script_file = os.path.expanduser(self.script_file) self.parameters = OrderedDict() + assert isinstance(wandb_proj_name, str) + self.wandb_proj_name = wandb_proj_name + def add_param(self, key, name, group, values, value_names=None): """ Add parameter to the hyperparameter sweep. @@ -189,10 +200,15 @@ def _generate_jsons(self): """ # base directory for saving jsons - base_dir = os.path.abspath(os.path.dirname(self.base_config_file)) + if self.generated_config_dir: + base_dir = self.generated_config_dir + if not os.path.exists(base_dir): + os.makedirs(base_dir) + else: + base_dir = os.path.abspath(os.path.dirname(self.base_config_file)) # read base json - base_config = load_json(self.base_config_file) + base_config = load_json(self.base_config_file, verbose=False) # base exp name from this base config base_exp_name = base_config['experiment']['name'] @@ -229,11 +245,35 @@ def _generate_jsons(self): for k in parameter_ranges: set_value_for_key(json_dict, k, v=parameter_ranges[k][i]) + # populate list of identifying meta for logger; + # see meta_config method in base_config.py for more info + json_dict["experiment"]["logging"]["wandb_proj_name"] = self.wandb_proj_name + if "meta" not in json_dict: + json_dict["meta"] = dict() + json_dict["meta"].update( + hp_base_config_file=self.base_config_file, + hp_keys=list(), + hp_values=list(), + ) + # logging: keep track of hyp param names and values as meta info + for k in parameter_ranges.keys(): + key_name = self.parameters[k].name + if key_name is not None and len(key_name) > 0: + if maybe_parameter_names[k] is not None: + value_name = maybe_parameter_names[k] + else: + value_name = setting[k] + + json_dict["meta"]["hp_keys"].append(key_name) + json_dict["meta"]["hp_values"].append(value_name) + # save file in same directory as old json json_path = os.path.join(base_dir, "{}.json".format(exp_name)) save_json(json_dict, json_path) json_paths.append(json_path) + print("Num exps:", len(json_paths)) + return json_paths def _script_from_jsons(self, json_paths): @@ -246,6 +286,9 @@ def _script_from_jsons(self, json_paths): for path in json_paths: # write python command to file cmd = "python train.py --config {}\n".format(path) + + print() + print(cmd) f.write(cmd) diff --git a/robomimic/utils/log_utils.py b/robomimic/utils/log_utils.py index a19de753..bc9ae55d 100644 --- a/robomimic/utils/log_utils.py +++ b/robomimic/utils/log_utils.py @@ -7,7 +7,15 @@ import numpy as np from datetime import datetime from contextlib import contextmanager +import textwrap +import time from tqdm import tqdm +from termcolor import colored + +import robomimic + +# global list of 
warning messages can be populated with @log_warning and flushed with @flush_warnings +WARNINGS_BUFFER = [] class PrintLogger(object): @@ -35,23 +43,64 @@ class DataLogger(object): """ Logging class to log metrics to tensorboard and/or retrieve running statistics about logged data. """ - def __init__(self, log_dir, log_tb=True): + def __init__(self, log_dir, config, log_tb=True, log_wandb=False): """ Args: log_dir (str): base path to store logs log_tb (bool): whether to use tensorboard logging """ self._tb_logger = None + self._wandb_logger = None self._data = dict() # store all the scalar data logged so far if log_tb: from tensorboardX import SummaryWriter self._tb_logger = SummaryWriter(os.path.join(log_dir, 'tb')) + if log_wandb: + import wandb + import robomimic.macros as Macros + + # set up wandb api key if specified in macros + if Macros.WANDB_API_KEY is not None: + os.environ["WANDB_API_KEY"] = Macros.WANDB_API_KEY + + assert Macros.WANDB_ENTITY is not None, "WANDB_ENTITY macro is set to None." \ + "\nSet this macro in {base_path}/macros_private.py" \ + "\nIf this file does not exist, first run {base_path}/scripts/setup_macros.py".format(base_path=robomimic.__path__[0]) + + # attempt to set up wandb 10 times. If unsuccessful after these trials, don't use wandb + num_attempts = 10 + for attempt in range(num_attempts): + try: + # set up wandb + self._wandb_logger = wandb + + self._wandb_logger.init( + entity=Macros.WANDB_ENTITY, + project=config.experiment.logging.wandb_proj_name, + name=config.experiment.name, + dir=log_dir, + mode=("offline" if attempt == num_attempts - 1 else "online"), + ) + + # set up info for identifying experiment + wandb_config = {k: v for (k, v) in config.meta.items() if k not in ["hp_keys", "hp_values"]} + for (k, v) in zip(config.meta["hp_keys"], config.meta["hp_values"]): + wandb_config[k] = v + if "algo" not in wandb_config: + wandb_config["algo"] = config.algo_name + self._wandb_logger.config.update(wandb_config) + + break + except Exception as e: + log_warning("wandb initialization error (attempt #{}): {}".format(attempt + 1, e)) + self._wandb_logger = None + time.sleep(30) + def record(self, k, v, epoch, data_type='scalar', log_stats=False): """ Record data with logger. - Args: k (str): key string v (float or image): value to store @@ -81,10 +130,22 @@ def record(self, k, v, epoch, data_type='scalar', log_stats=False): elif data_type == 'image': self._tb_logger.add_images(k, img_tensor=v, global_step=epoch, dataformats="NHWC") + if self._wandb_logger is not None: + try: + if data_type == 'scalar': + self._wandb_logger.log({k: v}, step=epoch) + if log_stats: + stats = self.get_stats(k) + for (stat_k, stat_v) in stats.items(): + self._wandb_logger.log({stat_k: stat_v}, step=epoch) + elif data_type == 'image': + raise NotImplementedError + except Exception as e: + log_warning("wandb logging: {}".format(e)) + def get_stats(self, k): """ Computes running statistics for a particular key. - Args: k (str): key string Returns: @@ -104,6 +165,9 @@ def close(self): if self._tb_logger is not None: self._tb_logger.close() + if self._wandb_logger is not None: + self._wandb_logger.finish() + class custom_tqdm(tqdm): """ @@ -131,3 +195,33 @@ def silence_stdout(): yield new_target finally: sys.stdout = old_target + + +def log_warning(message, color="yellow", print_now=True): + """ + This function logs a warning message by recording it in a global warning buffer. 
+ The global registry will be maintained until @flush_warnings is called, at + which point the warnings will get printed to the terminal. + + Args: + message (str): warning message to display + color (str): color of message - defaults to "yellow" + print_now (bool): if True (default), will print to terminal immediately, in + addition to adding it to the global warning buffer + """ + global WARNINGS_BUFFER + buffer_message = colored("ROBOMIMIC WARNING(\n{}\n)".format(textwrap.indent(message, " ")), color) + WARNINGS_BUFFER.append(buffer_message) + if print_now: + print(buffer_message) + + +def flush_warnings(): + """ + This function flushes all warnings from the global warning buffer to the terminal and + clears the global registry. + """ + global WARNINGS_BUFFER + for msg in WARNINGS_BUFFER: + print(msg) + WARNINGS_BUFFER = [] diff --git a/robomimic/utils/macros.py b/robomimic/utils/macros.py deleted file mode 100644 index c313b827..00000000 --- a/robomimic/utils/macros.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -Set of global variables shared across robomimic -""" -# Sets debugging mode. Should be set at top-level script so that internal -# debugging functionalities are made active -DEBUG = False diff --git a/robomimic/utils/obs_utils.py b/robomimic/utils/obs_utils.py index 134f976f..5768b959 100644 --- a/robomimic/utils/obs_utils.py +++ b/robomimic/utils/obs_utils.py @@ -926,7 +926,7 @@ def _default_obs_unprocessor(cls, obs): unprocessed_obs (np.array or torch.Tensor): depth passed through inverse operation of @process_depth """ - return TU.to_uint8(unprocess_frame(frame=obs, channel_dim=1, scale=1.)) + return unprocess_frame(frame=obs, channel_dim=1, scale=1.) class ScanModality(Modality): diff --git a/robomimic/utils/python_utils.py b/robomimic/utils/python_utils.py index fa47b221..5bc71bd1 100644 --- a/robomimic/utils/python_utils.py +++ b/robomimic/utils/python_utils.py @@ -3,7 +3,7 @@ """ import inspect from copy import deepcopy -import robomimic.utils.macros as Macros +import robomimic.macros as Macros def get_class_init_kwargs(cls): diff --git a/robomimic/utils/tensor_utils.py b/robomimic/utils/tensor_utils.py index 7d6cbffc..ec2063b2 100644 --- a/robomimic/utils/tensor_utils.py +++ b/robomimic/utils/tensor_utils.py @@ -482,7 +482,7 @@ def reshape_dimensions_single(x, begin_axis, end_axis, target_dims): Args: x (torch.Tensor): tensor to reshape begin_axis (int): begin dimension - end_axis (int): end dimension + end_axis (int): end dimension (inclusive) target_dims (tuple or list): target shape for the range of dimensions (@begin_axis, @end_axis) @@ -511,7 +511,7 @@ def reshape_dimensions(x, begin_axis, end_axis, target_dims): Args: x (dict or list or tuple): a possibly nested dictionary or list or tuple begin_axis (int): begin dimension - end_axis (int): end dimension + end_axis (int): end dimension (inclusive) target_dims (tuple or list): target shape for the range of dimensions (@begin_axis, @end_axis) diff --git a/robomimic/utils/test_utils.py b/robomimic/utils/test_utils.py index 148fe331..86f125e0 100644 --- a/robomimic/utils/test_utils.py +++ b/robomimic/utils/test_utils.py @@ -46,12 +46,12 @@ def example_dataset_path(): from a server if it does not exist. """ dataset_folder = os.path.join(robomimic.__path__[0], "../tests/assets/") - dataset_path = os.path.join(dataset_folder, "test.hdf5") + dataset_path = os.path.join(dataset_folder, "test_v141.hdf5") if not os.path.exists(dataset_path): print("\nWARNING: test hdf5 does not exist! 
Downloading from server...") os.makedirs(dataset_folder, exist_ok=True) FileUtils.download_url( - url="http://downloads.cs.stanford.edu/downloads/rt_benchmark/test.hdf5", + url="http://downloads.cs.stanford.edu/downloads/rt_benchmark/test_v141.hdf5", download_dir=dataset_folder, ) return dataset_path @@ -130,6 +130,10 @@ def get_base_config(algo_name): config.experiment.validation_epoch_every_n_steps = 3 config.train.num_epochs = 1 + # default train and validation filter keys + config.train.hdf5_filter_key = "train" + config.train.hdf5_validation_filter_key = "valid" + # ensure model saving, rollout, and offscreen video rendering are tested too config.experiment.save.enabled = True config.experiment.save.every_n_epochs = 1 diff --git a/robomimic/utils/torch_utils.py b/robomimic/utils/torch_utils.py index bb6fc372..433c8797 100644 --- a/robomimic/utils/torch_utils.py +++ b/robomimic/utils/torch_utils.py @@ -101,11 +101,21 @@ def optimizer_from_optim_params(net_optim_params, net): Returns: optimizer (torch.optim.Optimizer): optimizer """ - return optim.Adam( - params=net.parameters(), - lr=net_optim_params["learning_rate"]["initial"], - weight_decay=net_optim_params["regularization"]["L2"], - ) + optimizer_type = net_optim_params.get("optimizer_type", "adam") + lr = net_optim_params["learning_rate"]["initial"] + + if optimizer_type == "adam": + return optim.Adam( + params=net.parameters(), + lr=lr, + weight_decay=net_optim_params["regularization"]["L2"], + ) + elif optimizer_type == "adamw": + return optim.AdamW( + params=net.parameters(), + lr=lr, + weight_decay=net_optim_params["regularization"]["L2"], + ) def lr_scheduler_from_optim_params(net_optim_params, net, optimizer): @@ -126,14 +136,30 @@ def lr_scheduler_from_optim_params(net_optim_params, net, optimizer): Returns: lr_scheduler (torch.optim.lr_scheduler or None): learning rate scheduler """ + lr_scheduler_type = net_optim_params["learning_rate"].get("scheduler_type", "multistep") + epoch_schedule = net_optim_params["learning_rate"]["epoch_schedule"] + lr_scheduler = None - if len(net_optim_params["learning_rate"]["epoch_schedule"]) > 0: - # decay LR according to the epoch schedule - lr_scheduler = optim.lr_scheduler.MultiStepLR( - optimizer=optimizer, - milestones=net_optim_params["learning_rate"]["epoch_schedule"], - gamma=net_optim_params["learning_rate"]["decay_factor"], - ) + if len(epoch_schedule) > 0: + if lr_scheduler_type == "linear": + assert len(epoch_schedule) == 1 + end_epoch = epoch_schedule[0] + + return optim.lr_scheduler.LinearLR( + optimizer, + start_factor=1.0, + end_factor=net_optim_params["learning_rate"]["decay_factor"], + total_iters=end_epoch, + ) + elif lr_scheduler_type == "multistep": + return optim.lr_scheduler.MultiStepLR( + optimizer=optimizer, + milestones=epoch_schedule, + gamma=net_optim_params["learning_rate"]["decay_factor"], + ) + else: + raise ValueError("Invalid LR scheduler type: {}".format(lr_scheduler_type)) + return lr_scheduler diff --git a/robomimic/utils/train_utils.py b/robomimic/utils/train_utils.py index 40fc110c..b5fb1e48 100644 --- a/robomimic/utils/train_utils.py +++ b/robomimic/utils/train_utils.py @@ -19,9 +19,11 @@ import robomimic import robomimic.utils.tensor_utils as TensorUtils import robomimic.utils.log_utils as LogUtils +import robomimic.utils.file_utils as FileUtils from robomimic.utils.dataset import SequenceDataset from robomimic.envs.env_base import EnvBase +from robomimic.envs.wrappers import EnvWrapper from robomimic.algo import RolloutPolicy @@ -93,20 +95,31 @@ def 
load_data_for_training(config, obs_keys): """ # config can contain an attribute to filter on - filter_by_attribute = config.train.hdf5_filter_key + train_filter_by_attribute = config.train.hdf5_filter_key + valid_filter_by_attribute = config.train.hdf5_validation_filter_key + if valid_filter_by_attribute is not None: + assert config.experiment.validate, "specified validation filter key {}, but config.experiment.validate is not set".format(valid_filter_by_attribute) # load the dataset into memory if config.experiment.validate: assert not config.train.hdf5_normalize_obs, "no support for observation normalization with validation data yet" - train_filter_by_attribute = "train" - valid_filter_by_attribute = "valid" - if filter_by_attribute is not None: - train_filter_by_attribute = "{}_{}".format(filter_by_attribute, train_filter_by_attribute) - valid_filter_by_attribute = "{}_{}".format(filter_by_attribute, valid_filter_by_attribute) + assert (train_filter_by_attribute is not None) and (valid_filter_by_attribute is not None), \ + "did not specify filter keys corresponding to train and valid split in dataset" \ + " - please fill config.train.hdf5_filter_key and config.train.hdf5_validation_filter_key" + train_demo_keys = FileUtils.get_demos_for_filter_key( + hdf5_path=os.path.expanduser(config.train.data), + filter_key=train_filter_by_attribute, + ) + valid_demo_keys = FileUtils.get_demos_for_filter_key( + hdf5_path=os.path.expanduser(config.train.data), + filter_key=valid_filter_by_attribute, + ) + assert set(train_demo_keys).isdisjoint(set(valid_demo_keys)), "training demonstrations overlap with " \ + "validation demonstrations!" train_dataset = dataset_factory(config, obs_keys, filter_by_attribute=train_filter_by_attribute) valid_dataset = dataset_factory(config, obs_keys, filter_by_attribute=valid_filter_by_attribute) else: - train_dataset = dataset_factory(config, obs_keys, filter_by_attribute=filter_by_attribute) + train_dataset = dataset_factory(config, obs_keys, filter_by_attribute=train_filter_by_attribute) valid_dataset = None return train_dataset, valid_dataset @@ -138,11 +151,11 @@ def dataset_factory(config, obs_keys, filter_by_attribute=None, dataset_path=Non hdf5_path=dataset_path, obs_keys=obs_keys, dataset_keys=config.train.dataset_keys, - load_next_obs=True, # make sure dataset returns s' - frame_stack=1, # no frame stacking + load_next_obs=config.train.hdf5_load_next_obs, # whether to load next observations (s') from dataset + frame_stack=config.train.frame_stack, seq_length=config.train.seq_length, - pad_frame_stack=True, - pad_seq_length=True, # pad last obs per trajectory to ensure all sequences are sampled + pad_frame_stack=config.train.pad_frame_stack, + pad_seq_length=config.train.pad_seq_length, get_pad_mask=False, goal_mode=config.train.goal_mode, hdf5_cache_mode=config.train.hdf5_cache_mode, @@ -190,7 +203,7 @@ def run_rollout( results (dict): dictionary containing return, success rate, etc. """ assert isinstance(policy, RolloutPolicy) - assert isinstance(env, EnvBase) + assert isinstance(env, EnvBase) or isinstance(env, EnvWrapper) policy.start_episode() @@ -484,7 +497,7 @@ def save_model(model, config, env_meta, shape_meta, ckpt_path, obs_normalization print("save checkpoint to {}".format(ckpt_path)) -def run_epoch(model, data_loader, epoch, validate=False, num_steps=None): +def run_epoch(model, data_loader, epoch, validate=False, num_steps=None, obs_normalization_stats=None): """ Run an epoch of training or validation. 
@@ -502,6 +515,10 @@ def run_epoch(model, data_loader, epoch, validate=False, num_steps=None): num_steps (int): if provided, this epoch lasts for a fixed number of batches (gradient steps), otherwise the epoch is a complete pass through the training dataset + obs_normalization_stats (dict or None): if provided, this should map observation keys to dicts + with a "mean" and "std" of shape (1, ...) where ... is the default + shape for the observation. + Returns: step_log_all (dict): dictionary of logged training metrics averaged across all batches """ @@ -534,6 +551,7 @@ def run_epoch(model, data_loader, epoch, validate=False, num_steps=None): # process batch for training t = time.time() input_batch = model.process_batch_for_training(batch) + input_batch = model.postprocess_batch_for_training(input_batch, obs_normalization_stats=obs_normalization_stats) timing_stats["Process_Batch"].append(time.time() - t) # forward and backward pass diff --git a/robomimic/utils/vis_utils.py b/robomimic/utils/vis_utils.py index 9c54a37d..48abd7c1 100644 --- a/robomimic/utils/vis_utils.py +++ b/robomimic/utils/vis_utils.py @@ -3,6 +3,7 @@ These functions can be a useful debugging tool. """ import numpy as np +import matplotlib.pyplot as plt import robomimic.utils.tensor_utils as TensorUtils import robomimic.utils.obs_utils as ObsUtils @@ -52,3 +53,40 @@ def image_tensor_to_disk(image, fname): image = image[0] image = image_tensor_to_numpy(image) image_to_disk(image, fname) + + +def visualize_image_randomizer(original_image, randomized_image, randomizer_name=None): + """ + A function that visualizes the before and after of an image-based input randomizer + Args: + original_image: batch of original image shaped [B, H, W, 3] + randomized_image: randomized image shaped [B, N, H, W, 3]. 
N is the number of randomization per input sample + randomizer_name: (Optional) name of the randomizer + Returns: + None + """ + + B, N, H, W, C = randomized_image.shape + + # Create a grid of subplots with B rows and N+1 columns (1 for the original image, N for the randomized images) + fig, axes = plt.subplots(B, N + 1, figsize=(4 * (N + 1), 4 * B)) + + for i in range(B): + # Display the original image in the first column of each row + axes[i, 0].imshow(original_image[i]) + axes[i, 0].set_title("Original") + axes[i, 0].axis("off") + + # Display the randomized images in the remaining columns of each row + for j in range(N): + axes[i, j + 1].imshow(randomized_image[i, j]) + axes[i, j + 1].axis("off") + + title = randomizer_name if randomizer_name is not None else "Randomized" + fig.suptitle(title, fontsize=16) + + # Adjust the space between subplots for better visualization + plt.subplots_adjust(wspace=0.5, hspace=0.5) + + # Show the entire grid of subplots + plt.show() diff --git a/setup.py b/setup.py index 40c23ab4..0e1c510b 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ "tensorboardX", "imageio", "imageio-ffmpeg", + "matplotlib", "egl_probe>=1.0.1", "torch", "torchvision", @@ -33,10 +34,10 @@ include_package_data=True, python_requires='>=3', description="robomimic: A Modular Framework for Robot Learning from Demonstration", - author="Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang", + author="Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang, Matthew Bronars", url="https://github.com/ARISE-Initiative/robomimic", author_email="amandlek@cs.stanford.edu", - version="0.2.0", + version="0.3.0", long_description=long_description, long_description_content_type='text/markdown' ) diff --git a/tests/test_bc.py b/tests/test_bc.py index b8c83720..adc12501 100644 --- a/tests/test_bc.py +++ b/tests/test_bc.py @@ -232,6 +232,23 @@ def bc_rnn_gmm_modifier(config): return config +@register_mod("bc-transformer") +def bc_transformer_modifier(config): + config.algo.transformer.enabled = True + config.train.frame_stack = 10 + config.train.seq_length = 1 + return config + + +@register_mod("bc-transformer-gmm") +def bc_transformer_gmm_modifier(config): + config.algo.gmm.enabled = True + config.algo.transformer.enabled = True + config.train.frame_stack = 10 + config.train.seq_length = 1 + return config + + # add image version of all tests image_modifiers = OrderedDict() for test_name in MODIFIERS: diff --git a/tests/test_iql.py b/tests/test_iql.py new file mode 100644 index 00000000..e80a8f3b --- /dev/null +++ b/tests/test_iql.py @@ -0,0 +1,143 @@ +""" +Test script for IQL algorithms. Each test trains a variant of IQL +for a handful of gradient steps and tries one rollout with +the model. Excludes stdout output by default (pass --verbose +to see stdout output). +""" +import argparse +from collections import OrderedDict + +import robomimic +from robomimic.config import Config +import robomimic.utils.test_utils as TestUtils +from robomimic.utils.log_utils import silence_stdout +from robomimic.utils.torch_utils import dummy_context_mgr + + +def get_algo_base_config(): + """ + Base config for testing IQL algorithms. 
+ """ + + # config with basic settings for quick training run + config = TestUtils.get_base_config(algo_name="iql") + + # low-level obs (note that we define it here because @observation structure might vary per algorithm, + # for example HBC) + config.observation.modalities.obs.low_dim = ["robot0_eef_pos", "robot0_eef_quat", "robot0_gripper_qpos", "object"] + config.observation.modalities.obs.rgb = [] + + return config + + +def convert_config_for_images(config): + """ + Modify config to use image observations. + """ + + # using high-dimensional images - don't load entire dataset into memory, and smaller batch size + config.train.hdf5_cache_mode = "low_dim" + config.train.num_data_workers = 0 + config.train.batch_size = 16 + + # replace object with rgb modality + config.observation.modalities.obs.low_dim = ["robot0_eef_pos", "robot0_eef_quat", "robot0_gripper_qpos"] + config.observation.modalities.obs.rgb = ["agentview_image"] + + # set up visual encoders + config.observation.encoder.rgb.core_class = "VisualCore" + config.observation.encoder.rgb.core_kwargs.feature_dimension = 64 + config.observation.encoder.rgb.core_kwargs.backbone_class = 'ResNet18Conv' # ResNet backbone for image observations (unused if no image observations) + config.observation.encoder.rgb.core_kwargs.backbone_kwargs.pretrained = False # kwargs for visual core + config.observation.encoder.rgb.core_kwargs.backbone_kwargs.input_coord_conv = False + config.observation.encoder.rgb.core_kwargs.pool_class = "SpatialSoftmax" # Alternate options are "SpatialMeanPool" or None (no pooling) + config.observation.encoder.rgb.core_kwargs.pool_kwargs.num_kp = 32 # Default arguments for "SpatialSoftmax" + config.observation.encoder.rgb.core_kwargs.pool_kwargs.learnable_temperature = False # Default arguments for "SpatialSoftmax" + config.observation.encoder.rgb.core_kwargs.pool_kwargs.temperature = 1.0 # Default arguments for "SpatialSoftmax" + config.observation.encoder.rgb.core_kwargs.pool_kwargs.noise_std = 0.0 + + # observation randomizer class - set to None to use no randomization, or 'CropRandomizer' to use crop randomization + config.observation.encoder.rgb.obs_randomizer_class = None + + return config + + +def make_image_modifier(config_modifier): + """ + turn a config modifier into its image version. 
Note that + this explicit function definition is needed for proper + scoping of @config_modifier + """ + return lambda x: config_modifier(convert_config_for_images(x)) + + +# mapping from test name to config modifier functions +MODIFIERS = OrderedDict() +def register_mod(test_name): + def decorator(config_modifier): + MODIFIERS[test_name] = config_modifier + return decorator + + +@register_mod("iql-gaussian") +def iql_default_modifier(config): + config.algo.actor.net.type = "gaussian" + return config + + +@register_mod("iql-gmm") +def iql_default_modifier(config): + config.algo.actor.net.type = "gmm" + return config + + +@register_mod("iql-clip-adv") +def iql_default_modifier(config): + config.algo.adv.clip_adv_value = 1.0 + return config + + +# add image version of all tests +image_modifiers = OrderedDict() +for test_name in MODIFIERS: + lst = test_name.split("-") + name = "-".join(lst[:1] + ["rgb"] + lst[1:]) + image_modifiers[name] = make_image_modifier(MODIFIERS[test_name]) +MODIFIERS.update(image_modifiers) + + +# test for image crop randomization +@register_mod("iql-image-crop") +def iql_image_crop_modifier(config): + config = convert_config_for_images(config) + + # observation randomizer class - using Crop randomizer + config.observation.encoder.rgb.obs_randomizer_class = "CropRandomizer" + + # kwargs for observation randomizers (for the CropRandomizer, this is size and number of crops) + config.observation.encoder.rgb.obs_randomizer_kwargs.crop_height = 76 + config.observation.encoder.rgb.obs_randomizer_kwargs.crop_width = 76 + config.observation.encoder.rgb.obs_randomizer_kwargs.num_crops = 1 + config.observation.encoder.rgb.obs_randomizer_kwargs.pos_enc = False + return config + + +def test_iql(silence=True): + for test_name in MODIFIERS: + context = silence_stdout() if silence else dummy_context_mgr() + with context: + base_config = get_algo_base_config() + res_str = TestUtils.test_run(base_config=base_config, config_modifier=MODIFIERS[test_name]) + print("{}: {}".format(test_name, res_str)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--verbose", + action='store_true', + help="don't suppress stdout during tests", + ) + args = parser.parse_args() + + test_iql(silence=(not args.verbose))
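
The following usage sketches illustrate some of the utilities introduced in this patch; file paths, project names, and parameter values in them are placeholders rather than part of the changes above.

The new `EnvUtils.wrap_env_from_config` helper (and the `FrameStackWrapper` it applies) can also be used outside of `train.py`. A minimal sketch, assuming a v0.3-style config and a dataset whose metadata can be read with `FileUtils.get_env_metadata_from_dataset` (the dataset path below is hypothetical):

```python
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.file_utils as FileUtils
from robomimic.config import config_factory

dataset_path = "/path/to/demo.hdf5"  # hypothetical dataset path

# build the environment from metadata stored in the dataset
env_meta = FileUtils.get_env_metadata_from_dataset(dataset_path=dataset_path)
env = EnvUtils.create_env_from_metadata(env_meta=env_meta, render=False, render_offscreen=False)

# any config with train.frame_stack > 1 causes wrap_env_from_config to apply FrameStackWrapper;
# otherwise the environment is returned unchanged
config = config_factory(algo_name="bc")
config.train.frame_stack = 10
config.train.seq_length = 1
env = EnvUtils.wrap_env_from_config(env, config=config)
```

Note that `create_env_from_metadata` now also calls `check_env_version`, so a warning is logged if the dataset and installed environment versions do not match.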
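
`FileUtils.get_demos_for_filter_key` and the separate `config.train.hdf5_validation_filter_key` make it possible to verify a train/validation split before launching a run. A sketch, again with a placeholder dataset path and the `"train"` / `"valid"` filter keys used by `test_utils.get_base_config`:

```python
import robomimic.utils.file_utils as FileUtils

dataset_path = "/path/to/demo.hdf5"  # placeholder path; the file must contain mask/train and mask/valid

train_keys = FileUtils.get_demos_for_filter_key(hdf5_path=dataset_path, filter_key="train")
valid_keys = FileUtils.get_demos_for_filter_key(hdf5_path=dataset_path, filter_key="valid")

# load_data_for_training performs the same disjointness check before building the datasets
assert set(train_keys).isdisjoint(valid_keys), "train and valid demos overlap"
print("{} train demos, {} valid demos".format(len(train_keys), len(valid_keys)))
```

In a config, the corresponding settings are `config.train.hdf5_filter_key = "train"`, `config.train.hdf5_validation_filter_key = "valid"`, and `config.experiment.validate = True`; specifying a validation filter key without enabling validation now triggers an assertion.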
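
Weights & Biases logging is driven by two new config keys plus the macros file. A sketch of enabling it for a run (the project name is a placeholder, and `WANDB_ENTITY` must be set in `macros_private.py`, which `scripts/setup_macros.py` generates):

```python
from robomimic.config import config_factory

config = config_factory(algo_name="bc")
config.experiment.logging.log_tb = True                    # tensorboard logging, unchanged
config.experiment.logging.log_wandb = True                 # new: wandb logging
config.experiment.logging.wandb_proj_name = "my_project"   # placeholder project name
```

`DataLogger` retries `wandb.init` up to 10 times and switches to offline mode on the final attempt, so a flaky connection degrades to a logged warning rather than killing the run. `ConfigGenerator` accepts the same project name through its new `wandb_proj_name` argument and writes it into every generated json.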
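
The warning buffer in `log_utils` is a small global registry: warnings print immediately by default and are replayed once `flush_warnings` is called, which `train.py` now does right before the training loop. A short sketch:

```python
from robomimic.utils.log_utils import log_warning, flush_warnings

log_warning("no environment version found in dataset")          # prints now and is buffered
log_warning("using default camera settings", print_now=False)   # buffered only

# replay everything collected so far (e.g. just before training starts), then clear the buffer
flush_warnings()
```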
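
`optimizer_from_optim_params` and `lr_scheduler_from_optim_params` now read the optional `optimizer_type` and `learning_rate.scheduler_type` entries. A sketch of the dictionary they expect, exercised on a throwaway network (the values are illustrative, not recommended defaults):

```python
import torch.nn as nn
import robomimic.utils.torch_utils as TorchUtils

net = nn.Linear(10, 2)  # throwaway network for illustration

net_optim_params = {
    "optimizer_type": "adamw",        # "adam" (default) or "adamw"
    "learning_rate": {
        "initial": 1e-4,
        "decay_factor": 0.1,
        "epoch_schedule": [100],      # the "linear" scheduler requires exactly one milestone
        "scheduler_type": "linear",   # "multistep" (default) or "linear"
    },
    "regularization": {"L2": 0.01},
}

optimizer = TorchUtils.optimizer_from_optim_params(net_optim_params, net)
scheduler = TorchUtils.lr_scheduler_from_optim_params(net_optim_params, net, optimizer)
```

The `"linear"` option maps onto `torch.optim.lr_scheduler.LinearLR`, so it requires a reasonably recent PyTorch (1.10 or later).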
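
`visualize_image_randomizer` uses the `matplotlib` dependency added to `setup.py`. A self-contained sketch with random data standing in for real observations and their randomized crops:

```python
import numpy as np
from robomimic.utils.vis_utils import visualize_image_randomizer

B, N, H, W = 2, 3, 84, 84
original = np.random.rand(B, H, W, 3)       # batch of original images in [0, 1]
randomized = np.random.rand(B, N, H, W, 3)  # N randomized variants per original image

visualize_image_randomizer(original, randomized, randomizer_name="CropRandomizer")
```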