diff --git a/README.md b/README.md
index 45b7962..341e399 100644
--- a/README.md
+++ b/README.md
@@ -15,16 +15,22 @@ This code is building upon Dreamer:
 
 ## Instructions
 
-Get dependencies:
+Create a conda environment with all dependencies:
+```
+conda env create --file conda-env.yml
+```
+This already installs the requirements in `requirements.txt` for you. Make sure you have MuJoCo set up on your machine beforehand (typically in /home/yourname/.mujoco/mujoco200/). Conda does not do this for you!
+Besides the steps in the MuJoCo documentation, I also had to run:
 
 ```
-pip3 install --user tensorflow-gpu==2.2.0
-pip3 install --user tensorflow_probability
-pip3 install --user git+git://github.com/deepmind/dm_control.git
-pip3 install --user pandas
-pip3 install --user matplotlib
+export LD_LIBRARY_PATH=$HOME/.mujoco/mujoco200/bin:$LD_LIBRARY_PATH
+export MUJOCO_PY_MJPRO_PATH=$HOME/.mujoco/mujoco200/
+export MUJOCO_PY_MJKEY_PATH=$HOME/.mujoco/mjkey.txt
+sudo apt install libosmesa6-dev
 ```
+Maybe libosmesa6-dev could be included inside `conda-env.yml`, but I could not find a suitable channel for it.
+
 
 Train the agent:
 
 ```
diff --git a/bigger.png b/bigger.png
new file mode 100644
index 0000000..3123190
Binary files /dev/null and b/bigger.png differ
diff --git a/conda-env.yml b/conda-env.yml
index 71c534b..4cf08cb 100644
--- a/conda-env.yml
+++ b/conda-env.yml
@@ -6,5 +6,10 @@ dependencies:
   - python=3.8
   - pip
   - git
+  - mesalib
+  - patchelf
+  - ffmpeg
+  - cudatoolkit=10.1.243
+  - cudnn=7.6.5
   - pip:
     - -r file:requirements.txt
diff --git a/dreamer.py b/dreamer.py
index 7f8d262..d7a6fb3 100644
--- a/dreamer.py
+++ b/dreamer.py
@@ -39,10 +39,10 @@ def define_config():
   config.gpu_growth = True
   config.precision = 16
   # Environment.
-  config.task = 'dmc_FetchReach-v1'
+  config.task = 'robotics_FetchReach-v1'
   config.envs = 1
   config.parallel = 'none'
-  config.action_repeat = 2
+  config.action_repeat = 1
   config.time_limit = 1000
   config.prefill = 5000
   config.eval_noise = 0.0
@@ -61,7 +61,7 @@ def define_config():
   config.weight_decay = 0.0
   config.weight_decay_pattern = r'.*'
   # Training.
-  config.batch_size = 50
+  config.batch_size = 100
   config.batch_length = 50
   config.train_every = 1000
   config.train_steps = 100
@@ -163,7 +163,9 @@ def _train(self, data, log_images):
       image_pred = self._decode(feat)
       reward_pred = self._reward(feat)
       likes = tools.AttrDict()
-      likes.image = tf.reduce_mean(image_pred.log_prob(data['image']))
+      # print(image_pred.log_prob(data['image']).shape)  # TODO: delete this debug line
+      # print(image_pred.log_prob(data['image']))  # TODO: delete this debug line
+      likes.image = tf.reduce_mean(image_pred.log_prob(data['image']))  # TODO: weight red channel stronger? -> not needed
       likes.reward = tf.reduce_mean(reward_pred.log_prob(data['reward']))
       if self._c.pcont:
         pcont_pred = self._pcont(feat)
@@ -308,6 +310,7 @@ def _image_summaries(self, data, embed, image_pred):
     model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1)
     error = (model - truth + 1) / 2
     openl = tf.concat([truth, model, error], 2)
+    print(f'Writing image summary with step {tf.summary.experimental.get_step()}')  # TODO: delete this debug line
     tools.graph_summary(
         self._writer, tools.video_summary, 'agent/openl', openl)
 
diff --git a/image (3rd copy).png b/image (3rd copy).png
new file mode 100644
index 0000000..9050b15
Binary files /dev/null and b/image (3rd copy).png differ
diff --git a/image (4th copy).png b/image (4th copy).png
new file mode 100644
index 0000000..8445794
Binary files /dev/null and b/image (4th copy).png differ
diff --git a/image (another copy).png b/image (another copy).png
new file mode 100644
index 0000000..562b65a
Binary files /dev/null and b/image (another copy).png differ
diff --git a/image (copy).png b/image (copy).png
new file mode 100644
index 0000000..78d11b5
Binary files /dev/null and b/image (copy).png differ
diff --git a/image.png b/image.png
index eef211b..3d753b8 100644
Binary files a/image.png and b/image.png differ
diff --git a/individualImage-hand.gif b/individualImage-hand.gif
new file mode 100644
index 0000000..35a2a6b
Binary files /dev/null and b/individualImage-hand.gif differ
diff --git a/lsjhf.gif b/lsjhf.gif
new file mode 100644
index 0000000..9ab6f60
Binary files /dev/null and b/lsjhf.gif differ
diff --git a/wrappers.py b/wrappers.py
index 0b3dd25..68b7bbf 100644
--- a/wrappers.py
+++ b/wrappers.py
@@ -152,7 +152,7 @@ class Robotics:
   def __init__(self, name, size=(64, 64)):
     import gym
     with self.LOCK:
-      self._env = gym.make(name)
+      self._env = gym.make(name, reward_type='sparse')  # TODO: reward type dense or sparse?
     self._size = size
 
   @property
@@ -185,10 +185,14 @@ def reset(self):
     return obs
 
   def render(self, *args, **kwargs):
-    image = self._env.render(mode='rgb_array', width=self._size[0], height=self._size[1])
+    # image = self._env.render(mode='rgb_array', width=128, height=128)  # fetch reach env
+    # image = Image.fromarray(image).crop((28, 22, 28+self._size[0], 22+self._size[1]))  # fetch reach env
+    image = self._env.render(mode='rgb_array', width=90, height=90)  # hand reach env
+    image = Image.fromarray(image).crop((30, 15, 30+self._size[0], 15+self._size[1]))  # hand reach env
     # image = np.array(Image.fromarray(image).resize(self._size, Image.BILINEAR))
-    image = np.clip(image, 0, 255).astype(np.uint8)
-    return image
+    # image = np.clip(image, 0, 255).astype(np.uint8)
+    # self.save(np.array(image))  # uncomment this line to save the rendered image locally
+    return np.array(image)
 
   def combine(self, image, goal_image):
     pass
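
For reference, a standalone sketch of the render-and-crop pipeline that the `wrappers.py` hunk above applies. The env id `HandReach-v0` and the `reset()` call are assumptions for illustration (in the repo the name arrives via `config.task`); the 90x90 render size and the crop offsets are the values hard-coded in the diff, and they assume an older gym whose robotics `render()` accepts `width`/`height` keywords:

```
import gym
import numpy as np
from PIL import Image

size = (64, 64)
# 'HandReach-v0' is assumed here; the wrapper receives the name as an argument.
env = gym.make('HandReach-v0', reward_type='sparse')
env.reset()

# Render a slightly larger frame and cut out the region of interest,
# rather than downscaling the whole frame.
frame = env.render(mode='rgb_array', width=90, height=90)
image = Image.fromarray(frame).crop((30, 15, 30 + size[0], 15 + size[1]))
# Note: 30 + 64 = 94 exceeds the 90 px frame width, so PIL fills the last
# few columns with black padding; shrink the x offset if that matters.
obs = np.array(image)  # uint8 array of shape (64, 64, 3)
```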