machine transfer

epistoteles · Jul 7, 2021 · 04156bb · 04156bb
1 parent 84c32cd
commit 04156bb
Show file tree

Hide file tree

Showing 12 changed files with 32 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -15,16 +15,22 @@ This code is building upon Dreamer:
 
 ## Instructions
 
-Get dependencies:
+Create conda environment with all dependencies:
+```
+conda env create --file conda-env.yml
+```
+This already installs the requirements in `requirements.txt` for you. Make sure you have MuJoCo set up on your machine beforehand (typically in `/home/yourname/.mujoco/mujoco200/`). This is not done by conda for you!
+Besides the steps in the MuJoCo documentation, I also had to run:
 
 ```
-pip3 install --user tensorflow-gpu==2.2.0
-pip3 install --user tensorflow_probability
-pip3 install --user git+git://github.com/deepmind/dm_control.git
-pip3 install --user pandas
-pip3 install --user matplotlib
+export LD_LIBRARY_PATH=$HOME/.mujoco/mujoco200/bin:$LD_LIBRARY_PATH
+export MUJOCO_PY_MJPRO_PATH=$HOME/.mujoco/mujoco200/
+export MUJOCO_PY_MJKEY_PATH=$HOME/.mujoco/mjkey.txt
+sudo apt install libosmesa6-dev
 ```
 
+Maybe libosmesa6-dev could be included inside `conda-env.yml`, but I could not find a suitable channel for it.
+
 Train the agent:
 
 ```

diff --git a/bigger.png b/bigger.png
diff --git a/conda-env.yml b/conda-env.yml
@@ -6,5 +6,10 @@ dependencies:
    - python=3.8
    - pip
    - git
+   - mesalib
+   - patchelf
+   - ffmpeg
+   - cudatoolkit=10.1.243
+   - cudnn=7.6.5
    - pip:
        - -r file:requirements.txt
diff --git a/dreamer.py b/dreamer.py
@@ -39,10 +39,10 @@ def define_config():
     config.gpu_growth = True
     config.precision = 16
     # Environment.
-    config.task = 'dmc_FetchReach-v1'
+    config.task = 'robotics_FetchReach-v1'
     config.envs = 1
     config.parallel = 'none'
-    config.action_repeat = 2
+    config.action_repeat = 1
     config.time_limit = 1000
     config.prefill = 5000
     config.eval_noise = 0.0
@@ -61,7 +61,7 @@ def define_config():
     config.weight_decay = 0.0
     config.weight_decay_pattern = r'.*'
     # Training.
-    config.batch_size = 50
+    config.batch_size = 100
     config.batch_length = 50
     config.train_every = 1000
     config.train_steps = 100
@@ -163,7 +163,9 @@ def _train(self, data, log_images):
             image_pred = self._decode(feat)
             reward_pred = self._reward(feat)
             likes = tools.AttrDict()
-            likes.image = tf.reduce_mean(image_pred.log_prob(data['image']))
+            # print(image_pred.log_prob(data['image']).shape)  # TODO: delete this debug line
+            # print(image_pred.log_prob(data['image']))  # TODO: delete this debug line
+            likes.image = tf.reduce_mean(image_pred.log_prob(data['image']))  # TODO: weight red channel stronger? -> not needed
             likes.reward = tf.reduce_mean(reward_pred.log_prob(data['reward']))
             if self._c.pcont:
                 pcont_pred = self._pcont(feat)
@@ -308,6 +310,7 @@ def _image_summaries(self, data, embed, image_pred):
         model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1)
         error = (model - truth + 1) / 2
         openl = tf.concat([truth, model, error], 2)
+        print(f'Writing image summary with step {tf.summary.experimental.get_step()}')  # TODO delete this debug line
         tools.graph_summary(
             self._writer, tools.video_summary, 'agent/openl', openl)
 

diff --git a/image (3rd copy).png b/image (3rd copy).png
diff --git a/image (4th copy).png b/image (4th copy).png
diff --git a/image (another copy).png b/image (another copy).png
diff --git a/image (copy).png b/image (copy).png
diff --git a/image.png b/image.png
diff --git a/individualImage-hand.gif b/individualImage-hand.gif
diff --git a/lsjhf.gif b/lsjhf.gif
diff --git a/wrappers.py b/wrappers.py
@@ -152,7 +152,7 @@ class Robotics:
     def __init__(self, name, size=(64, 64)):
         import gym
         with self.LOCK:
-            self._env = gym.make(name)
+            self._env = gym.make(name, reward_type='sparse')  # TODO: Reward type dense or sparse
             self._size = size
 
     @property
@@ -185,10 +185,14 @@ def reset(self):
         return obs
 
     def render(self, *args, **kwargs):
-        image = self._env.render(mode='rgb_array', width=self._size[0], height=self._size[1])
+        # image = self._env.render(mode='rgb_array', width=128, height=128)  # fetch reach env
+        # image = Image.fromarray(image).crop((28, 22, 28+self._size[0], 22+self._size[1]))  # fetch reach env
+        image = self._env.render(mode='rgb_array', width=90, height=90)  # hand reach env
+        image = Image.fromarray(image).crop((30, 15, 30+self._size[0], 15+self._size[1]))  # fetch reach env
         # image = np.array(Image.fromarray(image).resize(self._size, Image.BILINEAR))
-        image = np.clip(image, 0, 255).astype(np.uint8)
-        return image
+        # image = np.clip(image, 0, 255).astype(np.uint8)
+        self.save(np.array(image))  # Uncomment this line if you want to save an image locally
+        return np.array(image)
 
     def combine(self, image, goal_image):
         pass