From 1c8c448ed86aced2e160e322e7067c2d6a3dfac5 Mon Sep 17 00:00:00 2001
From: Wenxuan Zhou
Date: Thu, 20 Apr 2023 00:03:16 -0400
Subject: [PATCH 1/3] Create pointcloud_utils.py

---
 robohive/utils/pointcloud_utils.py | 112 +++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 robohive/utils/pointcloud_utils.py

diff --git a/robohive/utils/pointcloud_utils.py b/robohive/utils/pointcloud_utils.py
new file mode 100644
index 00000000..6d7d1d3f
--- /dev/null
+++ b/robohive/utils/pointcloud_utils.py
@@ -0,0 +1,112 @@
+import numpy as np
+from utils.rotations import *
+import cv2
+
+
+# -------------------- Generic ----------------------------
+def get_intrinsics(fovy, img_width, img_height):
+    # fovy = self.sim.model.cam_fovy[cam_no]
+    aspect = float(img_width) / img_height
+    fovx = 2 * np.arctan(np.tan(np.deg2rad(fovy) * 0.5) * aspect)
+    fovx = np.rad2deg(fovx)
+    cx = img_width / 2.
+    cy = img_height / 2.
+    fx = cx / np.tan(np.deg2rad(fovx / 2.))
+    fy = cy / np.tan(np.deg2rad(fovy / 2.))
+    K = np.zeros((3,3), dtype=np.float64)
+    K[2][2] = 1
+    K[0][0] = fx
+    K[1][1] = fy
+    K[0][2] = cx
+    K[1][2] = cy
+    return K
+
+
+def depth2xyz(depth, cam_K):
+    h, w = depth.shape
+    ymap, xmap = np.meshgrid(np.arange(w), np.arange(h))
+
+    x = ymap
+    y = xmap
+    z = depth
+
+    x = (x - cam_K[0,2]) * z / cam_K[0,0]
+    y = (y - cam_K[1,2]) * z / cam_K[1,1]
+
+    xyz = np.stack([x, y, z], axis=2)
+    return xyz
+
+
+def visualize_point_cloud_from_nparray(d, c=None, vis_coordinate=False):
+    if c is not None:
+        if len(c.shape) == 3:
+            c = c.reshape(-1, 3)
+        if c.max() > 1:
+            c = c.astype(np.float64)/256
+
+    import open3d as o3d
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(d)
+    if c is not None:
+        pcd.colors = o3d.utility.Vector3dVector(c)
+
+    if vis_coordinate:
+        mesh = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5)
+        o3d.visualization.draw_geometries([mesh, pcd])
+    else:
+        o3d.visualization.draw_geometries([pcd])
+
+
+# -------------------- MuJoCo Specific ----------------------------
+def get_transformation_matrix(pos, quat):
+    arr = np.identity(4)
+    arr[:3, :3] = quat2mat(quat)
+    arr[:3, 3] = pos
+    return arr
+
+
+def get_transformation(env, camera_name=None):
+    if camera_name is None:
+        camera_name = env.camera_names[0]
+    cam_id = env.sim.model.camera_name2id(camera_name)
+    cam_pos = env.sim.model.cam_pos[cam_id]
+    cam_quat = env.sim.model.cam_quat[cam_id]
+    cam_quat = quat_mul(cam_quat, euler2quat([np.pi, 0, 0]))
+    return get_transformation_matrix(cam_pos, cam_quat)
+
+
+def convert_depth(env, depth):
+    # Convert depth into meter
+    extent = env.sim.model.stat.extent
+    near = env.sim.model.vis.map.znear * extent
+    far = env.sim.model.vis.map.zfar * extent
+    depth_m = depth * 2 - 1
+    depth_m = (2 * near * far) / (far + near - depth_m * (far - near))
+    # Check this as well: https://github.com/deepmind/dm_control/blob/master/dm_control/mujoco/engine.py#L734
+    return depth_m
+
+
+def get_object_point_cloud(env, depth, img):
+    depth = convert_depth(env, depth)
+    full_pc = get_point_cloud(env, depth)
+    obj_mask = get_obj_mask(img)
+    pc = full_pc[obj_mask.reshape(-1),:]
+    return pc
+
+
+def get_point_cloud(env, depth, camera_name=None):
+    # make sure to convert the raw depth image from MuJoCo using convert_depth
+    # output is flattened
+    if camera_name is None:
+        camera_name = env.camera_names[0]
+    fovy = env.sim.model.cam_fovy[env.sim.model.camera_name2id(camera_name)]
+    K = get_intrinsics(fovy, depth.shape[0], depth.shape[1])
+    pc = depth2xyz(depth, K)
+    pc = pc.reshape(-1, 3)
+
+    transform = get_transformation(env, camera_name=camera_name)
+    new_pc = np.ones((pc.shape[0], 4))
+    new_pc[:, :3] = pc
+    new_pc = (transform @ new_pc.transpose()).transpose()
+    return new_pc[:, :-1]
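As a quick cross-check of the pinhole math this patch introduces, the intrinsics and back-projection can be exercised in isolation. The sketch below is editorial, not part of the patch: it assumes the module import path that the next patch establishes, and the field of view and resolution are made-up values.

import numpy as np
from robohive.utils.pointcloud_utils import get_intrinsics, depth2xyz

# A flat wall 2 m in front of a 224x224 camera with a 45-degree vertical FOV.
K = get_intrinsics(fovy=45.0, img_width=224, img_height=224)
depth = np.full((224, 224), 2.0)
xyz = depth2xyz(depth, K)    # (224, 224, 3) points in the camera frame

# The center pixel sits on the optical axis, so x and y are 0 and z is the depth.
print(xyz[112, 112])         # -> [0. 0. 2.]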
From eb8faa5ce5e85de82d13702e6e595e3600f6b428 Mon Sep 17 00:00:00 2001
From: Wenxuan Zhou
Date: Sun, 12 Nov 2023 16:23:42 -0800
Subject: [PATCH 2/3] Add point cloud utility functions with an example

---
 .../tutorials/3_get_obs_proprio_extero.ipynb |  50 ++++++++
 robohive/utils/pointcloud_utils.py           | 112 +++++++++---------
 2 files changed, 104 insertions(+), 58 deletions(-)

diff --git a/robohive/tutorials/3_get_obs_proprio_extero.ipynb b/robohive/tutorials/3_get_obs_proprio_extero.ipynb
index d39e8436..4d8d93c2 100644
--- a/robohive/tutorials/3_get_obs_proprio_extero.ipynb
+++ b/robohive/tutorials/3_get_obs_proprio_extero.ipynb
@@ -135,6 +135,56 @@
     "print(f\"visual_dict = {env.visual_dict.keys()}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Convert depth into a point cloud\n",
+    "\n",
+    "If you need a point cloud observation, you can convert the depth image into a point cloud as follows.\n",
+    "\n",
+    "First, use env.get_visuals to obtain the depth image in the obs dict."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "obs = env.get_visuals(visual_keys=['rgb:vil_camera:224x224:2d', 'd:vil_camera:224x224:2d'])\n",
+    "depth = obs['d:vil_camera:224x224:2d'][0][-1::-1] # for some reason the depth image is upside down?\n",
+    "plt.imshow(depth)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then we convert the depth image into a point cloud based on the camera intrinsics and extrinsics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from robohive.utils.pointcloud_utils import get_point_cloud, visualize_point_cloud_from_nparray\n",
+    "pcd = get_point_cloud(env, obs['d:vil_camera:224x224:2d'][0][-1::-1], 'vil_camera')\n",
+    "\n",
+    "# # Interactive point cloud visualization with open3d (requires open3d installation)\n",
+    "# visualize_point_cloud_from_nparray(pcd, obs['rgb:vil_camera:224x224:2d'], vis_coordinate=True)\n",
+    "\n",
+    "# Visualize the point cloud with matplotlib (if you are too lazy to install open3d)\n",
+    "import matplotlib.pyplot as plt\n",
+    "fig = plt.figure()\n",
+    "ax = fig.add_subplot(projection='3d')\n",
+    "ax.scatter(pcd[:,0], pcd[:,1], pcd[:,2], c=obs['rgb:vil_camera:224x224:2d'].reshape(-1, 3)/256)\n",
+    "plt.show()\n"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
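The tutorial cell above reduces to two steps inside get_point_cloud: back-project each pixel through the intrinsics K, then map the resulting camera-frame points into the world frame with a 4x4 extrinsics matrix. The homogeneous-coordinate step looks like this in isolation (an editorial sketch; the pose values are made up, not from any model):

import numpy as np

T = np.identity(4)                 # camera-to-world transform
T[:3, 3] = [0.0, 0.5, 1.2]         # hypothetical camera position, identity rotation

pc = np.array([[0.0, 0.0, 0.0],    # the camera origin, in the camera frame
               [0.1, 0.0, 2.0]])   # a point 2 m in front of the camera
pc_h = np.ones((pc.shape[0], 4))   # append w = 1 for homogeneous coordinates
pc_h[:, :3] = pc

world = (T @ pc_h.T).T[:, :3]
print(world)                       # first row equals T[:3, 3], the camera position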
diff --git a/robohive/utils/pointcloud_utils.py b/robohive/utils/pointcloud_utils.py
index 6d7d1d3f..7ada4c33 100644
--- a/robohive/utils/pointcloud_utils.py
+++ b/robohive/utils/pointcloud_utils.py
@@ -1,11 +1,58 @@
 import numpy as np
-from utils.rotations import *
-import cv2
+from robohive.utils.quat_math import mulQuat, euler2quat, quat2mat
 
 
-# -------------------- Generic ----------------------------
+# ------ MuJoCo specific functions ------
+
+def get_point_cloud(env, depth, camera_name):
+    # Make sure the depth values are in meters. If the depth comes
+    # from robohive, it is already in meters. If it directly comes
+    # from mujoco, you need to use the convert_depth function below.
+    # Output is flattened. Each row is a point in 3D space.
+    fovy = env.sim.model.cam_fovy[env.sim.model.camera_name2id(camera_name)]
+    K = get_intrinsics(fovy, depth.shape[0], depth.shape[1])
+    pc = depth2xyz(depth, K)
+    pc = pc.reshape(-1, 3)
+
+    transform = get_extrinsics(env, camera_name=camera_name)
+    new_pc = np.ones((pc.shape[0], 4))
+    new_pc[:, :3] = pc
+    new_pc = (transform @ new_pc.transpose()).transpose()
+    return new_pc[:, :-1]
+
+
+def convert_depth(env, depth):
+    # Convert raw depth values into meters
+    # Check this as well: https://github.com/deepmind/dm_control/blob/master/dm_control/mujoco/engine.py#L734
+    extent = env.sim.model.stat.extent
+    near = env.sim.model.vis.map.znear * extent
+    far = env.sim.model.vis.map.zfar * extent
+    depth_m = depth * 2 - 1
+    depth_m = (2 * near * far) / (far + near - depth_m * (far - near))
+    return depth_m
+
+
+def get_extrinsics(env, camera_name):
+    # Transformation from camera frame to world frame
+    cam_id = env.sim.model.camera_name2id(camera_name)
+    cam_pos = env.sim.model.cam_pos[cam_id]
+    cam_quat = env.sim.model.cam_quat[cam_id]
+    cam_quat = mulQuat(cam_quat, euler2quat([np.pi, 0, 0]))
+    return get_transformation_matrix(cam_pos, cam_quat)
+
+
+def get_transformation_matrix(pos, quat):
+    # Convert the pose from MuJoCo format to a 4x4 transformation matrix
+    arr = np.identity(4)
+    arr[:3, :3] = quat2mat(quat)
+    arr[:3, 3] = pos
+    return arr
+
+
+# ------ General functions ------
+
 def get_intrinsics(fovy, img_width, img_height):
-    # fovy = self.sim.model.cam_fovy[cam_no]
+    # Get the camera intrinsics matrix
     aspect = float(img_width) / img_height
     fovx = 2 * np.arctan(np.tan(np.deg2rad(fovy) * 0.5) * aspect)
     fovx = np.rad2deg(fovx)
@@ -23,6 +70,7 @@ def get_intrinsics(fovy, img_width, img_height):
 
 
 def depth2xyz(depth, cam_K):
+    # Convert depth image to point cloud
     h, w = depth.shape
     ymap, xmap = np.meshgrid(np.arange(w), np.arange(h))
 
@@ -38,6 +86,7 @@ def depth2xyz(depth, cam_K):
 
 
 def visualize_point_cloud_from_nparray(d, c=None, vis_coordinate=False):
+    # Visualize a point cloud using open3d
     if c is not None:
         if len(c.shape) == 3:
             c = c.reshape(-1, 3)
@@ -51,62 +100,9 @@ def visualize_point_cloud_from_nparray(d, c=None, vis_coordinate=False):
         pcd.colors = o3d.utility.Vector3dVector(c)
 
     if vis_coordinate:
+        # Visualize coordinate frame
         mesh = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5)
         o3d.visualization.draw_geometries([mesh, pcd])
     else:
         o3d.visualization.draw_geometries([pcd])
-
-
-# -------------------- MuJoCo Specific ----------------------------
-def get_transformation_matrix(pos, quat):
-    arr = np.identity(4)
-    arr[:3, :3] = quat2mat(quat)
-    arr[:3, 3] = pos
-    return arr
-
-
-def get_transformation(env, camera_name=None):
-    if camera_name is None:
-        camera_name = env.camera_names[0]
-    cam_id = env.sim.model.camera_name2id(camera_name)
-    cam_pos = env.sim.model.cam_pos[cam_id]
-    cam_quat = env.sim.model.cam_quat[cam_id]
-    cam_quat = quat_mul(cam_quat, euler2quat([np.pi, 0, 0]))
-    return get_transformation_matrix(cam_pos, cam_quat)
-
-
-def convert_depth(env, depth):
-    # Convert depth into meter
-    extent = env.sim.model.stat.extent
-    near = env.sim.model.vis.map.znear * extent
-    far = env.sim.model.vis.map.zfar * extent
-    depth_m = depth * 2 - 1
-    depth_m = (2 * near * far) / (far + near - depth_m * (far - near))
-    # Check this as well: https://github.com/deepmind/dm_control/blob/master/dm_control/mujoco/engine.py#L734
-    return depth_m
-
-
-def get_object_point_cloud(env, depth, img):
-    depth = convert_depth(env, depth)
-    full_pc = get_point_cloud(env, depth)
-    obj_mask = get_obj_mask(img)
-    pc = full_pc[obj_mask.reshape(-1),:]
-    return pc
-
-
-def get_point_cloud(env, depth, camera_name=None):
-    # make sure to convert the raw depth image from MuJoCo using convert_depth
-    # output is flattened
-    if camera_name is None:
-        camera_name = env.camera_names[0]
-    fovy = env.sim.model.cam_fovy[env.sim.model.camera_name2id(camera_name)]
-    K = get_intrinsics(fovy, depth.shape[0], depth.shape[1])
-    pc = depth2xyz(depth, K)
-    pc = pc.reshape(-1, 3)
-
-    transform = get_transformation(env, camera_name=camera_name)
-    new_pc = np.ones((pc.shape[0], 4))
-    new_pc[:, :3] = pc
-    new_pc = (transform @ new_pc.transpose()).transpose()
-    return new_pc[:, :-1]
-
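Patch 2's convert_depth inverts the nonlinear OpenGL z-buffer mapping that MuJoCo's renderer produces. A standalone numeric check of that formula (editorial sketch; the near/far values are illustrative, not taken from any real model):

import numpy as np

near, far = 0.1, 5.0             # hypothetical clipping planes, in meters
raw = np.array([0.0, 0.5, 1.0])  # raw z-buffer values in [0, 1]
ndc = raw * 2 - 1                # rescale to normalized device coordinates
depth_m = (2 * near * far) / (far + near - ndc * (far - near))
print(depth_m)                   # [0.1, ~0.196, 5.0]: near plane through far plane

Note how most of the buffer's precision is spent close to the near plane: a raw value of 0.5 is still only about 0.2 m away.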
From 424d2bedbccf55653a5c7f59f9a80c3c5b37c14f Mon Sep 17 00:00:00 2001
From: Wenxuan Zhou
Date: Sat, 25 Nov 2023 23:04:38 -0800
Subject: [PATCH 3/3] Flip back depth image

---
 robohive/renderer/mj_renderer.py                  |  2 +-
 robohive/tutorials/3_get_obs_proprio_extero.ipynb | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/robohive/renderer/mj_renderer.py b/robohive/renderer/mj_renderer.py
index 74daf8b5..dae39e63 100644
--- a/robohive/renderer/mj_renderer.py
+++ b/robohive/renderer/mj_renderer.py
@@ -114,7 +114,7 @@ def render_offscreen(self,
             self._renderer.enable_depth_rendering()
             self._renderer.update_scene(self._sim.data.ptr, camera=camera_id, scene_option=self._scene_option)
             dpt_arr = self._renderer.render()
-            dpt_arr = dpt_arr[::-1, :]
+            # dpt_arr = dpt_arr[::-1, :]
             self._renderer.disable_depth_rendering()
         if segmentation:
             self._renderer.enable_segmentation_rendering()
diff --git a/robohive/tutorials/3_get_obs_proprio_extero.ipynb b/robohive/tutorials/3_get_obs_proprio_extero.ipynb
index 4d8d93c2..ad9614f1 100644
--- a/robohive/tutorials/3_get_obs_proprio_extero.ipynb
+++ b/robohive/tutorials/3_get_obs_proprio_extero.ipynb
@@ -154,8 +154,12 @@
    "source": [
     "import matplotlib.pyplot as plt\n",
     "obs = env.get_visuals(visual_keys=['rgb:vil_camera:224x224:2d', 'd:vil_camera:224x224:2d'])\n",
-    "depth = obs['d:vil_camera:224x224:2d'][0][-1::-1] # for some reason the depth image is upside down?\n",
-    "plt.imshow(depth)"
+    "color = obs['rgb:vil_camera:224x224:2d']\n",
+    "depth = obs['d:vil_camera:224x224:2d'][0]\n",
+    "plt.imshow(color)\n",
+    "plt.show()\n",
+    "plt.imshow(depth)\n",
+    "plt.show()"
    ]
   },
   {
@@ -172,7 +176,7 @@
    "outputs": [],
    "source": [
     "from robohive.utils.pointcloud_utils import get_point_cloud, visualize_point_cloud_from_nparray\n",
-    "pcd = get_point_cloud(env, obs['d:vil_camera:224x224:2d'][0][-1::-1], 'vil_camera')\n",
+    "pcd = get_point_cloud(env, obs['d:vil_camera:224x224:2d'][0], 'vil_camera')\n",
     "\n",
     "# # Interactive point cloud visualization with open3d (requires open3d installation)\n",
     "# visualize_point_cloud_from_nparray(pcd, obs['rgb:vil_camera:224x224:2d'], vis_coordinate=True)\n",
@@ -252,7 +256,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.10.13"
   },
  "orig_nbformat": 4
 },
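With all three patches applied, the full tutorial flow condenses to a few lines. An editorial usage sketch (the environment id is illustrative; any RoboHive environment that exposes a vil_camera should work the same way):

import robohive  # noqa: F401 -- registers the RoboHive environments
import gym
from robohive.utils.pointcloud_utils import get_point_cloud

env = gym.make('FrankaReachRandom-v0')  # illustrative env id
env.reset()
obs = env.get_visuals(visual_keys=['rgb:vil_camera:224x224:2d',
                                   'd:vil_camera:224x224:2d'])

# RoboHive depth is already in meters, so convert_depth is not needed here.
pcd = get_point_cloud(env, obs['d:vil_camera:224x224:2d'][0], 'vil_camera')
print(pcd.shape)  # (224*224, 3) points in the world frame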