vikashplus · vikashplus · Apr 18, 2024 · Apr 20, 2023 · Nov 4, 2023 · Nov 13, 2023
diff --git a/robohive/renderer/mj_renderer.py b/robohive/renderer/mj_renderer.py
@@ -114,7 +114,7 @@ def render_offscreen(self,
             self._renderer.enable_depth_rendering()
             self._renderer.update_scene(self._sim.data.ptr, camera=camera_id, scene_option=self._scene_option)
             dpt_arr = self._renderer.render()
-            dpt_arr = dpt_arr[::-1, :]
+            # dpt_arr = dpt_arr[::-1, :]
             self._renderer.disable_depth_rendering()
         if segmentation:
             self._renderer.enable_segmentation_rendering()

diff --git a/robohive/tutorials/3_get_obs_proprio_extero.ipynb b/robohive/tutorials/3_get_obs_proprio_extero.ipynb
@@ -135,6 +135,60 @@
     "print(f\"visual_dict = {env.visual_dict.keys()}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. convert depth into a point cloud\n",
+    "\n",
+    "If you need a point cloud observation, you can convert the depth image into the point cloud in the following way.\n",
+    "\n",
+    "First, use env.get_visuals to obtain the depth image in the obs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "obs = env.get_visuals(visual_keys=['rgb:vil_camera:224x224:2d', 'd:vil_camera:224x224:2d'])\n",
+    "color = obs['rgb:vil_camera:224x224:2d']\n",
+    "depth = obs['d:vil_camera:224x224:2d'][0]\n",
+    "plt.imshow(color)\n",
+    "plt.show()\n",
+    "plt.imshow(depth)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then we convert the depth image into a point cloud based on camera intrinsics and extrinsics."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from robohive.utils.pointcloud_utils import get_point_cloud, visualize_point_cloud_from_nparray\n",
+    "pcd = get_point_cloud(env, obs['d:vil_camera:224x224:2d'][0], 'vil_camera')\n",
+    "\n",
+    "# # Interactive point cloud visualization with open3d (requires open3d installation)\n",
+    "# visualize_point_cloud_from_nparray(pcd, obs['rgb:vil_camera:224x224:2d'], vis_coordinate=True)\n",
+    "\n",
+    "# Visualize the point cloud with matplotlib (if you are too lazy to install open3d)\n",
+    "import matplotlib.pyplot as plt\n",
+    "fig = plt.figure()\n",
+    "ax = fig.add_subplot(projection='3d')\n",
+    "ax.scatter(pcd[:,0], pcd[:,1], pcd[:,2], c=obs['rgb:vil_camera:224x224:2d'].reshape(-1, 3)/256)\n",
+    "plt.show()\n"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -202,7 +256,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.16"
+   "version": "3.10.13"
   },
   "orig_nbformat": 4
  },

diff --git a/robohive/utils/pointcloud_utils.py b/robohive/utils/pointcloud_utils.py
@@ -0,0 +1,108 @@
+import numpy as np
+from robohive.utils.quat_math import mulQuat, euler2quat, quat2mat
+
+
+# ------ MuJoCo specific functions ------
+
+def get_point_cloud(env, depth, camera_name):
+    # Make sure the depth values are in meters. If the depth comes 
+    # from robohive, it is already in meters. If it directly comes 
+    # from mujoco, you need to use the convert_depth function below. 
+    # Output is flattened. Each row is a point in 3D space.
+    fovy = env.sim.model.cam_fovy[env.sim.model.camera_name2id(camera_name)]
+    K = get_intrinsics(fovy, depth.shape[0], depth.shape[1])
+    pc = depth2xyz(depth, K)
+    pc = pc.reshape(-1, 3)
+
+    transform = get_extrinsics(env, camera_name=camera_name)
+    new_pc = np.ones((pc.shape[0], 4))
+    new_pc[:, :3] = pc
+    new_pc = (transform @ new_pc.transpose()).transpose()
+    return new_pc[:, :-1]
+
+
+def convert_depth(env, depth):
+    # Convert raw depth values into meters
+    # Check this as well: https://github.com/deepmind/dm_control/blob/master/dm_control/mujoco/engine.py#L734
+    extent = env.sim.model.stat.extent
+    near = env.sim.model.vis.map.znear * extent
+    far = env.sim.model.vis.map.zfar * extent
+    depth_m = depth * 2 - 1
+    depth_m = (2 * near * far) / (far + near - depth_m * (far - near))
+    return depth_m
+
+
+def get_extrinsics(env, camera_name):
+    # Transformation from camera frame to world frame
+    cam_id = env.sim.model.camera_name2id(camera_name)
+    cam_pos = env.sim.model.cam_pos[cam_id]
+    cam_quat = env.sim.model.cam_quat[cam_id]
+    cam_quat = mulQuat(cam_quat, euler2quat([np.pi, 0, 0]))
+    return get_transformation_matrix(cam_pos, cam_quat)
+
+
+def get_transformation_matrix(pos, quat):
+    # Convert the pose from MuJoCo format to a 4x4 transformation matrix
+    arr = np.identity(4)
+    arr[:3, :3] = quat2mat(quat)
+    arr[:3, 3] = pos
+    return arr
+
+
+# ------ General functions ------
+
+def get_intrinsics(fovy, img_width, img_height):
+    # Get the camera intrinsics matrix
+    aspect = float(img_width) / img_height
+    fovx = 2 * np.arctan(np.tan(np.deg2rad(fovy) * 0.5) * aspect)
+    fovx = np.rad2deg(fovx)
+    cx = img_width / 2.
+    cy = img_height / 2.
+    fx = cx / np.tan(np.deg2rad(fovx / 2.))
+    fy = cy / np.tan(np.deg2rad(fovy / 2.))
+    K = np.zeros((3,3), dtype=np.float64)
+    K[2][2] = 1
+    K[0][0] = fx
+    K[1][1] = fy
+    K[0][2] = cx
+    K[1][2] = cy
+    return K
+
+
+def depth2xyz(depth, cam_K):
+    # Convert depth image to point cloud
+    h, w = depth.shape
+    ymap, xmap = np.meshgrid(np.arange(w), np.arange(h))
+
+    x = ymap
+    y = xmap
+    z = depth
+
+    x = (x - cam_K[0,2]) * z / cam_K[0,0]
+    y = (y - cam_K[1,2]) * z / cam_K[1,1]
+
+    xyz = np.stack([x, y, z], axis=2)
+    return xyz
+
+
+def visualize_point_cloud_from_nparray(d, c=None, vis_coordinate=False):
+    # Visualize a point cloud using open3d
+    if c is not None:
+        if len(c.shape) == 3:
+            c = c.reshape(-1, 3)
+        if c.max() > 1:
+            c = c.astype(np.float64)/256
+
+    import open3d as o3d
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(d)
+    if c is not None:
+        pcd.colors = o3d.utility.Vector3dVector(c)
+
+    if vis_coordinate:
+        # Visualize coordinate frame
+        mesh = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5)
+        o3d.visualization.draw_geometries([mesh, pcd])
+    else:
+        o3d.visualization.draw_geometries([pcd])
+