From c06acbb8b16313947be2232b3f195391b8d36180 Mon Sep 17 00:00:00 2001 From: DenDen047 Date: Mon, 7 Oct 2024 18:57:20 +0200 Subject: [PATCH 1/6] feat: add .gitignore --- .gitignore | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8508aad --- /dev/null +++ b/.gitignore @@ -0,0 +1,214 @@ +# Created by https://www.toptal.com/developers/gitignore/api/macos,python +# Edit at https://www.toptal.com/developers/gitignore?templates=macos,python + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/macos,python + + +data/** +checkpoints/** +output/** \ No newline at end of file From 9dcdef6dba0447aef69fd1716ff1237f16f6dfbe Mon Sep 17 00:00:00 2001 From: DenDen047 Date: Mon, 7 Oct 2024 23:03:48 +0200 Subject: [PATCH 2/6] feat: visualize the depth map with user-defined camera parameters --- src/tools/visualize_depth.py | 61 ++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/tools/visualize_depth.py diff --git a/src/tools/visualize_depth.py b/src/tools/visualize_depth.py new file mode 100644 index 0000000..3decb0f --- /dev/null +++ b/src/tools/visualize_depth.py @@ -0,0 +1,61 @@ +import argparse +import os +import numpy as np +from PIL import Image +import open3d as o3d + +import plotly.graph_objects as go + + +def parse_args(): + parser = 
argparse.ArgumentParser(description="Visualize depth maps") + parser.add_argument("--input_depth", type=str, required=True, help="Path to the input depth map (npz file)") + parser.add_argument("--input_image", type=str, required=True, help="Path to the input image (png file)") + return parser.parse_args() + + +def visualize_depth_with_open3d(depth, image_rgb): + # Convert numpy arrays to Open3D Image objects + color_image = o3d.geometry.Image(image_rgb) + depth_image = o3d.geometry.Image(depth) + + rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( + color_image, depth_image, + depth_scale=1.0, # Adjust this value based on your depth scale + depth_trunc=100.0, # Adjust this value based on your depth range + convert_rgb_to_intensity=False + ) + + pcd = o3d.geometry.PointCloud.create_from_rgbd_image( + rgbd, + o3d.camera.PinholeCameraIntrinsic( + width=depth.shape[1], + height=depth.shape[0], + fx=1500.0, + fy=1500.0, + cx=depth.shape[1] / 2, + cy=depth.shape[0] / 2 + ) + ) + + # flip the orientation, so it looks upright, not upside-down + pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) + + # Create coordinate frame + coordinate_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0]) + # Visualize the point cloud with coordinate frame + o3d.visualization.draw_geometries([pcd, coordinate_frame]) # visualize the point cloud with axes + + +if __name__ == "__main__": + args = parse_args() + + # load depth map + with np.load(args.input_depth) as data: + depth = data["depth"] + + # load image + img_pil = Image.open(args.input_image).convert("RGB") + image = np.array(img_pil) + + visualize_depth_with_open3d(depth, image) From 7c70d37094e347822e6fc171090367720d20578d Mon Sep 17 00:00:00 2001 From: DenDen047 Date: Tue, 8 Oct 2024 00:51:42 +0200 Subject: [PATCH 3/6] feat: add focal_length arg --- src/tools/visualize_depth.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git 
a/src/tools/visualize_depth.py b/src/tools/visualize_depth.py index 3decb0f..349ac7d 100644 --- a/src/tools/visualize_depth.py +++ b/src/tools/visualize_depth.py @@ -11,10 +11,11 @@ def parse_args(): parser = argparse.ArgumentParser(description="Visualize depth maps") parser.add_argument("--input_depth", type=str, required=True, help="Path to the input depth map (npz file)") parser.add_argument("--input_image", type=str, required=True, help="Path to the input image (png file)") + parser.add_argument("--focal_length", type=float, default=1500.0, help="Focal length of the camera") return parser.parse_args() -def visualize_depth_with_open3d(depth, image_rgb): +def visualize_depth_with_open3d(depth, image_rgb, focal_length=1500.0): # Convert numpy arrays to Open3D Image objects color_image = o3d.geometry.Image(image_rgb) depth_image = o3d.geometry.Image(depth) @@ -31,8 +32,8 @@ def visualize_depth_with_open3d(depth, image_rgb): o3d.camera.PinholeCameraIntrinsic( width=depth.shape[1], height=depth.shape[0], - fx=1500.0, - fy=1500.0, + fx=focal_length, + fy=focal_length, cx=depth.shape[1] / 2, cy=depth.shape[0] / 2 ) @@ -44,7 +45,9 @@ def visualize_depth_with_open3d(depth, image_rgb): # Create coordinate frame coordinate_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0]) # Visualize the point cloud with coordinate frame - o3d.visualization.draw_geometries([pcd, coordinate_frame]) # visualize the point cloud with axes + # NOTE(review): point_show_normal only toggles normal display; point size is not set here + o3d.visualization.draw_geometries( + [pcd, coordinate_frame], point_show_normal=True) # draws point normals (if any); does not change point size if __name__ == "__main__": @@ -58,4 +61,4 @@ def visualize_depth_with_open3d(depth, image_rgb): img_pil = Image.open(args.input_image).convert("RGB") image = np.array(img_pil) - visualize_depth_with_open3d(depth, image) + visualize_depth_with_open3d(depth, image, focal_length=args.focal_length) From 733ab4dc90e3f3569bbcff54db52a52026bf4f01 Mon Sep 17 00:00:00 2001 
From: DenDen047 Date: Tue, 8 Oct 2024 00:52:00 +0200 Subject: [PATCH 4/6] feat: update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8508aad..2154994 100644 --- a/.gitignore +++ b/.gitignore @@ -211,4 +211,5 @@ pyrightconfig.json data/** checkpoints/** -output/** \ No newline at end of file +output/** +.vscode/** \ No newline at end of file From c3438e368992c2bc0281fcf1b2043c1d0e2bec34 Mon Sep 17 00:00:00 2001 From: DenDen047 Date: Tue, 8 Oct 2024 00:53:24 +0200 Subject: [PATCH 5/6] feat: rename the RGBD visualizer --- src/tools/{visualize_depth.py => visualize_rgbd.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/tools/{visualize_depth.py => visualize_rgbd.py} (100%) diff --git a/src/tools/visualize_depth.py b/src/tools/visualize_rgbd.py similarity index 100% rename from src/tools/visualize_depth.py rename to src/tools/visualize_rgbd.py From 64a3edc7cfdc61f4e49441b77075c63a54897fa4 Mon Sep 17 00:00:00 2001 From: DenDen047 Date: Tue, 8 Oct 2024 00:54:28 +0200 Subject: [PATCH 6/6] feat: update README --- README.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6c4ea61..f89b07c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## Depth Pro: Sharp Monocular Metric Depth in Less Than a Second This software project accompanies the research paper: -**[Depth Pro: Sharp Monocular Metric Depth in Less Than a Second](https://arxiv.org/abs/2410.02073)**, +**[Depth Pro: Sharp Monocular Metric Depth in Less Than a Second](https://arxiv.org/abs/2410.02073)**, *Aleksei Bochkovskii, Amaël Delaunoy, Hugo Germain, Marcel Santos, Yichao Zhou, Stephan R. Richter, and Vladlen Koltun*. ![](data/depth-pro-teaser.jpg) @@ -57,7 +57,7 @@ focallength_px = prediction["focallength_px"] # Focal length in pixels. 
``` -### Evaluation (boundary metrics) +### Evaluation (boundary metrics) Our boundary metrics can be found under `eval/boundary_metrics.py` and used as follows: @@ -65,10 +65,16 @@ Our boundary metrics can be found under `eval/boundary_metrics.py` and used as f # for a depth-based dataset boundary_f1 = SI_boundary_F1(predicted_depth, target_depth) -# for a mask-based dataset (image matting / segmentation) +# for a mask-based dataset (image matting / segmentation) boundary_recall = SI_boundary_Recall(predicted_depth, target_mask) ``` +### Visualization + +```bash +python src/tools/visualize_rgbd.py --input_depth output/depth.npz --input_image data/rgb.jpeg --focal_length 1500.0 +``` + ## Citation