diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2154994 --- /dev/null +++ b/.gitignore @@ -0,0 +1,215 @@ +# Created by https://www.toptal.com/developers/gitignore/api/macos,python +# Edit at https://www.toptal.com/developers/gitignore?templates=macos,python + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/macos,python + + +data/** +checkpoints/** +output/** +.vscode/** \ No newline at end of file diff --git a/README.md b/README.md index 6c4ea61..f89b07c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## Depth Pro: Sharp Monocular Metric Depth in Less Than a Second This software project accompanies the research paper: -**[Depth Pro: Sharp Monocular Metric Depth in Less Than a Second](https://arxiv.org/abs/2410.02073)**, +**[Depth Pro: Sharp Monocular Metric Depth in Less Than a Second](https://arxiv.org/abs/2410.02073)**, *Aleksei Bochkovskii, Amaƫl Delaunoy, Hugo Germain, Marcel Santos, Yichao Zhou, Stephan R. Richter, and Vladlen Koltun*. ![](data/depth-pro-teaser.jpg) @@ -57,7 +57,7 @@ focallength_px = prediction["focallength_px"] # Focal length in pixels. 
"""Show an RGB image and its depth map as a colored 3D point cloud.

Example (from the README "Visualization" section):

    python src/tools/visualize_rgbd.py --input_depth output/depth.npz --input_image data/rgb.jpeg --focal_length 1500.0
"""

import argparse

import numpy as np

# Open3D and Pillow are imported inside the functions that use them, so that
# ``--help``, argument errors and input validation work without loading them.


def parse_args(argv=None):
    """Parse the command-line options of the viewer.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        An ``argparse.Namespace`` with ``input_depth``, ``input_image``,
        ``focal_length`` and ``depth_trunc``.
    """
    parser = argparse.ArgumentParser(description="Visualize depth maps")
    parser.add_argument(
        "--input_depth",
        type=str,
        required=True,
        help="Path to the input depth map (npz file with a 'depth' array)",
    )
    parser.add_argument(
        "--input_image",
        type=str,
        required=True,
        help="Path to the input RGB image (any format Pillow can read)",
    )
    parser.add_argument(
        "--focal_length",
        type=float,
        default=1500.0,
        help="Focal length of the camera in pixels",
    )
    parser.add_argument(
        "--depth_trunc",
        type=float,
        default=100.0,
        help="Depth values above this distance are dropped from the point cloud",
    )
    return parser.parse_args(argv)


def visualize_depth_with_open3d(depth, image_rgb, focal_length=1500.0, *, depth_trunc=100.0):
    """Back-project an RGB-D pair into a point cloud and open an Open3D viewer.

    Args:
        depth: 2-D ``(H, W)`` array of depth values. It is passed to Open3D
            with ``depth_scale=1.0``, i.e. unscaled (presumably meters, as
            produced by Depth Pro -- confirm for other sources).
        image_rgb: ``(H, W, 3)`` color array with the same height and width
            as ``depth``.
        focal_length: Focal length in pixels, used for both ``fx`` and ``fy``.
        depth_trunc: Forwarded to Open3D, which truncates depth values larger
            than this distance.

    Raises:
        ValueError: If ``depth`` is not 2-D, ``image_rgb`` is not
            ``(H, W, 3)``, or the two do not share the same height and width.
    """
    # Open3D images accept float32 but not float64 buffers, and need
    # C-contiguous memory; normalise the depth so any numeric array works.
    depth = np.ascontiguousarray(depth, dtype=np.float32)
    image_rgb = np.ascontiguousarray(image_rgb)

    # Validate before touching Open3D so mistakes surface as a clear message
    # instead of an opaque error from the native library.
    if depth.ndim != 2:
        raise ValueError(f"depth must be a 2-D (H, W) array, got shape {depth.shape}")
    if image_rgb.ndim != 3 or image_rgb.shape[2] != 3:
        raise ValueError(f"image_rgb must have shape (H, W, 3), got {image_rgb.shape}")
    if image_rgb.shape[:2] != depth.shape:
        raise ValueError(
            f"image size {image_rgb.shape[:2]} does not match depth size {depth.shape}"
        )

    import open3d as o3d

    height, width = depth.shape

    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        o3d.geometry.Image(image_rgb),
        o3d.geometry.Image(depth),
        depth_scale=1.0,  # depth values are used as-is
        depth_trunc=depth_trunc,
        convert_rgb_to_intensity=False,  # keep the colors instead of grayscale
    )

    # Pinhole model with square pixels and the principal point at the image center.
    intrinsic = o3d.camera.PinholeCameraIntrinsic(
        width=width,
        height=height,
        fx=focal_length,
        fy=focal_length,
        cx=width / 2,
        cy=height / 2,
    )
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsic)

    # Flip the Y and Z axes so the cloud looks upright, not upside-down.
    pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])

    # Small axis gizmo at the origin of the (flipped) frame.
    coordinate_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(
        size=0.1, origin=[0, 0, 0]
    )

    # The cloud has no normals, so normal display is left at its default (off).
    o3d.visualization.draw_geometries([pcd, coordinate_frame])


def main(argv=None):
    """Load the depth map and image named on the command line and show them.

    Args:
        argv: Optional argument list forwarded to ``parse_args``.
    """
    args = parse_args(argv)

    # Load the depth map; the archive must contain an array named "depth".
    with np.load(args.input_depth) as data:
        depth = data["depth"]

    # Load the image as RGB, closing the file handle once it is decoded.
    from PIL import Image

    with Image.open(args.input_image) as img_pil:
        image = np.array(img_pil.convert("RGB"))

    visualize_depth_with_open3d(
        depth,
        image,
        focal_length=args.focal_length,
        depth_trunc=args.depth_trunc,
    )


if __name__ == "__main__":
    main()