From c6ead3ac27ccccc4ff6759367ca50caf94eafdc0 Mon Sep 17 00:00:00 2001
From: Shaohui Liu <b1ueber2y@gmail.com>
Date: Mon, 16 Dec 2024 10:17:36 +0100
Subject: [PATCH] Fix broken interface from Hloc for 7Scenes localization with
 depths (#113)

---
 docs/tutorials/localization.rst |  4 ++--
 requirements.txt                |  2 +-
 runners/7scenes/utils.py        | 22 ++++++++++++++++++++--
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/docs/tutorials/localization.rst b/docs/tutorials/localization.rst
index 6a6047e8..f3e3e918 100644
--- a/docs/tutorials/localization.rst
+++ b/docs/tutorials/localization.rst
@@ -59,7 +59,7 @@ Now, to run the localization pipeline with points and lines. As shown above, the
 .. code-block:: bash
 
     python runners/7scenes/localization.py --dataset $dataset -s stairs --skip_exists \
-                                           --localization.optimize.loss_func TrivialLoss \
+                                           --localization.optimize.loss_func TrivialLoss
 
 It is also possible to use the rendered depth with the ``--use_dense_depth`` flag, in which case the 3D line map will be built using LIMAP's Fit&Merge (enable merging by adding ``--merging.do_merging``) utilities instead of triangulation.
 
@@ -67,6 +67,6 @@ It is also possible to use the rendered depth with the ``--use_dense_depth`` fla
 
     python runners/7scenes/localization.py --dataset $dataset -s stairs --skip_exists \
                                            --use_dense_depth \
-                                           --localization.optimize.loss_func TrivialLoss \
+                                           --localization.optimize.loss_func TrivialLoss
 
 The runner scripts will also run `hloc <https://github.com/cvg/Hierarchical-Localization/tree/master/hloc/pipelines/7Scenes>`_ for extracting and matching the feature points and for comparing the results. The evaluation result will be printed in terminal after localization is finished. You could also evaluate different result ``.txt`` files using the ``--eval`` flag.
diff --git a/requirements.txt b/requirements.txt
index 70c6c17f..632919ba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,4 +28,4 @@ clang-format==19.1.0
 pytlsd@git+https://github.com/iago-suarez/pytlsd.git@37ac583
 deeplsd@git+https://github.com/cvg/DeepLSD.git@88c589d
 gluestick@git+https://github.com/cvg/GlueStick.git@0f28efd
--e git+https://github.com/B1ueber2y/Hierarchical-Localization.git@f91076b#egg=hloc
+-e git+https://github.com/B1ueber2y/Hierarchical-Localization.git@dfe106a#egg=hloc
diff --git a/runners/7scenes/utils.py b/runners/7scenes/utils.py
index a1b5996f..f4eef0ba 100644
--- a/runners/7scenes/utils.py
+++ b/runners/7scenes/utils.py
@@ -66,7 +66,16 @@ def create_reference_sfm(full_model, ref_model, blacklist=None, ext=".bin"):
 
 def scene_coordinates(p2D, R_w2c, t_w2c, depth, camera):
     assert len(depth) == len(p2D)
-    p2D_norm = np.stack(pycolmap.Camera(camera._asdict()).image_to_world(p2D))
+    pycolmap_camera = pycolmap.Camera(
+        {
+            "camera_id": camera.id,
+            "model": camera.model,
+            "width": camera.width,
+            "height": camera.height,
+            "params": camera.params,
+        }
+    )
+    p2D_norm = pycolmap_camera.cam_from_img(p2D)
     p2D_h = np.concatenate([p2D_norm, np.ones_like(p2D_norm[:, :1])], 1)
     p3D_c = p2D_h * depth[:, None]
     p3D_w = (p3D_c - t_w2c) @ R_w2c
@@ -109,7 +118,16 @@ def project_to_image(p3D, R, t, camera, eps: float = 1e-4, pad: int = 1):
     p3D = (p3D @ R.T) + t
     visible = p3D[:, -1] >= eps  # keep points in front of the camera
     p2D_norm = p3D[:, :-1] / p3D[:, -1:].clip(min=eps)
-    p2D = np.stack(pycolmap.Camera(camera._asdict()).world_to_image(p2D_norm))
+    pycolmap_camera = pycolmap.Camera(
+        {
+            "camera_id": camera.id,
+            "model": camera.model,
+            "width": camera.width,
+            "height": camera.height,
+            "params": camera.params,
+        }
+    )
+    p2D = pycolmap_camera.img_from_cam(p2D_norm)
     size = np.array([camera.width - pad - 1, camera.height - pad - 1])
     valid = np.all((p2D >= pad) & (p2D <= size), -1)
     valid &= visible