
Commit

Merge branch 'master' of https://github.com/borglab/gtsfm into feature/shonan_mst_init

senselessdev1 committed Mar 12, 2024
2 parents e089f89 + c095771 commit 51c242b
Showing 7 changed files with 225 additions and 48 deletions.
7 changes: 4 additions & 3 deletions gtsfm/evaluation/compare_colmap_outputs.py
@@ -23,6 +23,7 @@


def load_poses(colmap_dirpath: str) -> Dict[str, Pose3]:
"""Returns mapping from image filename to associated camera pose."""
wTi_list, img_fnames, _, _, _, _ = io_utils.read_scene_data_from_colmap_format(colmap_dirpath)

return dict(zip(img_fnames, wTi_list))
@@ -32,7 +33,7 @@ def compare_poses(baseline_dirpath: str, eval_dirpath: str, output_dirpath: str)
"""Compare the pose metrics between two reconstructions (Colmap format).
Args:
baseline_dirpath: Directory with baseline reconstruction.
baseline_dirpath: Directory with baseline (reference) reconstruction.
current_dirpath: Directory with reconstruction which needs evaluation.
output_dirpath: Directory to save the metrics.
"""
@@ -106,12 +107,12 @@ def compare_poses(baseline_dirpath: str, eval_dirpath: str, output_dirpath: str)
parser.add_argument(
"--baseline",
required=True,
help="Path to directory containing benchmark artifacts for the baseline",
help="Path to directory containing benchmark artifacts for the baseline (reference).",
)
parser.add_argument(
"--current",
required=True,
help="Path to directory containing benchmark artifacts for the current",
help="Path to directory containing benchmark artifacts for the current.",
)
parser.add_argument("--output", required=True, help="Output for the json file for pose metrics")
parser.add_argument(
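For context, a minimal usage sketch of load_poses() from this file, showing how poses are keyed by image filename so the baseline and current reconstructions can be matched by name. This assumes a gtsfm development install so gtsfm.utils.io is importable; the directory paths are hypothetical:

import gtsfm.utils.io as io_utils

def load_poses(colmap_dirpath):
    """Returns mapping from image filename to associated camera pose."""
    wTi_list, img_fnames, _, _, _, _ = io_utils.read_scene_data_from_colmap_format(colmap_dirpath)
    return dict(zip(img_fnames, wTi_list))

baseline_poses = load_poses("/path/to/baseline_colmap_dir")  # hypothetical path
current_poses = load_poses("/path/to/current_colmap_dir")    # hypothetical path
# Pose errors can then be computed over filenames present in both reconstructions.
shared_fnames = sorted(set(baseline_poses) & set(current_poses))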
44 changes: 22 additions & 22 deletions gtsfm/utils/alignment.py
@@ -58,26 +58,27 @@ def align_poses_sim3_ignore_missing(
We assume the two trajectories are of the exact same length.
Args:
aTi_list: reference poses in frame "a" which are the targets for alignment
bTi_list: input poses which need to be aligned to frame "a"
aTi_list: Reference poses in frame "a" which are the targets for alignment.
bTi_list: Input poses which need to be aligned to frame "a".
Returns:
aTi_list_: transformed input poses previously "bTi_list" but now which
aTi_list_: Transformed input poses previously "bTi_list" but now which
have the same origin and scale as reference (now living in "a" frame)
aSb: Similarity(3) object that aligns the two pose graphs.
"""
assert len(aTi_list) == len(bTi_list)

# only choose target poses for which there is a corresponding estimated pose
# Only choose target poses for which there is a corresponding estimated pose.
corresponding_aTi_list = []
valid_camera_idxs = []
valid_bTi_list = []
for i, bTi in enumerate(bTi_list):
aTi = aTi_list[i]
if aTi is not None and bTi is not None:
valid_camera_idxs.append(i)
valid_bTi_list.append(bTi)
corresponding_aTi_list.append(aTi)
if aTi is None or bTi is None:
continue
valid_camera_idxs.append(i)
valid_bTi_list.append(bTi)
corresponding_aTi_list.append(aTi)

valid_aTi_list_, aSb = align_poses_sim3_robust(aTi_list=corresponding_aTi_list, bTi_list=valid_bTi_list)

@@ -100,11 +101,11 @@ def align_poses_sim3_exhaustive(aTi_list: List[Pose3], bTi_list: List[Pose3]) ->
We assume the two trajectories are of the exact same length.
Args:
aTi_list: reference poses in frame "a" which are the targets for alignment
bTi_list: input poses which need to be aligned to frame "a"
aTi_list: Reference poses in frame "a" which are the targets for alignment.
bTi_list: Input poses which need to be aligned to frame "a".
Returns:
aTi_list_: transformed input poses previously "bTi_list" but now which
aTi_list_: Transformed input poses previously "bTi_list" but now which
have the same origin and scale as reference (now living in "a" frame)
aSb: Similarity(3) object that aligns the two pose graphs.
"""
@@ -114,17 +115,17 @@ def align_poses_sim3_exhaustive(aTi_list: List[Pose3], bTi_list: List[Pose3]) ->
logger.error("SIM(3) alignment uses at least 2 frames; Skipping")
return bTi_list, Similarity3(Rot3(), np.zeros((3,)), 1.0)

# Run once with all poses for initial guess
# Run once with all poses for initial guess.
best_aSb = Similarity3()
aTi_candidate_, best_aSb = align_poses_sim3(aTi_list, bTi_list)
best_pose_auc_5deg: float = metric_utils.pose_auc_from_poses(
computed_wTis=aTi_candidate_, ref_wTis=aTi_list, thresholds_deg=[5]
)[0]

for i in range(n_to_align):
for j in range(i + 1, n_to_align):
aTi_sample = copy.deepcopy([aTi_list[i], aTi_list[j]])
bTi_sample = copy.deepcopy([bTi_list[i], bTi_list[j]])
for i1 in range(n_to_align):
for i2 in range(i1 + 1, n_to_align):
aTi_sample = copy.deepcopy([aTi_list[i1], aTi_list[i2]])
bTi_sample = copy.deepcopy([bTi_list[i1], bTi_list[i2]])

_, aSb_candidate = align_poses_sim3(aTi_sample, bTi_sample)

@@ -172,7 +173,7 @@ def align_poses_sim3_robust(
max_num_hypothesis: max number of RANSAC iterations.
Returns:
aTi_list_: transformed input poses previously "bTi_list" but now which
aTi_list_: Transformed input poses previously "bTi_list" but now which
have the same origin and scale as reference (now living in "a" frame).
aSb: Similarity(3) object that aligns the two pose graphs.
"""
@@ -242,11 +243,11 @@ def align_poses_sim3(aTi_list: List[Pose3], bTi_list: List[Pose3]) -> Tuple[List
We assume the two trajectories are of the exact same length.
Args:
aTi_list: reference poses in frame "a" which are the targets for alignment
bTi_list: input poses which need to be aligned to frame "a"
aTi_list: Reference poses in frame "a" which are the targets for alignment
bTi_list: Input poses which need to be aligned to frame "a"
Returns:
aTi_list_: transformed input poses previously "bTi_list" but now which
aTi_list_: Transformed input poses previously "bTi_list" but now which
have the same origin and scale as reference (now living in "a" frame)
aSb: Similarity(3) object that aligns the two pose graphs.
"""
@@ -265,7 +266,6 @@ def align_poses_sim3(aTi_list: List[Pose3], bTi_list: List[Pose3]) -> Tuple[List
ab_pairs = Pose3Pairs(valid_pose_tuples)

aSb = Similarity3.Align(ab_pairs)

if np.isnan(aSb.scale()) or aSb.scale() == 0:
logger.warning("GTSAM Sim3.Align failed. Aligning ourselves")
# we have run into a case where points have no translation between them (i.e. panorama).
@@ -305,7 +305,7 @@ def align_gtsfm_data_via_Sim3_to_poses(input_data: GtsfmData, wTi_list_ref: List
"""Align GtsfmData (points and cameras) to a set of reference poses.
Args:
wTi_list_ref: list of reference/target camera poses, ordered by camera index.
wTi_list_ref: List of reference/target camera poses, ordered by camera index.
Returns:
aligned_data: GtsfmData that is aligned to the poses above.
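For context, a minimal sketch of the core GTSAM call these alignment helpers wrap: Similarity3.Align on a list of (aTi, bTi) pose pairs. The two synthetic trajectories below are not from this commit, and the snippet assumes a working gtsam install:

import numpy as np
from gtsam import Pose3, Pose3Pairs, Rot3, Similarity3

# Frame "b" holds the same two-camera trajectory as frame "a", shifted by 1 m along x.
aTi_list = [Pose3(Rot3(), np.array([0.0, 0.0, 0.0])), Pose3(Rot3(), np.array([2.0, 0.0, 0.0]))]
bTi_list = [Pose3(Rot3(), np.array([1.0, 0.0, 0.0])), Pose3(Rot3(), np.array([3.0, 0.0, 0.0]))]

# Pair ordering follows the (aTi, bTi) convention used in align_poses_sim3 above.
aSb = Similarity3.Align(Pose3Pairs(list(zip(aTi_list, bTi_list))))
aTi_aligned = [aSb.transformFrom(bTi) for bTi in bTi_list]  # poses of frame "b" mapped into frame "a"

align_poses_sim3_exhaustive repeats this alignment for every pair (i1, i2) of poses and keeps the Similarity(3) candidate with the highest pose AUC at a 5-degree threshold, while align_poses_sim3_ignore_missing first drops indices where either trajectory has a None pose.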
16 changes: 11 additions & 5 deletions gtsfm/utils/io.py
@@ -201,11 +201,15 @@ def colmap2gtsfm(
if len(images) == 0 and len(cameras) == 0:
raise RuntimeError("No Image or Camera data provided to loader.")
intrinsics_gtsfm, wTi_gtsfm, img_fnames, img_dims = [], [], [], []
image_id_to_idx = {} # keeps track of discrepencies between `image_id` and List index.
image_id_to_idx = {} # Keeps track of discrepencies between `image_id` and List index.
# We ignore missing IDs (unestimated cameras) and re-order without them.
for idx, img in enumerate(images.values()):
wTi_gtsfm.append(Pose3(Rot3(img.qvec2rotmat()), img.tvec).inverse())
img_fnames.append(img.name)
camera_model_name = cameras[img.camera_id].model

# Default to zero-valued radial distortion coefficients (quadratic and quartic).
k1, k2 = 0.0, 0.0
if camera_model_name == "SIMPLE_RADIAL":
# See https://github.com/colmap/colmap/blob/1f6812e333a1e4b2ef56aa74e2c3873e4e3a40cd/src/colmap/sensor/models.h#L212 # noqa: E501
f, cx, cy, k = cameras[img.camera_id].params[:4]
@@ -216,10 +220,13 @@ def colmap2gtsfm(
elif camera_model_name == "PINHOLE":
# See https://github.com/colmap/colmap/blob/1f6812e333a1e4b2ef56aa74e2c3873e4e3a40cd/src/colmap/sensor/models.h#L196 # noqa: E501
fx, fy, cx, cy = cameras[img.camera_id].params[:4]
elif camera_model_name == "RADIAL":
f, cx, cy, k1, k2 = cameras[img.camera_id].params[:5]
fx = f
else:
raise ValueError(f"Unsupported COLMAP camera type: {camera_model_name}")

intrinsics_gtsfm.append(Cal3Bundler(fx, 0.0, 0.0, cx, cy))
intrinsics_gtsfm.append(Cal3Bundler(fx, k1, k2, cx, cy))
image_id_to_idx[img.id] = idx
img_h, img_w = cameras[img.camera_id].height, cameras[img.camera_id].width
img_dims.append((img_h, img_w))
@@ -241,7 +248,6 @@ def colmap2gtsfm(
sfmtrack.addMeasurement(image_id_to_idx[image_id], images[image_id].xys[point2d_idx])
sfmtracks_gtsfm.append(sfmtrack)


point_cloud = np.array([point3d.xyz for point3d in points3D.values()])
rgb = np.array([point3d.rgb for point3d in points3D.values()])
return img_fnames, wTi_gtsfm, intrinsics_gtsfm, sfmtracks_gtsfm, point_cloud, rgb, img_dims
@@ -255,7 +261,7 @@ def read_cameras_txt(
Reference: https://colmap.github.io/format.html#cameras-txt
Args:
fpaths: Path to cameras.txt file
fpaths: Path to cameras.txt file.
Returns:
Tuple of:
@@ -398,7 +404,7 @@ def read_images_txt(fpath: str) -> Tuple[List[Pose3], List[str]]:


def sort_image_filenames_lexigraphically(
wTi_list: List[Pose3], img_fnames: List[str]
wTi_list: List[Pose3], img_fnames: List[str]
) -> Tuple[List[Pose3], List[str], List[int]]:
"""Sort a list of camera poses according to provided image file names."""
sorted_idxs = sorted(range(len(img_fnames)), key=lambda i: img_fnames[i])
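To make the new RADIAL branch above concrete, here is a standalone sketch of the parameter mapping into Cal3Bundler, which models a single focal length plus two radial distortion coefficients; the numeric parameter values are made up for illustration:

import numpy as np
from gtsam import Cal3Bundler

# Hypothetical COLMAP RADIAL parameters, ordered (f, cx, cy, k1, k2).
params = np.array([1200.0, 640.0, 480.0, -0.05, 0.01])
f, cx, cy, k1, k2 = params[:5]

# Mirrors the new branch: fx = f, and k1/k2 are passed through instead of being zeroed.
intrinsics = Cal3Bundler(f, k1, k2, cx, cy)
print(intrinsics)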
4 changes: 1 addition & 3 deletions gtsfm/visualization/open3d_vis_utils.py
@@ -126,7 +126,6 @@ def create_all_frustums_open3d(
# color is in range [0,1]
color = tuple(colormap[i].tolist())
colors = [color for i in range(len(lines))]

line_set = open3d.geometry.LineSet(
points=open3d.utility.Vector3dVector(verts_worldfr),
lines=open3d.utility.Vector2iVector(lines),
@@ -229,10 +228,9 @@ def draw_scene_with_gt_open3d(
"""
frustums = create_all_frustums_open3d(wTi_list, calibrations, args.frustum_ray_len, color_names=("red", "green"))
gt_frustums = create_all_frustums_open3d(
wTi_list, calibrations, args.frustum_ray_len, color_names=("blue", "purple")
gt_wTi_list, gt_calibrations, args.frustum_ray_len, color_names=("blue", "purple")
)

# spheres = create_colored_spheres_open3d(point_cloud, rgb, args.sphere_radius)
pcd = create_colored_point_cloud_open3d(point_cloud, rgb)
geometries = frustums + gt_frustums + [pcd] # + spheres

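The fix above simply feeds the GT poses and calibrations into the frustum builder. As a rough illustration of the rendering pattern only (not the actual frustum geometry), the sketch below draws two tiny Open3D line sets in an estimated-vs-GT color split; positions and colors are arbitrary:

import numpy as np
import open3d

def make_line_set(origin, color):
    # Two edges fanning out from `origin`, all painted a single color.
    points = np.array([origin, origin + [1.0, 0.0, 0.0], origin + [0.0, 1.0, 0.0]])
    lines = [[0, 1], [0, 2]]
    line_set = open3d.geometry.LineSet(
        points=open3d.utility.Vector3dVector(points),
        lines=open3d.utility.Vector2iVector(lines),
    )
    line_set.colors = open3d.utility.Vector3dVector(np.tile(color, (len(lines), 1)))
    return line_set

estimated = make_line_set(np.zeros(3), color=(1.0, 0.0, 0.0))             # red: estimated cameras
gt = make_line_set(np.array([2.0, 0.0, 0.0]), color=(0.0, 0.0, 1.0))      # blue: GT cameras
open3d.visualization.draw_geometries([estimated, gt])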
93 changes: 90 additions & 3 deletions gtsfm/visualization/view_scene.py
@@ -8,12 +8,18 @@
import argparse
import os
from pathlib import Path
from typing import List, Tuple

import numpy as np
from gtsam import Pose3, Rot3
from gtsam import Cal3Bundler, Pose3, Rot3

import gtsfm.utils.alignment as alignment_utils
import gtsfm.utils.io as io_utils
from gtsfm.visualization.open3d_vis_utils import draw_scene_open3d
import gtsfm.utils.logger as logger_utils
from gtsfm.loader.olsson_loader import OlssonLoader
from gtsfm.visualization import open3d_vis_utils

logger = logger_utils.get_logger()

REPO_ROOT = Path(__file__).parent.parent.parent.resolve()

@@ -63,7 +69,72 @@ def view_scene(args: argparse.Namespace) -> None:
for i in range(len(wTi_list)):
wTi_list[i] = zcwTw.compose(wTi_list[i])

draw_scene_open3d(point_cloud, rgb, wTi_list, calibrations, args)
if args.gt_olsson_dir is not None or args.gt_colmap_dir is not None:
wTi_list_gt, gt_calibrations = _load_common_gt_poses(args, img_fnames)
if len(gt_calibrations) == 1:
gt_calibrations = gt_calibrations * len(img_fnames)

for i in range(len(wTi_list_gt)):
wTi_list_gt[i] = zcwTw.compose(wTi_list_gt[i])

# Align the poses.
wTi_aligned_list, rSe = alignment_utils.align_poses_sim3_ignore_missing(wTi_list_gt, wTi_list)
point_cloud = np.stack([rSe.transformFrom(pt) for pt in point_cloud])

open3d_vis_utils.draw_scene_with_gt_open3d(
point_cloud=point_cloud,
rgb=rgb,
wTi_list=wTi_aligned_list,
calibrations=calibrations,
gt_wTi_list=wTi_list_gt,
gt_calibrations=gt_calibrations,
args=args,
)

else:
# Draw the provided scene data only.
open3d_vis_utils.draw_scene_open3d(point_cloud, rgb, wTi_list, calibrations, args)


def _load_common_gt_poses(args: argparse.Namespace, img_fnames: List[str]) -> Tuple[List[Pose3], List[Cal3Bundler]]:
"""Load GT poses that are common to the provided scene reconstruction.
Args:
args: Data loading options.
img_fnames: Filenames for images reconstructed in provided scene (not the GT). GT may include
additional images, and we want to keep 1:1 to be able to align later, so may need to prune away some
of GT poses.
Returns:
Ground truth poses and calibrations.
"""
if args.gt_olsson_dir is not None:
loader = OlssonLoader(args.gt_olsson_dir)
wTi_list_gt = loader._wTi_list
gt_calibrations = [loader.get_camera_intrinsics_full_res(0)] * loader._num_imgs
return wTi_list_gt, gt_calibrations

if args.gt_colmap_dir is not None:
# Plot both scene data, and GT data, in same coordinate frame.
# Read in GT data.
wTi_list_gt, gt_img_fnames, gt_calibrations, _, _, _ = io_utils.read_scene_data_from_colmap_format(
args.gt_colmap_dir
)

gt_pose_dict = dict(zip(gt_img_fnames, wTi_list_gt))
common_fnames = set(img_fnames) & set(gt_img_fnames)
logger.info(
"#Images Provided scene: %d, #Images GT: %d , common: %d poses",
len(img_fnames),
len(gt_img_fnames),
len(common_fnames),
)

common_gt_wTi_list: List[Pose3] = []
for fname in img_fnames:
common_gt_wTi_list.append(gt_pose_dict.get(fname))

return common_gt_wTi_list, gt_calibrations


if __name__ == "__main__":
@@ -76,6 +147,22 @@ def view_scene(args: argparse.Namespace) -> None:
"This directory should contain 3 files: either `cameras.txt`, `images.txt`, and `points3D.txt`"
" or `cameras.bin`, `images.bin`, and `points3D.bin`.",
)
parser.add_argument(
"--gt_olsson_dir",
type=str,
required=False,
default=None,
help="If provided, will plot Olsson-format GT data alongside provided scene data in `output_dir`.",
)
parser.add_argument(
"--gt_colmap_dir",
type=str,
required=False,
default=None,
help="If provided, will plot COLMAP-format GT data alongside provided scene data in `output_dir`. `gt_dir` "
"should be a path to a separate directory containing GT camera poses. This directory should contain 3 files: "
"cameras.txt, images.txt, and points3D.txt or `cameras.bin`, `images.bin`, and `points3D.bin`.",
)
parser.add_argument(
"--rendering_style",
type=str,
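For context, a minimal sketch of the filename-matching step performed by the new _load_common_gt_poses() helper: GT poses are re-ordered to match the reconstructed image list, with None standing in for images that have no GT pose. All filenames and poses below are placeholders, not data from this commit:

from gtsam import Pose3

img_fnames = ["0001.jpg", "0002.jpg", "0003.jpg"]     # images in the provided reconstruction
gt_img_fnames = ["0001.jpg", "0003.jpg", "0004.jpg"]  # GT model may cover a different image set
wTi_list_gt = [Pose3(), Pose3(), Pose3()]             # placeholder GT poses

gt_pose_dict = dict(zip(gt_img_fnames, wTi_list_gt))
# Keep a 1:1 ordering with the reconstructed images; missing entries become None,
# which align_poses_sim3_ignore_missing skips when estimating the Similarity(3).
common_gt_wTi_list = [gt_pose_dict.get(fname) for fname in img_fnames]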
24 changes: 12 additions & 12 deletions rtf_vis_tool/package-lock.json

Some generated files are not rendered by default.

