Add tanks and temples dataset loader #653
Changes from 139 commits
@@ -0,0 +1,102 @@
# Synthetic front-end configuration specifically for the Tanks & Temples dataset.

SceneOptimizer:
  _target_: gtsfm.scene_optimizer.SceneOptimizer
  save_gtsfm_data: True
  save_two_view_correspondences_viz: False
  save_3d_viz: True
  pose_angular_error_thresh: 5  # degrees

  image_pairs_generator:
    _target_: gtsfm.retriever.image_pairs_generator.ImagePairsGenerator
    global_descriptor:
      _target_: gtsfm.frontend.cacher.global_descriptor_cacher.GlobalDescriptorCacher
      global_descriptor_obj:
        _target_: gtsfm.frontend.global_descriptor.netvlad_global_descriptor.NetVLADGlobalDescriptor
    retriever:
      _target_: gtsfm.retriever.joint_netvlad_sequential_retriever.JointNetVLADSequentialRetriever
      num_matched: 2
      min_score: 0.2
      max_frame_lookahead: 3

    # retriever:
    #   _target_: gtsfm.retriever.sequential_retriever.SequentialRetriever
    #   max_frame_lookahead: 4

    # retriever:
    #   _target_: gtsfm.retriever.netvlad_retriever.NetVLADRetriever
    #   num_matched: 50
    #   min_score: 0.3

  correspondence_generator:
    _target_: gtsfm.frontend.correspondence_generator.synthetic_correspondence_generator.SyntheticCorrespondenceGenerator
    #dataset_root: /Users/johnlambert/Downloads/Tanks_and_Temples_Barn_410
    #dataset_root: /usr/local/gtsfm-data/Tanks_and_Temples_Barn_410
    dataset_root: /home/runner/work/gtsfm/gtsfm/Tanks_and_Temples_Barn_410  # Path for CI.
Review comment: I'd prefer this being loaded from the dataset root passed from the terminal, but it's not an easy fix. So we should be fine with it.
    scene_name: Barn

  two_view_estimator:
    _target_: gtsfm.two_view_estimator.TwoViewEstimator
    bundle_adjust_2view: False
    eval_threshold_px: 4  # in px
    ba_reproj_error_thresholds: [0.5]
    bundle_adjust_2view_maxiters: 100

    verifier:
      _target_: gtsfm.frontend.verifier.loransac.LoRansac
      use_intrinsics_in_verification: True
      estimation_threshold_px: 0.5  # for H/E/F estimators

    triangulation_options:
      _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions
      mode:
        _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode
        value: NO_RANSAC

    inlier_support_processor:
      _target_: gtsfm.two_view_estimator.InlierSupportProcessor
      min_num_inliers_est_model: 15
      min_inlier_ratio_est_model: 0.1

  multiview_optimizer:
    _target_: gtsfm.multi_view_optimizer.MultiViewOptimizer

    # comment out to not run
    view_graph_estimator:
      _target_: gtsfm.view_graph_estimator.cycle_consistent_rotation_estimator.CycleConsistentRotationViewGraphEstimator
      edge_error_aggregation_criterion: MEDIAN_EDGE_ERROR

    rot_avg_module:
      _target_: gtsfm.averaging.rotation.shonan.ShonanRotationAveraging
      # Use a very low value.
      two_view_rotation_sigma: 0.1

    trans_avg_module:
      _target_: gtsfm.averaging.translation.averaging_1dsfm.TranslationAveraging1DSFM
      robust_measurement_noise: True
      projection_sampling_method: SAMPLE_INPUT_MEASUREMENTS

    data_association_module:
      _target_: gtsfm.data_association.data_assoc.DataAssociation
      min_track_len: 2
      triangulation_options:
        _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions
        reproj_error_threshold: 10
        mode:
          _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode
          value: RANSAC_SAMPLE_UNIFORM
        max_num_hypotheses: 100
      save_track_patches_viz: False

    bundle_adjustment_module:
      _target_: gtsfm.bundle.bundle_adjustment.BundleAdjustmentOptimizer
      reproj_error_thresholds: [10, 5, 3]  # for (multistage) post-optimization filtering
      robust_measurement_noise: True
      shared_calib: False
      cam_pose3_prior_noise_sigma: 0.01
      calibration_prior_noise_sigma: 1e-5
      measurement_noise_sigma: 1.0

  # # comment out to not run
  # dense_multiview_optimizer:
  #   _target_: gtsfm.densify.mvs_patchmatchnet.MVSPatchmatchNet
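The _target_ keys above follow Hydra's instantiation convention used by gtsfm configs. As a rough illustration only, a config like this one could be composed and turned into live objects roughly as follows; the config_path and the config name "synthetic_front_end" are assumptions for the sketch, not names taken from this PR.

# Minimal sketch (not part of this PR) of instantiating a _target_-based config with Hydra.
# The config_path and config_name below are hypothetical.
from hydra import compose, initialize
from hydra.utils import instantiate

with initialize(config_path="gtsfm/configs"):
    cfg = compose(config_name="synthetic_front_end")  # hypothetical file name
    # Recursively builds SceneOptimizer and all nested modules declared via _target_.
    scene_optimizer = instantiate(cfg.SceneOptimizer)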
Review discussion:
- I am wondering if we can just generate the matches a priori and use something like a cache to just load the matches from the disk.
- I was also thinking this would be the best approach, as this is more for debugging and will likely only be used by us.
- I see -- if we wish to use the Tanks and Temples loader to get GT poses to measure pose error, what's the difference between computing matches offline vs. online?
- Because the synthetic matching frontend is only used for debugging, and I'm not sure we should merge it into the main repo.
- @ayushbaid @travisdriver wanted to revisit this -- I would prefer to keep this as-is and make this code self-contained, so that we only need to run one command, instead of having to make two new scripts (one to generate all the correspondences beforehand, and another to accept saved correspondences).
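For context, here is a hypothetical sketch of the precompute-and-cache approach the reviewers suggest; it is not what this PR implements (the PR keeps correspondence generation online and self-contained), and the cache path and helper names are invented for illustration.

# Hypothetical sketch of the reviewers' suggestion: generate correspondences once,
# persist them to disk, and reload them on later runs. Not part of this PR.
import pickle
from pathlib import Path

CACHE_FPATH = Path("cache/synthetic_correspondences.pkl")  # hypothetical location


def save_correspondences(keypoints_list, corr_idxs_dict) -> None:
    """Persist aggregated keypoints and per-pair match indices to disk."""
    CACHE_FPATH.parent.mkdir(parents=True, exist_ok=True)
    with open(CACHE_FPATH, "wb") as f:
        pickle.dump({"keypoints": keypoints_list, "corr_idxs": corr_idxs_dict}, f)


def load_correspondences():
    """Load previously cached correspondences instead of regenerating them."""
    with open(CACHE_FPATH, "rb") as f:
        data = pickle.load(f)
    return data["keypoints"], data["corr_idxs"]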
@@ -0,0 +1,119 @@
"""Correspondence generator that creates synthetic keypoint correspondences using a 3d mesh.

Authors: John Lambert
"""
import tempfile
from typing import Dict, List, Tuple

from dask.distributed import Client, Future
import numpy as np
import open3d

from gtsfm.common.keypoints import Keypoints
from gtsfm.common.types import CAMERA_TYPE
from gtsfm.frontend.correspondence_generator.correspondence_generator_base import CorrespondenceGeneratorBase
from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_base import KeypointAggregatorBase
from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_dedup import (
    KeypointAggregatorDedup,
)
from gtsfm.frontend.correspondence_generator.keypoint_aggregator.keypoint_aggregator_unique import (
    KeypointAggregatorUnique,
)
from gtsfm.loader.loader_base import LoaderBase
from gtsfm.loader.tanks_and_temples_loader import TanksAndTemplesLoader


class SyntheticCorrespondenceGenerator(CorrespondenceGeneratorBase):
    """Pair-wise synthetic keypoint correspondence generator."""

    def __init__(self, dataset_root: str, scene_name: str, deduplicate: bool = True) -> None:
        """
        Args:
            dataset_root: Path to where the Tanks & Temples dataset is stored.
            scene_name: Name of the scene from the Tanks & Temples dataset.
            deduplicate: Whether to de-duplicate, within a single image, the detections received from each image pair.
        """
        self._dataset_root = dataset_root
        self._scene_name = scene_name
        self._aggregator: KeypointAggregatorBase = (
            KeypointAggregatorDedup() if deduplicate else KeypointAggregatorUnique()
        )

    def generate_correspondences(
        self,
        client: Client,
        images: List[Future],
        image_pairs: List[Tuple[int, int]],
        num_sampled_3d_points: int = 500,
    ) -> Tuple[List[Keypoints], Dict[Tuple[int, int], np.ndarray]]:
        """Apply the correspondence generator to generate putative correspondences.

        Args:
            client: Dask client, used to execute the front-end as futures.
            images: List of all images, as futures.
            image_pairs: Indices of the pairs of images to estimate two-view pose and correspondences for.
            num_sampled_3d_points: Number of 3d points to sample from the mesh surface.

        Returns:
            List of keypoints, with one entry for each input image.
            Putative correspondences as indices of keypoints (N,2), for pairs of images (i1,i2).
        """
        dataset_root = self._dataset_root
        scene_name = self._scene_name

        img_dir = f"{dataset_root}/{scene_name}"
        poses_fpath = f"{dataset_root}/{scene_name}_COLMAP_SfM.log"
        lidar_ply_fpath = f"{dataset_root}/{scene_name}.ply"
        colmap_ply_fpath = f"{dataset_root}/{scene_name}_COLMAP.ply"
        ply_alignment_fpath = f"{dataset_root}/{scene_name}_trans.txt"
        bounding_polyhedron_json_fpath = f"{dataset_root}/{scene_name}.json"
        loader = TanksAndTemplesLoader(
Review comment: this will have to be pinned to the machine with the input worker, right? Hence I was suggesting we create the correspondences on disk beforehand. But fine with it given the time constraint.
            img_dir=img_dir,
            poses_fpath=poses_fpath,
            lidar_ply_fpath=lidar_ply_fpath,
            ply_alignment_fpath=ply_alignment_fpath,
            bounding_polyhedron_json_fpath=bounding_polyhedron_json_fpath,
            colmap_ply_fpath=colmap_ply_fpath,
        )

        mesh = loader.reconstruct_mesh()

        # Sample random 3d points. This sampling must occur only once, to avoid clusters from repeated sampling.
        pcd = mesh.sample_points_uniformly(number_of_points=num_sampled_3d_points)
        pcd = mesh.sample_points_poisson_disk(number_of_points=num_sampled_3d_points, pcl=pcd)
        sampled_points = np.asarray(pcd.points)

        # TODO(jolambert): File Open3d bug to add pickle support for TriangleMesh.
        open3d_mesh_path = tempfile.NamedTemporaryFile(suffix=".obj").name
        open3d.io.write_triangle_mesh(filename=open3d_mesh_path, mesh=mesh)

        loader_future = client.scatter(loader, broadcast=False)
Review discussion:
- We could just avoid futures for this file, I guess?
- Hmm, wouldn't we lose all parallelization benefits then?
- Yes, but since we are just loading from disk, we would not lose much in terms of time, I think.

        def apply_synthetic_corr_generator(
            loader_: LoaderBase,
            camera_i1: CAMERA_TYPE,
            camera_i2: CAMERA_TYPE,
            open3d_mesh_fpath: str,
            points: np.ndarray,
        ) -> Tuple[Keypoints, Keypoints]:
            return loader_.generate_synthetic_correspondences_for_image_pair(
                camera_i1, camera_i2, open3d_mesh_fpath, points
            )

        pairwise_correspondence_futures = {
            (i1, i2): client.submit(
                apply_synthetic_corr_generator,
                loader_future,
                loader.get_camera(index=i1),
                loader.get_camera(index=i2),
                open3d_mesh_path,
                sampled_points,
            )
            for i1, i2 in image_pairs
        }

        pairwise_correspondences: Dict[Tuple[int, int], Tuple[Keypoints, Keypoints]] = client.gather(
            pairwise_correspondence_futures
        )

        keypoints_list, putative_corr_idxs_dict = self._aggregator.aggregate(keypoints_dict=pairwise_correspondences)
        return keypoints_list, putative_corr_idxs_dict
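For reference, a minimal usage sketch of the generator defined above, assuming a local Dask cluster is acceptable. The dataset_root path below is a placeholder, and the image futures are left empty because this synthetic generator reads data through its own TanksAndTemplesLoader rather than from the passed images.

# Minimal usage sketch (not part of this PR). The dataset path is a placeholder.
from dask.distributed import Client

from gtsfm.frontend.correspondence_generator.synthetic_correspondence_generator import (
    SyntheticCorrespondenceGenerator,
)

client = Client()  # local Dask cluster
generator = SyntheticCorrespondenceGenerator(
    dataset_root="/path/to/Tanks_and_Temples_Barn_410",  # placeholder path
    scene_name="Barn",
)
keypoints_list, corr_idxs_dict = generator.generate_correspondences(
    client=client,
    images=[],  # unused here: the synthetic generator loads data via its own loader
    image_pairs=[(0, 1), (1, 2)],
    num_sampled_3d_points=500,
)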
Review comment: Remove?
Reply: Removed, thanks.