Merge pull request #815 from borglab/add/colmap-sift

Add Colmap SIFT Features
borglab · Nov 6, 2024 · 3c767b8 · 3c767b8
2 parents fe386f2 + 02bf2d0
commit 3c767b8
Show file tree

Hide file tree

Showing 7 changed files with 198 additions and 70 deletions.
diff --git a/environment_linux.yml b/environment_linux.yml
@@ -38,7 +38,7 @@ dependencies:
   - pytorch-cuda=11.8
   - pytorch
   - torchvision>=0.13.0
-  - kornia
+  - kornia==0.7.3
   - pycolmap
   # io
   - h5py

diff --git a/environment_linux_cpuonly.yml b/environment_linux_cpuonly.yml
@@ -37,7 +37,7 @@ dependencies:
   - cpuonly # replacement of cudatoolkit for cpu only machines
   - pytorch>=1.12.0
   - torchvision>=0.13.0
-  - kornia
+  - kornia==0.7.3
   - pycolmap
   # io
   - h5py

diff --git a/gtsfm/configs/colmap_front_end.yaml b/gtsfm/configs/colmap_front_end.yaml
@@ -0,0 +1,92 @@
+# Front-end configuration that leverages PyColmap for feature extraction and verfiication.
+
+SceneOptimizer:
+  _target_: gtsfm.scene_optimizer.SceneOptimizer
+  save_gtsfm_data: True
+  save_two_view_correspondences_viz: False
+  save_3d_viz: False
+  pose_angular_error_thresh: 5 # degrees
+
+  image_pairs_generator:
+    _target_: gtsfm.retriever.image_pairs_generator.ImagePairsGenerator
+    retriever:
+      _target_: gtsfm.retriever.exhaustive_retriever.ExhaustiveRetriever
+
+  correspondence_generator:
+    _target_: gtsfm.frontend.correspondence_generator.det_desc_correspondence_generator.DetDescCorrespondenceGenerator
+
+    detector_descriptor:
+      _target_: gtsfm.frontend.cacher.detector_descriptor_cacher.DetectorDescriptorCacher
+      detector_descriptor_obj:
+        _target_: gtsfm.frontend.detector_descriptor.colmap_sift.ColmapSIFTDetectorDescriptor
+        max_keypoints: 8192
+
+    matcher:
+      _target_: gtsfm.frontend.cacher.matcher_cacher.MatcherCacher
+      matcher_obj:
+        _target_: gtsfm.frontend.matcher.twoway_matcher.TwoWayMatcher
+        ratio_test_threshold: 0.8
+
+  two_view_estimator:
+    _target_: gtsfm.two_view_estimator_cacher.TwoViewEstimatorCacher
+    two_view_estimator_obj:
+      _target_: gtsfm.two_view_estimator.TwoViewEstimator
+      bundle_adjust_2view: True
+      eval_threshold_px: 4 # in px
+      ba_reproj_error_thresholds: [0.5]
+      bundle_adjust_2view_maxiters: 100
+
+      verifier:
+        _target_: gtsfm.frontend.verifier.loransac.LoRansac
+        use_intrinsics_in_verification: True
+        estimation_threshold_px: 4 # for H/E/F estimators
+
+      triangulation_options:
+        _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions
+        mode:
+          _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode
+          value: NO_RANSAC
+
+      inlier_support_processor:
+        _target_: gtsfm.two_view_estimator.InlierSupportProcessor
+        min_num_inliers_est_model: 15
+        min_inlier_ratio_est_model: 0.1
+
+  multiview_optimizer:
+    _target_: gtsfm.multi_view_optimizer.MultiViewOptimizer
+
+    # comment out to not run
+    view_graph_estimator:
+      _target_: gtsfm.view_graph_estimator.cycle_consistent_rotation_estimator.CycleConsistentRotationViewGraphEstimator
+      edge_error_aggregation_criterion: MIN_EDGE_ERROR
+
+    rot_avg_module:
+      _target_: gtsfm.averaging.rotation.shonan.ShonanRotationAveraging
+      weight_by_inliers: True
+
+    trans_avg_module:
+      _target_: gtsfm.averaging.translation.averaging_1dsfm.TranslationAveraging1DSFM
+      robust_measurement_noise: True
+      projection_sampling_method: SAMPLE_INPUT_MEASUREMENTS
+      reject_outliers: True
+
+    data_association_module:
+      _target_: gtsfm.data_association.data_assoc.DataAssociation
+      min_track_len: 3
+      triangulation_options:
+        _target_: gtsfm.data_association.point3d_initializer.TriangulationOptions
+        reproj_error_threshold: 10
+        mode:
+          _target_: gtsfm.data_association.point3d_initializer.TriangulationSamplingMode
+          value: RANSAC_SAMPLE_UNIFORM
+        max_num_hypotheses: 100
+      save_track_patches_viz: False
+
+    bundle_adjustment_module:
+      _target_: gtsfm.bundle.bundle_adjustment.BundleAdjustmentOptimizer
+      reproj_error_thresholds: [10, 5, 3] # for (multistage) post-optimization filtering
+      robust_measurement_noise: True
+      shared_calib: False
+      cam_pose3_prior_noise_sigma: 0.1
+      calibration_prior_noise_sigma: 1e-5
+      measurement_noise_sigma: 1.0
diff --git a/gtsfm/frontend/detector_descriptor/colmap_sift.py b/gtsfm/frontend/detector_descriptor/colmap_sift.py
@@ -0,0 +1,54 @@
+"""SIFT Detector-Descriptor implementation.
+
+The detector was proposed in 'Distinctive Image Features from Scale-Invariant Keypoints' and is implemented by wrapping
+over OpenCV's API.
+
+References:
+- https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf
+- https://docs.opencv.org/3.4.2/d5/d3c/classcv_1_1xfeatures2d_1_1SIFT.html
+
+Authors: Ayush Baid
+"""
+from typing import Tuple
+
+import numpy as np
+import pycolmap
+
+import gtsfm.utils.images as image_utils
+from gtsfm.common.image import Image
+from gtsfm.common.keypoints import Keypoints
+from gtsfm.frontend.detector_descriptor.detector_descriptor_base import DetectorDescriptorBase
+
+
+class ColmapSIFTDetectorDescriptor(DetectorDescriptorBase):
+    """SIFT detector-descriptor using Colmap's implementation."""
+
+    def detect_and_describe(self, image: Image) -> Tuple[Keypoints, np.ndarray]:
+        """Perform feature detection as well as their description.
+
+        Refer to detect() in DetectorBase and describe() in DescriptorBase for details about the output format.
+
+        Args:
+            image: the input image.
+
+        Returns:
+            Detected keypoints, with length N <= max_keypoints.
+            Corr. descriptors, of shape (N, D) where D is the dimension of each descriptor.
+        """
+
+        # Convert to grayscale.
+        gray_image = image_utils.rgb_to_gray_cv(image)
+
+        # Create pycolmap object every time as the object is not pickle-able.
+        # Note: cannot use SiftGPU as wheels are not built with CUDA support.
+        options = pycolmap.SiftExtractionOptions(max_num_features=self.max_keypoints)
+        sift_obj = pycolmap.Sift(options)
+
+        # Extract features.
+        features, descriptors = sift_obj.extract(gray_image.value_array.astype(np.float32))
+
+        # Convert to GTSFM's keypoints.
+        # Note: Columns of features is x-coordinate, y-coordinate, scale, and orientation, respectively.
+        keypoints = Keypoints(coordinates=features[..., :2], scales=features[:, 2])
+
+        return keypoints, descriptors
diff --git a/gtsfm/frontend/verifier/loransac.py b/gtsfm/frontend/verifier/loransac.py
@@ -16,7 +16,6 @@
 import pycolmap
 from gtsam import Cal3Bundler, Rot3, Unit3
 
-import gtsfm.frontend.verifier.verifier_base as verifier_base
 import gtsfm.utils.logger as logger_utils
 import gtsfm.utils.pycolmap_utils as pycolmap_utils
 import gtsfm.utils.verification as verification_utils
@@ -26,17 +25,22 @@
 logger = logger_utils.get_logger()
 
 
+# Default Colmap params.
 MIN_INLIER_RATIO = 0.01
-MIN_NUM_TRIALS = 100000
-MAX_NUM_TRIALS = 1000000
-CONFIDENCE = 0.999999
+MIN_NUM_TRIALS = 1000
+MAX_NUM_TRIALS = 10000
+CONFIDENCE = 0.9999
 
 
 class LoRansac(VerifierBase):
     def __init__(
         self,
         use_intrinsics_in_verification: bool,
         estimation_threshold_px: float,
+        min_inlier_ratio: float = MIN_INLIER_RATIO,
+        min_num_trials: int = MIN_NUM_TRIALS,
+        max_num_trials: int = MAX_NUM_TRIALS,
+        confidence: float = CONFIDENCE,
     ) -> None:
         """Initializes the verifier.
 
@@ -50,15 +54,16 @@ def __init__(
                 Sampson distance.
         """
         super().__init__(use_intrinsics_in_verification, estimation_threshold_px)
-        self._min_matches = (
-            verifier_base.NUM_MATCHES_REQ_E_MATRIX
-            if self._use_intrinsics_in_verification
-            else verifier_base.NUM_MATCHES_REQ_F_MATRIX
+        self._ransac_options = pycolmap.RANSACOptions(
+            {
+                "max_error": self._estimation_threshold_px,
+                "min_num_trials": min_num_trials,
+                "max_num_trials": max_num_trials,
+                "min_inlier_ratio": min_inlier_ratio,
+                "confidence": confidence,
+            }
         )
 
-        # for failure, i2Ri1 = None, and i2Ui1 = None, and no verified correspondences, and inlier_ratio_est_model = 0
-        self._failure_result = (None, None, np.array([], dtype=np.uint64), 0.0)
-
     def __estimate_essential_matrix(
         self,
         uv_i1: np.ndarray,
@@ -81,19 +86,7 @@ def __estimate_essential_matrix(
         camera_i2: pycolmap.Camera = pycolmap_utils.get_pycolmap_camera(camera_intrinsics_i2)
 
         result_dict = pycolmap.essential_matrix_estimation(
-            uv_i1,
-            uv_i2,
-            camera_i1,
-            camera_i2,
-            pycolmap.RANSACOptions(
-                {
-                    "max_error": self._estimation_threshold_px,
-                    "min_num_trials": MIN_NUM_TRIALS,
-                    "max_num_trials": MAX_NUM_TRIALS,
-                    "min_inlier_ratio": MIN_INLIER_RATIO,
-                    "confidence": CONFIDENCE,
-                }
-            ),
+            uv_i1, uv_i2, camera_i1, camera_i2, self._ransac_options,
         )
         return result_dict
 
@@ -131,19 +124,7 @@ def verify(
         if self._use_intrinsics_in_verification:
             result_dict = self.__estimate_essential_matrix(uv_i1, uv_i2, camera_intrinsics_i1, camera_intrinsics_i2)
         else:
-            result_dict = pycolmap.fundamental_matrix_estimation(
-                uv_i1,
-                uv_i2,
-                pycolmap.RANSACOptions(
-                    {
-                        "max_error": self._estimation_threshold_px,
-                        "min_num_trials": MIN_NUM_TRIALS,
-                        "max_num_trials": MAX_NUM_TRIALS,
-                        "min_inlier_ratio": MIN_INLIER_RATIO,
-                        "confidence": CONFIDENCE,
-                    }
-                ),
-            )
+            result_dict = pycolmap.fundamental_matrix_estimation(uv_i1, uv_i2, self._ransac_options)
 
         if not result_dict:
             matrix_type = "Essential" if self._use_intrinsics_in_verification else "Fundamental"

diff --git a/tests/frontend/detector_descriptor/test_colmap_sift.py b/tests/frontend/detector_descriptor/test_colmap_sift.py
@@ -0,0 +1,32 @@
+"""Tests for SIFT detector descriptor
+
+Authors: Ayush Baid
+"""
+import unittest
+
+import tests.frontend.detector_descriptor.test_detector_descriptor_base as test_detector_descriptor_base
+from gtsfm.frontend.detector.detector_from_joint_detector_descriptor import DetectorFromDetectorDescriptor
+from gtsfm.frontend.detector_descriptor.colmap_sift import ColmapSIFTDetectorDescriptor
+
+
+class TestColmapSIFTDetectorDescriptor(test_detector_descriptor_base.TestDetectorDescriptorBase):
+    """Main test class for detector-description combination base class in frontend.
+
+    All unit test functions defined in TestDetectorDescriptorBase are run automatically.
+    """
+
+    def setUp(self):
+        """Setup the attributes for the tests."""
+        super().setUp()
+        # Note: pycolmap does not guarantee that the number of keypoints will not exceed the specified maximum, as there
+        # can be ties in terms of the response scores. E.g., if the 5000th keypoint and the 5001st keypoint have the
+        # same response, pycolmap will return 5001 keypoints. Setting the number of maximum keypoints lower reduces the
+        # risk of this happening.
+        self.detector_descriptor = ColmapSIFTDetectorDescriptor(max_keypoints=2000)
+
+        # explicitly set the detector
+        self.detector = DetectorFromDetectorDescriptor(self.detector_descriptor)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/frontend/detector_descriptor/test_colmap_sift_detector_descriptor.py b/tests/frontend/detector_descriptor/test_colmap_sift_detector_descriptor.py